embulk 0.7.11-java → 0.8.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +17 -15
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63d78296ab5c306a09832af9d838a380399c16a7
4
- data.tar.gz: d0a267d4052f88ec276ec8cf0000ce64c07e224e
3
+ metadata.gz: 55c8a96cd529ab0f895063f6d73c36522a96d0be
4
+ data.tar.gz: 6d416a8b3f768a941a5a386107d70893afff1509
5
5
  SHA512:
6
- metadata.gz: e303ef0b199c2ad2246be937103c8c26218635a032e1da7bb6c3798a92eca13c7fdbc91b7612c7b97d15506282b060892db5c4b03bf9206b804bbd3d8ca055ad
7
- data.tar.gz: 48b676b557f9be90f915bded8b6589f39671a24584e41989d436a3f7f746dc46aae79977ba364b1bfaef23e3593b842078d6ce00b517a367712c43946b599ba0
6
+ metadata.gz: fb9a45ea1d54286c3626ab98055783043b439e6a0e34380d3431cc8ef30bb3cb7f7043f70714f2929ef10421acc5bfdf795f05b481a419896a93efa17a5da535
7
+ data.tar.gz: 8947eb80b213318118325705087655d33c05b53a5861dccfc8d79d0be16f6c01612755bc6daed6a2f17790dcbadbed80d22653877a245ca434f171ca90869658
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- embulk (0.7.9)
5
- jruby-jars (= 9.0.0.0)
4
+ embulk (0.8.0)
5
+ jruby-jars (= 9.0.4.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- jruby-jars (9.0.0.0)
10
+ jruby-jars (9.0.4.0)
11
11
  kramdown (1.5.0)
12
12
  power_assert (0.2.2)
13
13
  rake (10.4.2)
data/README.md CHANGED
@@ -110,7 +110,7 @@ To use the bundle, add `-b <bundle_dir>` option to `guess`, `preview`, or `run`
110
110
  See the generated \<bundle_dir>/Gemfile file how to plugin bundles work.
111
111
 
112
112
  ```
113
- embulk mkbundle ./embulk_bundle
113
+ embulk mkbundle ./embulk_bundle # please edit ./embulk_bundle/Gemfile to add plugins. Detailed usage is written in the Gemfile
114
114
  embulk guess -b ./embulk_bundle ...
115
115
  embulk run -b ./embulk_bundle ...
116
116
  ```
data/build.gradle CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.7.11'
19
+ version = '0.8.0'
20
20
 
21
21
  ext {
22
- jrubyVersion = '9.0.0.0'
22
+ jrubyVersion = '9.0.4.0'
23
23
  }
24
24
 
25
25
  apply plugin: 'java'
data/embulk-core/build.gradle CHANGED
@@ -16,6 +16,7 @@ import com.github.jrubygradle.JRubyPrepare
16
16
 
17
17
  // determine which dependencies have updates: $ gradle dependencyUpdates
18
18
  dependencies {
19
+ compile 'org.embulk:guice-bootstrap:0.1.1'
19
20
  compile 'com.google.guava:guava:18.0'
20
21
  compile 'com.google.inject:guice:4.0'
21
22
  compile 'com.google.inject.extensions:guice-multibindings:4.0'
@@ -37,6 +38,7 @@ dependencies {
37
38
  compile 'joda-time:joda-time:2.8.1'
38
39
  compile 'io.netty:netty-buffer:5.0.0.Alpha1'
39
40
  compile 'org.fusesource.jansi:jansi:1.11'
41
+ compile 'org.msgpack:msgpack-core:0.8.1'
40
42
 
41
43
  // For embulk/guess/charset.rb. See also embulk.gemspec
42
44
  compile 'com.ibm.icu:icu4j:54.1.1'
data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java CHANGED
@@ -15,6 +15,9 @@ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
15
15
  import com.fasterxml.jackson.databind.node.ObjectNode;
16
16
  import com.fasterxml.jackson.databind.node.JsonNodeFactory;
17
17
  import org.yaml.snakeyaml.Yaml;
18
+ import org.yaml.snakeyaml.DumperOptions;
19
+ import org.yaml.snakeyaml.representer.Representer;
20
+ import org.yaml.snakeyaml.constructor.SafeConstructor;
18
21
 
19
22
  public class ConfigLoader
20
23
  {
@@ -60,7 +63,7 @@ public class ConfigLoader
60
63
 
61
64
  public ConfigSource fromYamlString(String string)
62
65
  {
63
- JsonNode node = objectToJson(new Yaml().load(string));
66
+ JsonNode node = objectToJson(newYaml().load(string));
64
67
  validateJsonNode(node);
65
68
  return new DataSourceImpl(model, (ObjectNode) node);
66
69
  }
@@ -74,7 +77,7 @@ public class ConfigLoader
74
77
 
75
78
  public ConfigSource fromYaml(InputStream stream) throws IOException
76
79
  {
77
- JsonNode node = objectToJson(new Yaml().load(stream));
80
+ JsonNode node = objectToJson(newYaml().load(stream));
78
81
  validateJsonNode(node);
79
82
  return new DataSourceImpl(model, (ObjectNode) node);
80
83
  }
@@ -107,7 +110,7 @@ public class ConfigLoader
107
110
  {
108
111
  ObjectNode source = new ObjectNode(JsonNodeFactory.instance);
109
112
  DataSource ds = new DataSourceImpl(model, source);
110
- Yaml yaml = new Yaml();
113
+ Yaml yaml = newYaml();
111
114
  for (Map.Entry<String, String> pair : props.entrySet()) {
112
115
  if (!pair.getKey().startsWith(keyPrefix)) {
113
116
  continue;
@@ -138,4 +141,9 @@ public class ConfigLoader
138
141
  throw new RuntimeException(ex);
139
142
  }
140
143
  }
144
+
145
+ private Yaml newYaml()
146
+ {
147
+ return new Yaml(new SafeConstructor(), new Representer(), new DumperOptions(), new YamlTagResolver());
148
+ }
141
149
  }
data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java ADDED
@@ -0,0 +1,53 @@
1
+ package org.embulk.config;
2
+
3
+ import java.util.List;
4
+ import java.util.regex.Pattern;
5
+ import org.yaml.snakeyaml.resolver.Resolver;
6
+ import org.yaml.snakeyaml.nodes.Tag;
7
+ import org.yaml.snakeyaml.nodes.NodeId;
8
+
9
+ public class YamlTagResolver
10
+ extends Resolver
11
+ {
12
+ // Resolver converts a node (scalar, sequence, map, or !!tag with them)
13
+ // to a tag (INT, FLOAT, STR, SEQ, MAP, ...). For example, converting
14
+ // "123" (scalar) to 123 (INT), or "true" (scalar) to true (BOOL).
15
+ // This is called by snakeyaml Composer which converts parser events
16
+ // into an object.
17
+ //
18
+ // jackson-dataformat-yaml doesn't use this because it traverses parser
19
+ // events without using Composer.
20
+
21
+ public static final Pattern FLOAT_EXCEPTING_ZERO_START = Pattern
22
+ .compile("^([-+]?(\\.[0-9]+|[1-9][0-9_]*(\\.[0-9_]*)?)([eE][-+]?[0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");
23
+
24
+ @Override
25
+ public void addImplicitResolver(Tag tag, Pattern regexp, String first)
26
+ {
27
+ // This method is called by constructor through addImplicitResolvers
28
+ // to setup default implicit resolvers.
29
+
30
+ if (tag.equals(Tag.FLOAT)) {
31
+ super.addImplicitResolver(Tag.FLOAT, FLOAT_EXCEPTING_ZERO_START, "-+0123456789.");
32
+ }
33
+ else if (tag.equals(Tag.BOOL)) {
34
+ // use stricter rule (reject 'On', 'Off', 'Yes', 'No')
35
+ super.addImplicitResolver(Tag.BOOL, Pattern.compile("^(?:[Tt]rue|[Ff]alse)$"), "TtFf");
36
+ }
37
+ else if (tag.equals(Tag.TIMESTAMP)) {
38
+ // This solves some unexpected behavior that snakeyaml
39
+ // deserializes "2015-01-01 00:00:00" to java.util.Date
40
+ // but jackson serializes java.util.Date to an integer.
41
+ return;
42
+ }
43
+ else {
44
+ super.addImplicitResolver(tag, regexp, first);
45
+ }
46
+ }
47
+
48
+ @Override
49
+ public Tag resolve(NodeId kind, String value, boolean implicit)
50
+ {
51
+ return super.resolve(kind, value, implicit); // checks implicit resolvers
52
+ }
53
+ }
data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java CHANGED
@@ -36,7 +36,6 @@ public class ExecModule
36
36
  registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
37
37
 
38
38
  // LocalExecutorPlugin
39
- binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
40
39
  registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
41
40
 
42
41
  // serde
data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java CHANGED
@@ -6,124 +6,534 @@ import java.util.concurrent.Callable;
6
6
  import java.util.concurrent.Future;
7
7
  import java.util.concurrent.ExecutorService;
8
8
  import java.util.concurrent.ExecutionException;
9
+ import com.google.common.base.Throwables;
10
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
9
11
  import org.slf4j.Logger;
10
12
  import com.google.inject.Inject;
11
13
  import org.embulk.config.ConfigSource;
14
+ import org.embulk.config.TaskSource;
12
15
  import org.embulk.config.TaskReport;
13
16
  import org.embulk.spi.Exec;
17
+ import org.embulk.spi.ExecSession;
14
18
  import org.embulk.spi.ExecutorPlugin;
15
19
  import org.embulk.spi.ProcessTask;
16
20
  import org.embulk.spi.ProcessState;
17
21
  import org.embulk.spi.Schema;
22
+ import org.embulk.spi.InputPlugin;
23
+ import org.embulk.spi.FilterPlugin;
24
+ import org.embulk.spi.OutputPlugin;
25
+ import org.embulk.spi.Page;
26
+ import org.embulk.spi.PageOutput;
27
+ import org.embulk.spi.AbortTransactionResource;
28
+ import org.embulk.spi.CloseResource;
29
+ import org.embulk.spi.TransactionalPageOutput;
30
+ import org.embulk.plugin.compat.PluginWrappers;
31
+ import org.embulk.spi.util.Filters;
18
32
  import org.embulk.spi.util.Executors;
19
33
  import org.embulk.spi.util.Executors.ProcessStateCallback;
20
34
 
21
35
  public class LocalExecutorPlugin
22
36
  implements ExecutorPlugin
23
37
  {
24
- private final ExecutorService executor;
38
+ private int defaultMaxThreads;
39
+ private int defaultMinThreads;
25
40
 
26
41
  @Inject
27
- public LocalExecutorPlugin(LocalThreadExecutor executor)
42
+ public LocalExecutorPlugin(@ForSystemConfig ConfigSource systemConfig)
28
43
  {
29
- this.executor = executor.getExecutorService();
44
+ int cores = Runtime.getRuntime().availableProcessors();
45
+ this.defaultMaxThreads = systemConfig.get(Integer.class, "max_threads", cores * 2);
46
+ this.defaultMinThreads = systemConfig.get(Integer.class, "min_output_tasks", cores);
30
47
  }
31
48
 
32
49
  @Override
33
- public void transaction(ConfigSource config, Schema outputSchema, final int inputTaskCount,
50
+ public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
34
51
  ExecutorPlugin.Control control)
35
52
  {
36
- control.transaction(outputSchema, inputTaskCount, new Executor() {
37
- public void execute(ProcessTask task, ProcessState state)
38
- {
39
- localExecute(task, inputTaskCount, state);
40
- }
41
- });
53
+ try (AbstractLocalExecutor exec = newExecutor(config, inputTaskCount)) {
54
+ control.transaction(outputSchema, exec.getOutputTaskCount(), exec);
55
+ }
42
56
  }
43
57
 
44
- private void localExecute(ProcessTask task, int taskCount, ProcessState state)
58
+ private AbstractLocalExecutor newExecutor(ConfigSource config, int inputTaskCount)
45
59
  {
46
60
  Logger log = Exec.getLogger(LocalExecutorPlugin.class);
61
+ int maxThreads = config.get(Integer.class, "max_threads", defaultMaxThreads);
62
+ int minThreads = config.get(Integer.class, "min_output_tasks", defaultMinThreads);
63
+ if (inputTaskCount < minThreads) {
64
+ int scatterCount = (minThreads + inputTaskCount - 1) / inputTaskCount;
65
+ log.info("Using local thread executor with max_threads={} / output tasks {} = input tasks {} * {}",
66
+ maxThreads, inputTaskCount * scatterCount, inputTaskCount, scatterCount);
67
+ return new ScatterExecutor(maxThreads, inputTaskCount, scatterCount);
68
+ }
69
+ else {
70
+ log.info("Using local thread executor with max_threads={} / tasks={}", maxThreads, inputTaskCount);
71
+ return new DirectExecutor(maxThreads, inputTaskCount);
72
+ }
73
+ }
47
74
 
48
- state.initialize(taskCount, taskCount);
75
+ private static abstract class AbstractLocalExecutor
76
+ implements Executor, AutoCloseable
77
+ {
78
+ protected final Logger log = Exec.getLogger(LocalExecutorPlugin.class);
49
79
 
50
- List<Future<Throwable>> futures = new ArrayList<>(taskCount);
51
- try {
52
- for (int i=0; i < taskCount; i++) {
53
- if (state.getOutputTaskState(i).isCommitted()) {
54
- log.warn("Skipped resumed task {}", i);
55
- futures.add(null); // resumed
56
- } else {
57
- futures.add(startProcessor(task, i, state));
58
- }
59
- }
60
- showProgress(log, state, taskCount);
80
+ protected final int inputTaskCount;
81
+ protected final int outputTaskCount;
82
+
83
+ public AbstractLocalExecutor(int inputTaskCount, int outputTaskCount)
84
+ {
85
+ this.inputTaskCount = inputTaskCount;
86
+ this.outputTaskCount = outputTaskCount;
87
+ }
88
+
89
+ public int getOutputTaskCount()
90
+ {
91
+ return outputTaskCount;
92
+ }
93
+
94
+ @Override
95
+ public void execute(ProcessTask task, ProcessState state)
96
+ {
97
+ state.initialize(inputTaskCount, outputTaskCount);
61
98
 
62
- for (int i=0; i < taskCount; i++) {
63
- if (futures.get(i) == null) {
64
- continue;
99
+ List<Future<Throwable>> futures = new ArrayList<>(inputTaskCount);
100
+ try {
101
+ for (int i = 0; i < inputTaskCount; i++) {
102
+ futures.add(startInputTask(task, state, i));
65
103
  }
66
- try {
67
- state.getInputTaskState(i).setException(futures.get(i).get());
68
- } catch (ExecutionException ex) {
69
- state.getInputTaskState(i).setException(ex.getCause());
70
- //Throwables.propagate(ex.getCause());
71
- } catch (InterruptedException ex) {
72
- state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
104
+ showProgress(state, inputTaskCount);
105
+
106
+ for (int i = 0; i < inputTaskCount; i++) {
107
+ if (futures.get(i) == null) {
108
+ continue;
109
+ }
110
+ try {
111
+ state.getInputTaskState(i).setException(futures.get(i).get());
112
+ }
113
+ catch (ExecutionException ex) {
114
+ state.getInputTaskState(i).setException(ex.getCause());
115
+ //Throwables.propagate(ex.getCause());
116
+ }
117
+ catch (InterruptedException ex) {
118
+ state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
119
+ }
120
+ showProgress(state, inputTaskCount);
73
121
  }
74
- showProgress(log, state, taskCount);
75
122
  }
76
- } finally {
77
- for (Future<Throwable> future : futures) {
78
- if (future != null && !future.isDone()) {
79
- future.cancel(true);
80
- // TODO join?
123
+ finally {
124
+ for (Future<Throwable> future : futures) {
125
+ if (future != null && !future.isDone()) {
126
+ future.cancel(true);
127
+ // TODO join?
128
+ }
81
129
  }
82
130
  }
83
131
  }
132
+
133
+ @Override
134
+ public abstract void close();
135
+
136
+ private void showProgress(ProcessState state, int taskCount)
137
+ {
138
+ int started = 0;
139
+ int finished = 0;
140
+ for (int i = 0; i < taskCount; i++) {
141
+ if (state.getOutputTaskState(i).isStarted()) { started++; }
142
+ if (state.getOutputTaskState(i).isFinished()) { finished++; }
143
+ }
144
+
145
+ log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
146
+ }
147
+
148
+ protected abstract Future<Throwable> startInputTask(ProcessTask task, ProcessState state, int taskIndex);
149
+ }
150
+
151
+ public static class DirectExecutor
152
+ extends AbstractLocalExecutor
153
+ {
154
+ protected final ExecutorService executor;
155
+
156
+ public DirectExecutor(int maxThreads, int taskCount)
157
+ {
158
+ super(taskCount, taskCount);
159
+ this.executor = java.util.concurrent.Executors.newFixedThreadPool(maxThreads,
160
+ new ThreadFactoryBuilder()
161
+ .setNameFormat("embulk-executor-%d")
162
+ .setDaemon(true)
163
+ .build());
164
+ }
165
+
166
+ @Override
167
+ public void close()
168
+ {
169
+ executor.shutdown();
170
+ }
171
+
172
+ @Override
173
+ protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
174
+ {
175
+ if (state.getOutputTaskState(taskIndex).isCommitted()) {
176
+ log.warn("Skipped resumed task {}", taskIndex);
177
+ return null; // resumed
178
+ }
179
+
180
+ return executor.submit(new Callable<Throwable>() {
181
+ public Throwable call()
182
+ {
183
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
184
+ Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
185
+ public void started()
186
+ {
187
+ state.getInputTaskState(taskIndex).start();
188
+ state.getOutputTaskState(taskIndex).start();
189
+ }
190
+
191
+ public void inputCommitted(TaskReport report)
192
+ {
193
+ state.getInputTaskState(taskIndex).setTaskReport(report);
194
+ }
195
+
196
+ public void outputCommitted(TaskReport report)
197
+ {
198
+ state.getOutputTaskState(taskIndex).setTaskReport(report);
199
+ }
200
+ });
201
+ return null;
202
+ }
203
+ finally {
204
+ state.getInputTaskState(taskIndex).finish();
205
+ state.getOutputTaskState(taskIndex).finish();
206
+ }
207
+ }
208
+ });
209
+ }
84
210
  }
85
211
 
86
- private void showProgress(Logger log, ProcessState state, int taskCount)
212
+ public static class ScatterExecutor
213
+ extends AbstractLocalExecutor
87
214
  {
88
- int started = 0;
89
- int finished = 0;
90
- for (int i=0; i < taskCount; i++) {
91
- if (state.getInputTaskState(i).isStarted()) { started++; }
92
- if (state.getOutputTaskState(i).isFinished()) { finished++; }
215
+ private final int scatterCount;
216
+ private final int inputTaskCount;
217
+ private final ExecutorService inputExecutor;
218
+ private final ExecutorService outputExecutor;
219
+
220
+ public ScatterExecutor(int maxThreads, int inputTaskCount, int scatterCount)
221
+ {
222
+ super(inputTaskCount, inputTaskCount * scatterCount);
223
+ this.inputTaskCount = inputTaskCount;
224
+ this.scatterCount = scatterCount;
225
+ this.inputExecutor = java.util.concurrent.Executors.newFixedThreadPool(
226
+ Math.max(maxThreads / scatterCount, 1),
227
+ new ThreadFactoryBuilder()
228
+ .setNameFormat("embulk-input-executor-%d")
229
+ .setDaemon(true)
230
+ .build());
231
+ this.outputExecutor = java.util.concurrent.Executors.newCachedThreadPool(
232
+ new ThreadFactoryBuilder()
233
+ .setNameFormat("embulk-output-executor-%d")
234
+ .setDaemon(true)
235
+ .build());
236
+ }
237
+
238
+ @Override
239
+ public void close()
240
+ {
241
+ inputExecutor.shutdown();
242
+ outputExecutor.shutdown();
93
243
  }
94
244
 
95
- log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
245
+ @Override
246
+ protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
247
+ {
248
+ if(isAllScatterOutputFinished(state, taskIndex)) {
249
+ log.warn("Skipped resumed input task {}", taskIndex);
250
+ return null; // resumed
251
+ }
252
+
253
+ return inputExecutor.submit(new Callable<Throwable>() {
254
+ public Throwable call()
255
+ {
256
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
257
+ runInputTask(Exec.session(), task, state, taskIndex);
258
+ return null;
259
+ }
260
+ }
261
+ });
262
+ }
263
+
264
+ private boolean isAllScatterOutputFinished(ProcessState state, int taskIndex) {
265
+ for (int i = 0; i < scatterCount; i++) {
266
+ int outputTaskIndex = taskIndex * scatterCount + i;
267
+ if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
268
+ return false;
269
+ }
270
+ }
271
+ return true;
272
+ }
273
+
274
+ private void runInputTask(ExecSession exec, ProcessTask task, ProcessState state, int taskIndex)
275
+ {
276
+ InputPlugin inputPlugin = exec.newPlugin(InputPlugin.class, task.getInputPluginType());
277
+ List<FilterPlugin> filterPlugins = Filters.newFilterPlugins(exec, task.getFilterPluginTypes());
278
+ OutputPlugin outputPlugin = exec.newPlugin(OutputPlugin.class, task.getOutputPluginType());
279
+
280
+ try (ScatterTransactionalPageOutput tran = new ScatterTransactionalPageOutput(state, taskIndex, scatterCount)) {
281
+ tran.openOutputs(outputPlugin, task.getOutputSchema(), task.getOutputTaskSource());
282
+
283
+ try (AbortTransactionResource aborter = new AbortTransactionResource(tran)) {
284
+ tran.openFilters(filterPlugins, task.getFilterSchemas(), task.getFilterTaskSources());
285
+
286
+ tran.startWorkers(outputExecutor);
287
+
288
+ // started
289
+ state.getInputTaskState(taskIndex).start();
290
+ for (int i = 0; i < scatterCount; i++) {
291
+ state.getOutputTaskState(taskIndex * scatterCount + i).start();
292
+ }
293
+
294
+ TaskReport inputTaskReport = inputPlugin.run(task.getInputTaskSource(), task.getInputSchema(), taskIndex, tran);
295
+
296
+ // inputCommitted
297
+ if (inputTaskReport == null) {
298
+ inputTaskReport = exec.newTaskReport();
299
+ }
300
+ state.getInputTaskState(taskIndex).setTaskReport(inputTaskReport);
301
+
302
+ // outputCommitted
303
+ tran.commit();
304
+ }
305
+ }
306
+ finally {
307
+ state.getInputTaskState(taskIndex).finish();
308
+ state.getOutputTaskState(taskIndex).finish();
309
+ }
310
+ }
96
311
  }
97
312
 
98
- private Future<Throwable> startProcessor(final ProcessTask task, final int taskIndex, final ProcessState state)
313
+ private static class ScatterTransactionalPageOutput
314
+ implements TransactionalPageOutput
99
315
  {
100
- return executor.submit(new Callable<Throwable>() {
101
- public Throwable call()
316
+ private static final Page DONE_PAGE = Page.allocate(0);
317
+
318
+ private static class OutputWorker
319
+ implements Callable<Throwable>
320
+ {
321
+ private final PageOutput output;
322
+ private final Future<Throwable> future;
323
+ private boolean done;
324
+ private Page queued;
325
+
326
+ public OutputWorker(PageOutput output, ExecutorService executor)
102
327
  {
103
- try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
104
- Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
105
- public void started()
106
- {
107
- state.getInputTaskState(taskIndex).start();
108
- state.getOutputTaskState(taskIndex).start();
109
- }
328
+ this.output = output;
329
+ this.done = done;
330
+ this.future = executor.submit(this);
331
+ }
110
332
 
111
- public void inputCommitted(TaskReport report)
112
- {
113
- state.getInputTaskState(taskIndex).setTaskReport(report);
114
- }
333
+ public synchronized void add(Page page)
334
+ throws InterruptedException
335
+ {
336
+ while (true) {
337
+ if (queued == null) {
338
+ queued = page;
339
+ notifyAll();
340
+ return;
341
+ }
342
+ else if (queued == DONE_PAGE) {
343
+ page.release();
344
+ return;
345
+ }
346
+ wait();
347
+ }
348
+ }
349
+
350
+ public Throwable join()
351
+ throws InterruptedException
352
+ {
353
+ try {
354
+ return future.get();
355
+ }
356
+ catch (ExecutionException ex) {
357
+ return ex.getCause();
358
+ }
359
+ }
115
360
 
116
- public void outputCommitted(TaskReport report)
117
- {
118
- state.getOutputTaskState(taskIndex).setTaskReport(report);
361
+ @Override
362
+ public synchronized Throwable call()
363
+ throws InterruptedException
364
+ {
365
+ try {
366
+ while (true) {
367
+ if (queued != null) {
368
+ if (queued == DONE_PAGE) {
369
+ return null;
370
+ }
371
+ output.add(queued);
372
+ queued = null;
373
+ notifyAll();
374
+ }
375
+ wait();
376
+ }
377
+ }
378
+ finally {
379
+ try {
380
+ if (queued != null && queued != DONE_PAGE) {
381
+ queued.release();
382
+ queued = null;
119
383
  }
120
- });
121
- return null;
122
- } finally {
123
- state.getInputTaskState(taskIndex).finish();
124
- state.getOutputTaskState(taskIndex).finish();
384
+ }
385
+ finally {
386
+ queued = DONE_PAGE;
387
+ }
388
+ notifyAll();
389
+ }
390
+ }
391
+ }
392
+
393
+ private final ProcessState state;
394
+ private final int taskIndex;
395
+ private final int scatterCount;
396
+
397
+ private final TransactionalPageOutput[] trans;
398
+ private final PageOutput[] filtereds;
399
+ private final CloseResource[] closeThese;
400
+
401
+ private final OutputWorker[] outputWorkers;
402
+
403
+ private long pageCount;
404
+
405
+ public ScatterTransactionalPageOutput(ProcessState state, int taskIndex, int scatterCount)
406
+ {
407
+ this.state = state;
408
+ this.taskIndex = taskIndex;
409
+ this.scatterCount = scatterCount;
410
+
411
+ this.trans = new TransactionalPageOutput[scatterCount];
412
+ this.filtereds = new PageOutput[scatterCount];
413
+ this.closeThese = new CloseResource[scatterCount];
414
+ for (int i = 0; i < scatterCount; i++) {
415
+ closeThese[i] = new CloseResource();
416
+ }
417
+ this.outputWorkers = new OutputWorker[scatterCount];
418
+ }
419
+
420
+ public void openOutputs(OutputPlugin outputPlugin, Schema outputSchema, TaskSource outputTaskSource)
421
+ {
422
+ for (int i = 0; i < scatterCount; i++) {
423
+ int outputTaskIndex = taskIndex * scatterCount + i;
424
+ if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
425
+ TransactionalPageOutput tran = PluginWrappers.transactionalPageOutput(
426
+ outputPlugin.open(outputTaskSource, outputSchema, outputTaskIndex));
427
+ trans[i] = tran;
428
+ closeThese[i].closeThis(tran);
429
+ }
430
+ }
431
+ }
432
+
433
+ public void openFilters(List<FilterPlugin> filterPlugins, List<Schema> filterSchemas, List<TaskSource> filterTaskSources)
434
+ {
435
+ for (int i = 0; i < scatterCount; i++) {
436
+ TransactionalPageOutput tran = trans[i];
437
+ if (tran != null) {
438
+ PageOutput filtered = Filters.open(filterPlugins, filterTaskSources, filterSchemas, trans[i]);
439
+ filtereds[i] = filtered;
440
+ closeThese[i].closeThis(filtered);
125
441
  }
126
442
  }
127
- });
443
+ }
444
+
445
+ public void startWorkers(ExecutorService outputExecutor)
446
+ {
447
+ for (int i = 0; i < scatterCount; i++) {
448
+ PageOutput filtered = filtereds[i];
449
+ if (filtered != null) {
450
+ outputWorkers[i] = new OutputWorker(filtered, outputExecutor);
451
+ }
452
+ }
453
+ }
454
+
455
+ public void add(Page page)
456
+ {
457
+ OutputWorker worker = outputWorkers[(int) (pageCount % scatterCount)];
458
+ if (worker != null) {
459
+ try {
460
+ worker.add(page);
461
+ }
462
+ catch (InterruptedException ex) {
463
+ throw Throwables.propagate(ex);
464
+ }
465
+ }
466
+ pageCount++;
467
+ }
468
+
469
+ public void finish()
470
+ {
471
+ completeWorkers();
472
+ for (int i = 0; i < scatterCount; i++) {
473
+ if (trans[i] != null) {
474
+ trans[i].finish();
475
+ }
476
+ }
477
+ }
478
+
479
+ public void close()
480
+ {
481
+ completeWorkers();
482
+ for (int i = 0; i < scatterCount; i++) {
483
+ closeThese[i].close();
484
+ }
485
+ }
486
+
487
+ public void abort()
488
+ {
489
+ completeWorkers();
490
+ for (int i = 0; i < scatterCount; i++) {
491
+ if (trans[i] != null) {
492
+ trans[i].abort();
493
+ }
494
+ }
495
+ }
496
+
497
+ public TaskReport commit()
498
+ {
499
+ completeWorkers();
500
+ for (int i = 0; i < scatterCount; i++) {
501
+ if (trans[i] != null) {
502
+ int outputTaskIndex = taskIndex * scatterCount + i;
503
+ TaskReport outputTaskReport = trans[i].commit();
504
+ trans[i] = null; // don't abort
505
+ if (outputTaskReport == null) {
506
+ outputTaskReport = Exec.newTaskReport();
507
+ }
508
+ state.getOutputTaskState(outputTaskIndex).setTaskReport(outputTaskReport);
509
+ }
510
+ }
511
+ return null;
512
+ }
513
+
514
+ public void completeWorkers()
515
+ {
516
+ for (int i = 0; i < scatterCount; i++) {
517
+ OutputWorker worker = outputWorkers[i];
518
+ if (worker != null) {
519
+ try {
520
+ worker.add(DONE_PAGE);
521
+ }
522
+ catch (InterruptedException ex) {
523
+ throw Throwables.propagate(ex);
524
+ }
525
+ Throwable error = null;
526
+ try {
527
+ error = worker.join();
528
+ }
529
+ catch (InterruptedException ex) {
530
+ error = ex;
531
+ }
532
+ if (error != null) {
533
+ throw Throwables.propagate(error);
534
+ }
535
+ }
536
+ }
537
+ }
128
538
  }
129
539
  }