embulk 0.7.11-java → 0.8.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/build.gradle +2 -2
  5. data/embulk-core/build.gradle +2 -0
  6. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
  7. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
  9. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
  10. data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
  13. data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
  15. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
  16. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
  25. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
  27. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
  28. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
  29. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
  30. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
  31. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
  32. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
  33. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
  34. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
  35. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
  36. data/embulk-docs/src/built-in.rst +40 -3
  37. data/embulk-docs/src/conf.py +2 -2
  38. data/embulk-docs/src/release.rst +1 -1
  39. data/embulk-docs/src/release/release-0.8.0.rst +68 -0
  40. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
  41. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
  42. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
  43. data/embulk.gemspec +1 -1
  44. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  45. data/gradle/wrapper/gradle-wrapper.properties +2 -2
  46. data/gradlew +3 -7
  47. data/lib/embulk/column.rb +2 -0
  48. data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
  49. data/lib/embulk/command/embulk_new_plugin.rb +2 -0
  50. data/lib/embulk/command/embulk_run.rb +17 -10
  51. data/lib/embulk/data/bundle/.ruby-version +1 -1
  52. data/lib/embulk/data/new/java/build.gradle.erb +21 -0
  53. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
  54. data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
  55. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  56. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
  57. data/lib/embulk/data/new/java/gradlew +3 -7
  58. data/lib/embulk/data/new/ruby/.ruby-version +1 -1
  59. data/lib/embulk/guess/csv.rb +1 -1
  60. data/lib/embulk/guess/schema_guess.rb +6 -0
  61. data/lib/embulk/guess_plugin.rb +1 -1
  62. data/lib/embulk/java/imports.rb +4 -0
  63. data/lib/embulk/plugin_registry.rb +8 -12
  64. data/lib/embulk/schema.rb +6 -0
  65. data/lib/embulk/version.rb +1 -1
  66. data/test/guess/test_csv_guess.rb +170 -0
  67. data/test/helper.rb +2 -0
  68. metadata +17 -15
  69. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
  70. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
  71. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
  72. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
  75. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
  76. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
  77. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
  78. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
  79. data/embulk-docs/src/release/release-0.7.11.rst +0 -13
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63d78296ab5c306a09832af9d838a380399c16a7
4
- data.tar.gz: d0a267d4052f88ec276ec8cf0000ce64c07e224e
3
+ metadata.gz: 55c8a96cd529ab0f895063f6d73c36522a96d0be
4
+ data.tar.gz: 6d416a8b3f768a941a5a386107d70893afff1509
5
5
  SHA512:
6
- metadata.gz: e303ef0b199c2ad2246be937103c8c26218635a032e1da7bb6c3798a92eca13c7fdbc91b7612c7b97d15506282b060892db5c4b03bf9206b804bbd3d8ca055ad
7
- data.tar.gz: 48b676b557f9be90f915bded8b6589f39671a24584e41989d436a3f7f746dc46aae79977ba364b1bfaef23e3593b842078d6ce00b517a367712c43946b599ba0
6
+ metadata.gz: fb9a45ea1d54286c3626ab98055783043b439e6a0e34380d3431cc8ef30bb3cb7f7043f70714f2929ef10421acc5bfdf795f05b481a419896a93efa17a5da535
7
+ data.tar.gz: 8947eb80b213318118325705087655d33c05b53a5861dccfc8d79d0be16f6c01612755bc6daed6a2f17790dcbadbed80d22653877a245ca434f171ca90869658
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- embulk (0.7.9)
5
- jruby-jars (= 9.0.0.0)
4
+ embulk (0.8.0)
5
+ jruby-jars (= 9.0.4.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- jruby-jars (9.0.0.0)
10
+ jruby-jars (9.0.4.0)
11
11
  kramdown (1.5.0)
12
12
  power_assert (0.2.2)
13
13
  rake (10.4.2)
data/README.md CHANGED
@@ -110,7 +110,7 @@ To use the bundle, add `-b <bundle_dir>` option to `guess`, `preview`, or `run`
110
110
  See the generated \<bundle_dir>/Gemfile file how to plugin bundles work.
111
111
 
112
112
  ```
113
- embulk mkbundle ./embulk_bundle
113
+ embulk mkbundle ./embulk_bundle # please edit ./embulk_bundle/Gemfile to add plugins. Detailed usage is written in the Gemfile
114
114
  embulk guess -b ./embulk_bundle ...
115
115
  embulk run -b ./embulk_bundle ...
116
116
  ```
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
16
16
 
17
17
  allprojects {
18
18
  group = 'org.embulk'
19
- version = '0.7.11'
19
+ version = '0.8.0'
20
20
 
21
21
  ext {
22
- jrubyVersion = '9.0.0.0'
22
+ jrubyVersion = '9.0.4.0'
23
23
  }
24
24
 
25
25
  apply plugin: 'java'
@@ -16,6 +16,7 @@ import com.github.jrubygradle.JRubyPrepare
16
16
 
17
17
  // determine which dependencies have updates: $ gradle dependencyUpdates
18
18
  dependencies {
19
+ compile 'org.embulk:guice-bootstrap:0.1.1'
19
20
  compile 'com.google.guava:guava:18.0'
20
21
  compile 'com.google.inject:guice:4.0'
21
22
  compile 'com.google.inject.extensions:guice-multibindings:4.0'
@@ -37,6 +38,7 @@ dependencies {
37
38
  compile 'joda-time:joda-time:2.8.1'
38
39
  compile 'io.netty:netty-buffer:5.0.0.Alpha1'
39
40
  compile 'org.fusesource.jansi:jansi:1.11'
41
+ compile 'org.msgpack:msgpack-core:0.8.1'
40
42
 
41
43
  // For embulk/guess/charset.rb. See also embulk.gemspec
42
44
  compile 'com.ibm.icu:icu4j:54.1.1'
@@ -15,6 +15,9 @@ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
15
15
  import com.fasterxml.jackson.databind.node.ObjectNode;
16
16
  import com.fasterxml.jackson.databind.node.JsonNodeFactory;
17
17
  import org.yaml.snakeyaml.Yaml;
18
+ import org.yaml.snakeyaml.DumperOptions;
19
+ import org.yaml.snakeyaml.representer.Representer;
20
+ import org.yaml.snakeyaml.constructor.SafeConstructor;
18
21
 
19
22
  public class ConfigLoader
20
23
  {
@@ -60,7 +63,7 @@ public class ConfigLoader
60
63
 
61
64
  public ConfigSource fromYamlString(String string)
62
65
  {
63
- JsonNode node = objectToJson(new Yaml().load(string));
66
+ JsonNode node = objectToJson(newYaml().load(string));
64
67
  validateJsonNode(node);
65
68
  return new DataSourceImpl(model, (ObjectNode) node);
66
69
  }
@@ -74,7 +77,7 @@ public class ConfigLoader
74
77
 
75
78
  public ConfigSource fromYaml(InputStream stream) throws IOException
76
79
  {
77
- JsonNode node = objectToJson(new Yaml().load(stream));
80
+ JsonNode node = objectToJson(newYaml().load(stream));
78
81
  validateJsonNode(node);
79
82
  return new DataSourceImpl(model, (ObjectNode) node);
80
83
  }
@@ -107,7 +110,7 @@ public class ConfigLoader
107
110
  {
108
111
  ObjectNode source = new ObjectNode(JsonNodeFactory.instance);
109
112
  DataSource ds = new DataSourceImpl(model, source);
110
- Yaml yaml = new Yaml();
113
+ Yaml yaml = newYaml();
111
114
  for (Map.Entry<String, String> pair : props.entrySet()) {
112
115
  if (!pair.getKey().startsWith(keyPrefix)) {
113
116
  continue;
@@ -138,4 +141,9 @@ public class ConfigLoader
138
141
  throw new RuntimeException(ex);
139
142
  }
140
143
  }
144
+
145
+ private Yaml newYaml()
146
+ {
147
+ return new Yaml(new SafeConstructor(), new Representer(), new DumperOptions(), new YamlTagResolver());
148
+ }
141
149
  }
@@ -0,0 +1,53 @@
1
+ package org.embulk.config;
2
+
3
+ import java.util.List;
4
+ import java.util.regex.Pattern;
5
+ import org.yaml.snakeyaml.resolver.Resolver;
6
+ import org.yaml.snakeyaml.nodes.Tag;
7
+ import org.yaml.snakeyaml.nodes.NodeId;
8
+
9
+ public class YamlTagResolver
10
+ extends Resolver
11
+ {
12
+ // Resolver converts a node (scalar, sequence, map, or !!tag with them)
13
+ // to a tag (INT, FLOAT, STR, SEQ, MAP, ...). For example, converting
14
+ // "123" (scalar) to 123 (INT), or "true" (scalar) to true (BOOL).
15
+ // This is called by snakeyaml Composer which converts parser events
16
+ // into an object.
17
+ //
18
+ // jackson-dataformat-yaml doesn't use this because it traverses parser
19
+ // events without using Composer.
20
+
21
+ public static final Pattern FLOAT_EXCEPTING_ZERO_START = Pattern
22
+ .compile("^([-+]?(\\.[0-9]+|[1-9][0-9_]*(\\.[0-9_]*)?)([eE][-+]?[0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");
23
+
24
+ @Override
25
+ public void addImplicitResolver(Tag tag, Pattern regexp, String first)
26
+ {
27
+ // This method is called by constructor through addImplicitResolvers
28
+ // to setup default implicit resolvers.
29
+
30
+ if (tag.equals(Tag.FLOAT)) {
31
+ super.addImplicitResolver(Tag.FLOAT, FLOAT_EXCEPTING_ZERO_START, "-+0123456789.");
32
+ }
33
+ else if (tag.equals(Tag.BOOL)) {
34
+ // use stricter rule (reject 'On', 'Off', 'Yes', 'No')
35
+ super.addImplicitResolver(Tag.BOOL, Pattern.compile("^(?:[Tt]rue|[Ff]alse)$"), "TtFf");
36
+ }
37
+ else if (tag.equals(Tag.TIMESTAMP)) {
38
+ // This solves some unexpected behavior that snakeyaml
39
+ // deserializes "2015-01-01 00:00:00" to java.util.Date
40
+ // but jackson serializes java.util.Date to an integer.
41
+ return;
42
+ }
43
+ else {
44
+ super.addImplicitResolver(tag, regexp, first);
45
+ }
46
+ }
47
+
48
+ @Override
49
+ public Tag resolve(NodeId kind, String value, boolean implicit)
50
+ {
51
+ return super.resolve(kind, value, implicit); // checks implicit resolvers
52
+ }
53
+ }
@@ -36,7 +36,6 @@ public class ExecModule
36
36
  registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
37
37
 
38
38
  // LocalExecutorPlugin
39
- binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
40
39
  registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
41
40
 
42
41
  // serde
@@ -6,124 +6,534 @@ import java.util.concurrent.Callable;
6
6
  import java.util.concurrent.Future;
7
7
  import java.util.concurrent.ExecutorService;
8
8
  import java.util.concurrent.ExecutionException;
9
+ import com.google.common.base.Throwables;
10
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
9
11
  import org.slf4j.Logger;
10
12
  import com.google.inject.Inject;
11
13
  import org.embulk.config.ConfigSource;
14
+ import org.embulk.config.TaskSource;
12
15
  import org.embulk.config.TaskReport;
13
16
  import org.embulk.spi.Exec;
17
+ import org.embulk.spi.ExecSession;
14
18
  import org.embulk.spi.ExecutorPlugin;
15
19
  import org.embulk.spi.ProcessTask;
16
20
  import org.embulk.spi.ProcessState;
17
21
  import org.embulk.spi.Schema;
22
+ import org.embulk.spi.InputPlugin;
23
+ import org.embulk.spi.FilterPlugin;
24
+ import org.embulk.spi.OutputPlugin;
25
+ import org.embulk.spi.Page;
26
+ import org.embulk.spi.PageOutput;
27
+ import org.embulk.spi.AbortTransactionResource;
28
+ import org.embulk.spi.CloseResource;
29
+ import org.embulk.spi.TransactionalPageOutput;
30
+ import org.embulk.plugin.compat.PluginWrappers;
31
+ import org.embulk.spi.util.Filters;
18
32
  import org.embulk.spi.util.Executors;
19
33
  import org.embulk.spi.util.Executors.ProcessStateCallback;
20
34
 
21
35
  public class LocalExecutorPlugin
22
36
  implements ExecutorPlugin
23
37
  {
24
- private final ExecutorService executor;
38
+ private int defaultMaxThreads;
39
+ private int defaultMinThreads;
25
40
 
26
41
  @Inject
27
- public LocalExecutorPlugin(LocalThreadExecutor executor)
42
+ public LocalExecutorPlugin(@ForSystemConfig ConfigSource systemConfig)
28
43
  {
29
- this.executor = executor.getExecutorService();
44
+ int cores = Runtime.getRuntime().availableProcessors();
45
+ this.defaultMaxThreads = systemConfig.get(Integer.class, "max_threads", cores * 2);
46
+ this.defaultMinThreads = systemConfig.get(Integer.class, "min_output_tasks", cores);
30
47
  }
31
48
 
32
49
  @Override
33
- public void transaction(ConfigSource config, Schema outputSchema, final int inputTaskCount,
50
+ public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
34
51
  ExecutorPlugin.Control control)
35
52
  {
36
- control.transaction(outputSchema, inputTaskCount, new Executor() {
37
- public void execute(ProcessTask task, ProcessState state)
38
- {
39
- localExecute(task, inputTaskCount, state);
40
- }
41
- });
53
+ try (AbstractLocalExecutor exec = newExecutor(config, inputTaskCount)) {
54
+ control.transaction(outputSchema, exec.getOutputTaskCount(), exec);
55
+ }
42
56
  }
43
57
 
44
- private void localExecute(ProcessTask task, int taskCount, ProcessState state)
58
+ private AbstractLocalExecutor newExecutor(ConfigSource config, int inputTaskCount)
45
59
  {
46
60
  Logger log = Exec.getLogger(LocalExecutorPlugin.class);
61
+ int maxThreads = config.get(Integer.class, "max_threads", defaultMaxThreads);
62
+ int minThreads = config.get(Integer.class, "min_output_tasks", defaultMinThreads);
63
+ if (inputTaskCount < minThreads) {
64
+ int scatterCount = (minThreads + inputTaskCount - 1) / inputTaskCount;
65
+ log.info("Using local thread executor with max_threads={} / output tasks {} = input tasks {} * {}",
66
+ maxThreads, inputTaskCount * scatterCount, inputTaskCount, scatterCount);
67
+ return new ScatterExecutor(maxThreads, inputTaskCount, scatterCount);
68
+ }
69
+ else {
70
+ log.info("Using local thread executor with max_threads={} / tasks={}", maxThreads, inputTaskCount);
71
+ return new DirectExecutor(maxThreads, inputTaskCount);
72
+ }
73
+ }
47
74
 
48
- state.initialize(taskCount, taskCount);
75
+ private static abstract class AbstractLocalExecutor
76
+ implements Executor, AutoCloseable
77
+ {
78
+ protected final Logger log = Exec.getLogger(LocalExecutorPlugin.class);
49
79
 
50
- List<Future<Throwable>> futures = new ArrayList<>(taskCount);
51
- try {
52
- for (int i=0; i < taskCount; i++) {
53
- if (state.getOutputTaskState(i).isCommitted()) {
54
- log.warn("Skipped resumed task {}", i);
55
- futures.add(null); // resumed
56
- } else {
57
- futures.add(startProcessor(task, i, state));
58
- }
59
- }
60
- showProgress(log, state, taskCount);
80
+ protected final int inputTaskCount;
81
+ protected final int outputTaskCount;
82
+
83
+ public AbstractLocalExecutor(int inputTaskCount, int outputTaskCount)
84
+ {
85
+ this.inputTaskCount = inputTaskCount;
86
+ this.outputTaskCount = outputTaskCount;
87
+ }
88
+
89
+ public int getOutputTaskCount()
90
+ {
91
+ return outputTaskCount;
92
+ }
93
+
94
+ @Override
95
+ public void execute(ProcessTask task, ProcessState state)
96
+ {
97
+ state.initialize(inputTaskCount, outputTaskCount);
61
98
 
62
- for (int i=0; i < taskCount; i++) {
63
- if (futures.get(i) == null) {
64
- continue;
99
+ List<Future<Throwable>> futures = new ArrayList<>(inputTaskCount);
100
+ try {
101
+ for (int i = 0; i < inputTaskCount; i++) {
102
+ futures.add(startInputTask(task, state, i));
65
103
  }
66
- try {
67
- state.getInputTaskState(i).setException(futures.get(i).get());
68
- } catch (ExecutionException ex) {
69
- state.getInputTaskState(i).setException(ex.getCause());
70
- //Throwables.propagate(ex.getCause());
71
- } catch (InterruptedException ex) {
72
- state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
104
+ showProgress(state, inputTaskCount);
105
+
106
+ for (int i = 0; i < inputTaskCount; i++) {
107
+ if (futures.get(i) == null) {
108
+ continue;
109
+ }
110
+ try {
111
+ state.getInputTaskState(i).setException(futures.get(i).get());
112
+ }
113
+ catch (ExecutionException ex) {
114
+ state.getInputTaskState(i).setException(ex.getCause());
115
+ //Throwables.propagate(ex.getCause());
116
+ }
117
+ catch (InterruptedException ex) {
118
+ state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
119
+ }
120
+ showProgress(state, inputTaskCount);
73
121
  }
74
- showProgress(log, state, taskCount);
75
122
  }
76
- } finally {
77
- for (Future<Throwable> future : futures) {
78
- if (future != null && !future.isDone()) {
79
- future.cancel(true);
80
- // TODO join?
123
+ finally {
124
+ for (Future<Throwable> future : futures) {
125
+ if (future != null && !future.isDone()) {
126
+ future.cancel(true);
127
+ // TODO join?
128
+ }
81
129
  }
82
130
  }
83
131
  }
132
+
133
+ @Override
134
+ public abstract void close();
135
+
136
+ private void showProgress(ProcessState state, int taskCount)
137
+ {
138
+ int started = 0;
139
+ int finished = 0;
140
+ for (int i = 0; i < taskCount; i++) {
141
+ if (state.getOutputTaskState(i).isStarted()) { started++; }
142
+ if (state.getOutputTaskState(i).isFinished()) { finished++; }
143
+ }
144
+
145
+ log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
146
+ }
147
+
148
+ protected abstract Future<Throwable> startInputTask(ProcessTask task, ProcessState state, int taskIndex);
149
+ }
150
+
151
+ public static class DirectExecutor
152
+ extends AbstractLocalExecutor
153
+ {
154
+ protected final ExecutorService executor;
155
+
156
+ public DirectExecutor(int maxThreads, int taskCount)
157
+ {
158
+ super(taskCount, taskCount);
159
+ this.executor = java.util.concurrent.Executors.newFixedThreadPool(maxThreads,
160
+ new ThreadFactoryBuilder()
161
+ .setNameFormat("embulk-executor-%d")
162
+ .setDaemon(true)
163
+ .build());
164
+ }
165
+
166
+ @Override
167
+ public void close()
168
+ {
169
+ executor.shutdown();
170
+ }
171
+
172
+ @Override
173
+ protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
174
+ {
175
+ if (state.getOutputTaskState(taskIndex).isCommitted()) {
176
+ log.warn("Skipped resumed task {}", taskIndex);
177
+ return null; // resumed
178
+ }
179
+
180
+ return executor.submit(new Callable<Throwable>() {
181
+ public Throwable call()
182
+ {
183
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
184
+ Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
185
+ public void started()
186
+ {
187
+ state.getInputTaskState(taskIndex).start();
188
+ state.getOutputTaskState(taskIndex).start();
189
+ }
190
+
191
+ public void inputCommitted(TaskReport report)
192
+ {
193
+ state.getInputTaskState(taskIndex).setTaskReport(report);
194
+ }
195
+
196
+ public void outputCommitted(TaskReport report)
197
+ {
198
+ state.getOutputTaskState(taskIndex).setTaskReport(report);
199
+ }
200
+ });
201
+ return null;
202
+ }
203
+ finally {
204
+ state.getInputTaskState(taskIndex).finish();
205
+ state.getOutputTaskState(taskIndex).finish();
206
+ }
207
+ }
208
+ });
209
+ }
84
210
  }
85
211
 
86
- private void showProgress(Logger log, ProcessState state, int taskCount)
212
+ public static class ScatterExecutor
213
+ extends AbstractLocalExecutor
87
214
  {
88
- int started = 0;
89
- int finished = 0;
90
- for (int i=0; i < taskCount; i++) {
91
- if (state.getInputTaskState(i).isStarted()) { started++; }
92
- if (state.getOutputTaskState(i).isFinished()) { finished++; }
215
+ private final int scatterCount;
216
+ private final int inputTaskCount;
217
+ private final ExecutorService inputExecutor;
218
+ private final ExecutorService outputExecutor;
219
+
220
+ public ScatterExecutor(int maxThreads, int inputTaskCount, int scatterCount)
221
+ {
222
+ super(inputTaskCount, inputTaskCount * scatterCount);
223
+ this.inputTaskCount = inputTaskCount;
224
+ this.scatterCount = scatterCount;
225
+ this.inputExecutor = java.util.concurrent.Executors.newFixedThreadPool(
226
+ Math.max(maxThreads / scatterCount, 1),
227
+ new ThreadFactoryBuilder()
228
+ .setNameFormat("embulk-input-executor-%d")
229
+ .setDaemon(true)
230
+ .build());
231
+ this.outputExecutor = java.util.concurrent.Executors.newCachedThreadPool(
232
+ new ThreadFactoryBuilder()
233
+ .setNameFormat("embulk-output-executor-%d")
234
+ .setDaemon(true)
235
+ .build());
236
+ }
237
+
238
+ @Override
239
+ public void close()
240
+ {
241
+ inputExecutor.shutdown();
242
+ outputExecutor.shutdown();
93
243
  }
94
244
 
95
- log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
245
+ @Override
246
+ protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
247
+ {
248
+ if(isAllScatterOutputFinished(state, taskIndex)) {
249
+ log.warn("Skipped resumed input task {}", taskIndex);
250
+ return null; // resumed
251
+ }
252
+
253
+ return inputExecutor.submit(new Callable<Throwable>() {
254
+ public Throwable call()
255
+ {
256
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
257
+ runInputTask(Exec.session(), task, state, taskIndex);
258
+ return null;
259
+ }
260
+ }
261
+ });
262
+ }
263
+
264
+ private boolean isAllScatterOutputFinished(ProcessState state, int taskIndex) {
265
+ for (int i = 0; i < scatterCount; i++) {
266
+ int outputTaskIndex = taskIndex * scatterCount + i;
267
+ if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
268
+ return false;
269
+ }
270
+ }
271
+ return true;
272
+ }
273
+
274
+ private void runInputTask(ExecSession exec, ProcessTask task, ProcessState state, int taskIndex)
275
+ {
276
+ InputPlugin inputPlugin = exec.newPlugin(InputPlugin.class, task.getInputPluginType());
277
+ List<FilterPlugin> filterPlugins = Filters.newFilterPlugins(exec, task.getFilterPluginTypes());
278
+ OutputPlugin outputPlugin = exec.newPlugin(OutputPlugin.class, task.getOutputPluginType());
279
+
280
+ try (ScatterTransactionalPageOutput tran = new ScatterTransactionalPageOutput(state, taskIndex, scatterCount)) {
281
+ tran.openOutputs(outputPlugin, task.getOutputSchema(), task.getOutputTaskSource());
282
+
283
+ try (AbortTransactionResource aborter = new AbortTransactionResource(tran)) {
284
+ tran.openFilters(filterPlugins, task.getFilterSchemas(), task.getFilterTaskSources());
285
+
286
+ tran.startWorkers(outputExecutor);
287
+
288
+ // started
289
+ state.getInputTaskState(taskIndex).start();
290
+ for (int i = 0; i < scatterCount; i++) {
291
+ state.getOutputTaskState(taskIndex * scatterCount + i).start();
292
+ }
293
+
294
+ TaskReport inputTaskReport = inputPlugin.run(task.getInputTaskSource(), task.getInputSchema(), taskIndex, tran);
295
+
296
+ // inputCommitted
297
+ if (inputTaskReport == null) {
298
+ inputTaskReport = exec.newTaskReport();
299
+ }
300
+ state.getInputTaskState(taskIndex).setTaskReport(inputTaskReport);
301
+
302
+ // outputCommitted
303
+ tran.commit();
304
+ }
305
+ }
306
+ finally {
307
+ state.getInputTaskState(taskIndex).finish();
308
+ state.getOutputTaskState(taskIndex).finish();
309
+ }
310
+ }
96
311
  }
97
312
 
98
- private Future<Throwable> startProcessor(final ProcessTask task, final int taskIndex, final ProcessState state)
313
+ private static class ScatterTransactionalPageOutput
314
+ implements TransactionalPageOutput
99
315
  {
100
- return executor.submit(new Callable<Throwable>() {
101
- public Throwable call()
316
+ private static final Page DONE_PAGE = Page.allocate(0);
317
+
318
+ private static class OutputWorker
319
+ implements Callable<Throwable>
320
+ {
321
+ private final PageOutput output;
322
+ private final Future<Throwable> future;
323
+ private boolean done;
324
+ private Page queued;
325
+
326
+ public OutputWorker(PageOutput output, ExecutorService executor)
102
327
  {
103
- try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
104
- Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
105
- public void started()
106
- {
107
- state.getInputTaskState(taskIndex).start();
108
- state.getOutputTaskState(taskIndex).start();
109
- }
328
+ this.output = output;
329
+ this.done = done;
330
+ this.future = executor.submit(this);
331
+ }
110
332
 
111
- public void inputCommitted(TaskReport report)
112
- {
113
- state.getInputTaskState(taskIndex).setTaskReport(report);
114
- }
333
+ public synchronized void add(Page page)
334
+ throws InterruptedException
335
+ {
336
+ while (true) {
337
+ if (queued == null) {
338
+ queued = page;
339
+ notifyAll();
340
+ return;
341
+ }
342
+ else if (queued == DONE_PAGE) {
343
+ page.release();
344
+ return;
345
+ }
346
+ wait();
347
+ }
348
+ }
349
+
350
+ public Throwable join()
351
+ throws InterruptedException
352
+ {
353
+ try {
354
+ return future.get();
355
+ }
356
+ catch (ExecutionException ex) {
357
+ return ex.getCause();
358
+ }
359
+ }
115
360
 
116
- public void outputCommitted(TaskReport report)
117
- {
118
- state.getOutputTaskState(taskIndex).setTaskReport(report);
361
+ @Override
362
+ public synchronized Throwable call()
363
+ throws InterruptedException
364
+ {
365
+ try {
366
+ while (true) {
367
+ if (queued != null) {
368
+ if (queued == DONE_PAGE) {
369
+ return null;
370
+ }
371
+ output.add(queued);
372
+ queued = null;
373
+ notifyAll();
374
+ }
375
+ wait();
376
+ }
377
+ }
378
+ finally {
379
+ try {
380
+ if (queued != null && queued != DONE_PAGE) {
381
+ queued.release();
382
+ queued = null;
119
383
  }
120
- });
121
- return null;
122
- } finally {
123
- state.getInputTaskState(taskIndex).finish();
124
- state.getOutputTaskState(taskIndex).finish();
384
+ }
385
+ finally {
386
+ queued = DONE_PAGE;
387
+ }
388
+ notifyAll();
389
+ }
390
+ }
391
+ }
392
+
393
+ private final ProcessState state;
394
+ private final int taskIndex;
395
+ private final int scatterCount;
396
+
397
+ private final TransactionalPageOutput[] trans;
398
+ private final PageOutput[] filtereds;
399
+ private final CloseResource[] closeThese;
400
+
401
+ private final OutputWorker[] outputWorkers;
402
+
403
+ private long pageCount;
404
+
405
+ public ScatterTransactionalPageOutput(ProcessState state, int taskIndex, int scatterCount)
406
+ {
407
+ this.state = state;
408
+ this.taskIndex = taskIndex;
409
+ this.scatterCount = scatterCount;
410
+
411
+ this.trans = new TransactionalPageOutput[scatterCount];
412
+ this.filtereds = new PageOutput[scatterCount];
413
+ this.closeThese = new CloseResource[scatterCount];
414
+ for (int i = 0; i < scatterCount; i++) {
415
+ closeThese[i] = new CloseResource();
416
+ }
417
+ this.outputWorkers = new OutputWorker[scatterCount];
418
+ }
419
+
420
+ public void openOutputs(OutputPlugin outputPlugin, Schema outputSchema, TaskSource outputTaskSource)
421
+ {
422
+ for (int i = 0; i < scatterCount; i++) {
423
+ int outputTaskIndex = taskIndex * scatterCount + i;
424
+ if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
425
+ TransactionalPageOutput tran = PluginWrappers.transactionalPageOutput(
426
+ outputPlugin.open(outputTaskSource, outputSchema, outputTaskIndex));
427
+ trans[i] = tran;
428
+ closeThese[i].closeThis(tran);
429
+ }
430
+ }
431
+ }
432
+
433
+ public void openFilters(List<FilterPlugin> filterPlugins, List<Schema> filterSchemas, List<TaskSource> filterTaskSources)
434
+ {
435
+ for (int i = 0; i < scatterCount; i++) {
436
+ TransactionalPageOutput tran = trans[i];
437
+ if (tran != null) {
438
+ PageOutput filtered = Filters.open(filterPlugins, filterTaskSources, filterSchemas, trans[i]);
439
+ filtereds[i] = filtered;
440
+ closeThese[i].closeThis(filtered);
125
441
  }
126
442
  }
127
- });
443
+ }
444
+
445
+ public void startWorkers(ExecutorService outputExecutor)
446
+ {
447
+ for (int i = 0; i < scatterCount; i++) {
448
+ PageOutput filtered = filtereds[i];
449
+ if (filtered != null) {
450
+ outputWorkers[i] = new OutputWorker(filtered, outputExecutor);
451
+ }
452
+ }
453
+ }
454
+
455
+ public void add(Page page)
456
+ {
457
+ OutputWorker worker = outputWorkers[(int) (pageCount % scatterCount)];
458
+ if (worker != null) {
459
+ try {
460
+ worker.add(page);
461
+ }
462
+ catch (InterruptedException ex) {
463
+ throw Throwables.propagate(ex);
464
+ }
465
+ }
466
+ pageCount++;
467
+ }
468
+
469
+ public void finish()
470
+ {
471
+ completeWorkers();
472
+ for (int i = 0; i < scatterCount; i++) {
473
+ if (trans[i] != null) {
474
+ trans[i].finish();
475
+ }
476
+ }
477
+ }
478
+
479
+ public void close()
480
+ {
481
+ completeWorkers();
482
+ for (int i = 0; i < scatterCount; i++) {
483
+ closeThese[i].close();
484
+ }
485
+ }
486
+
487
+ public void abort()
488
+ {
489
+ completeWorkers();
490
+ for (int i = 0; i < scatterCount; i++) {
491
+ if (trans[i] != null) {
492
+ trans[i].abort();
493
+ }
494
+ }
495
+ }
496
+
497
+ public TaskReport commit()
498
+ {
499
+ completeWorkers();
500
+ for (int i = 0; i < scatterCount; i++) {
501
+ if (trans[i] != null) {
502
+ int outputTaskIndex = taskIndex * scatterCount + i;
503
+ TaskReport outputTaskReport = trans[i].commit();
504
+ trans[i] = null; // don't abort
505
+ if (outputTaskReport == null) {
506
+ outputTaskReport = Exec.newTaskReport();
507
+ }
508
+ state.getOutputTaskState(outputTaskIndex).setTaskReport(outputTaskReport);
509
+ }
510
+ }
511
+ return null;
512
+ }
513
+
514
+ public void completeWorkers()
515
+ {
516
+ for (int i = 0; i < scatterCount; i++) {
517
+ OutputWorker worker = outputWorkers[i];
518
+ if (worker != null) {
519
+ try {
520
+ worker.add(DONE_PAGE);
521
+ }
522
+ catch (InterruptedException ex) {
523
+ throw Throwables.propagate(ex);
524
+ }
525
+ Throwable error = null;
526
+ try {
527
+ error = worker.join();
528
+ }
529
+ catch (InterruptedException ex) {
530
+ error = ex;
531
+ }
532
+ if (error != null) {
533
+ throw Throwables.propagate(error);
534
+ }
535
+ }
536
+ }
537
+ }
128
538
  }
129
539
  }