embulk 0.7.11-java → 0.8.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +2 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
- data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
- data/embulk-docs/src/built-in.rst +40 -3
- data/embulk-docs/src/conf.py +2 -2
- data/embulk-docs/src/release.rst +1 -1
- data/embulk-docs/src/release/release-0.8.0.rst +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
- data/embulk.gemspec +1 -1
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +3 -7
- data/lib/embulk/column.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
- data/lib/embulk/command/embulk_new_plugin.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +17 -10
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/java/build.gradle.erb +21 -0
- data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
- data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +3 -7
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/guess/csv.rb +1 -1
- data/lib/embulk/guess/schema_guess.rb +6 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java/imports.rb +4 -0
- data/lib/embulk/plugin_registry.rb +8 -12
- data/lib/embulk/schema.rb +6 -0
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_guess.rb +170 -0
- data/test/helper.rb +2 -0
- metadata +17 -15
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
- data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
- data/embulk-docs/src/release/release-0.7.11.rst +0 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 55c8a96cd529ab0f895063f6d73c36522a96d0be
|
|
4
|
+
data.tar.gz: 6d416a8b3f768a941a5a386107d70893afff1509
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fb9a45ea1d54286c3626ab98055783043b439e6a0e34380d3431cc8ef30bb3cb7f7043f70714f2929ef10421acc5bfdf795f05b481a419896a93efa17a5da535
|
|
7
|
+
data.tar.gz: 8947eb80b213318118325705087655d33c05b53a5861dccfc8d79d0be16f6c01612755bc6daed6a2f17790dcbadbed80d22653877a245ca434f171ca90869658
|
data/Gemfile.lock
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
embulk (0.
|
|
5
|
-
jruby-jars (= 9.0.
|
|
4
|
+
embulk (0.8.0)
|
|
5
|
+
jruby-jars (= 9.0.4.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
jruby-jars (9.0.
|
|
10
|
+
jruby-jars (9.0.4.0)
|
|
11
11
|
kramdown (1.5.0)
|
|
12
12
|
power_assert (0.2.2)
|
|
13
13
|
rake (10.4.2)
|
data/README.md
CHANGED
|
@@ -110,7 +110,7 @@ To use the bundle, add `-b <bundle_dir>` option to `guess`, `preview`, or `run`
|
|
|
110
110
|
See the generated \<bundle_dir>/Gemfile file for how plugin bundles work.
|
|
111
111
|
|
|
112
112
|
```
|
|
113
|
-
embulk mkbundle ./embulk_bundle
|
|
113
|
+
embulk mkbundle ./embulk_bundle # please edit ./embulk_bundle/Gemfile to add plugins. Detailed usage is written in the Gemfile
|
|
114
114
|
embulk guess -b ./embulk_bundle ...
|
|
115
115
|
embulk run -b ./embulk_bundle ...
|
|
116
116
|
```
|
data/build.gradle
CHANGED
|
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
|
16
16
|
|
|
17
17
|
allprojects {
|
|
18
18
|
group = 'org.embulk'
|
|
19
|
-
version = '0.
|
|
19
|
+
version = '0.8.0'
|
|
20
20
|
|
|
21
21
|
ext {
|
|
22
|
-
jrubyVersion = '9.0.
|
|
22
|
+
jrubyVersion = '9.0.4.0'
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
|
@@ -16,6 +16,7 @@ import com.github.jrubygradle.JRubyPrepare
|
|
|
16
16
|
|
|
17
17
|
// determine which dependencies have updates: $ gradle dependencyUpdates
|
|
18
18
|
dependencies {
|
|
19
|
+
compile 'org.embulk:guice-bootstrap:0.1.1'
|
|
19
20
|
compile 'com.google.guava:guava:18.0'
|
|
20
21
|
compile 'com.google.inject:guice:4.0'
|
|
21
22
|
compile 'com.google.inject.extensions:guice-multibindings:4.0'
|
|
@@ -37,6 +38,7 @@ dependencies {
|
|
|
37
38
|
compile 'joda-time:joda-time:2.8.1'
|
|
38
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
|
39
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.1'
|
|
40
42
|
|
|
41
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
|
42
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
|
@@ -15,6 +15,9 @@ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
|
|
|
15
15
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
|
16
16
|
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
|
|
17
17
|
import org.yaml.snakeyaml.Yaml;
|
|
18
|
+
import org.yaml.snakeyaml.DumperOptions;
|
|
19
|
+
import org.yaml.snakeyaml.representer.Representer;
|
|
20
|
+
import org.yaml.snakeyaml.constructor.SafeConstructor;
|
|
18
21
|
|
|
19
22
|
public class ConfigLoader
|
|
20
23
|
{
|
|
@@ -60,7 +63,7 @@ public class ConfigLoader
|
|
|
60
63
|
|
|
61
64
|
public ConfigSource fromYamlString(String string)
|
|
62
65
|
{
|
|
63
|
-
JsonNode node = objectToJson(
|
|
66
|
+
JsonNode node = objectToJson(newYaml().load(string));
|
|
64
67
|
validateJsonNode(node);
|
|
65
68
|
return new DataSourceImpl(model, (ObjectNode) node);
|
|
66
69
|
}
|
|
@@ -74,7 +77,7 @@ public class ConfigLoader
|
|
|
74
77
|
|
|
75
78
|
public ConfigSource fromYaml(InputStream stream) throws IOException
|
|
76
79
|
{
|
|
77
|
-
JsonNode node = objectToJson(
|
|
80
|
+
JsonNode node = objectToJson(newYaml().load(stream));
|
|
78
81
|
validateJsonNode(node);
|
|
79
82
|
return new DataSourceImpl(model, (ObjectNode) node);
|
|
80
83
|
}
|
|
@@ -107,7 +110,7 @@ public class ConfigLoader
|
|
|
107
110
|
{
|
|
108
111
|
ObjectNode source = new ObjectNode(JsonNodeFactory.instance);
|
|
109
112
|
DataSource ds = new DataSourceImpl(model, source);
|
|
110
|
-
Yaml yaml =
|
|
113
|
+
Yaml yaml = newYaml();
|
|
111
114
|
for (Map.Entry<String, String> pair : props.entrySet()) {
|
|
112
115
|
if (!pair.getKey().startsWith(keyPrefix)) {
|
|
113
116
|
continue;
|
|
@@ -138,4 +141,9 @@ public class ConfigLoader
|
|
|
138
141
|
throw new RuntimeException(ex);
|
|
139
142
|
}
|
|
140
143
|
}
|
|
144
|
+
|
|
145
|
+
private Yaml newYaml()
|
|
146
|
+
{
|
|
147
|
+
return new Yaml(new SafeConstructor(), new Representer(), new DumperOptions(), new YamlTagResolver());
|
|
148
|
+
}
|
|
141
149
|
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
package org.embulk.config;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.util.regex.Pattern;
|
|
5
|
+
import org.yaml.snakeyaml.resolver.Resolver;
|
|
6
|
+
import org.yaml.snakeyaml.nodes.Tag;
|
|
7
|
+
import org.yaml.snakeyaml.nodes.NodeId;
|
|
8
|
+
|
|
9
|
+
public class YamlTagResolver
|
|
10
|
+
extends Resolver
|
|
11
|
+
{
|
|
12
|
+
// Resolver converts a node (scalar, sequence, map, or !!tag with them)
|
|
13
|
+
// to a tag (INT, FLOAT, STR, SEQ, MAP, ...). For example, converting
|
|
14
|
+
// "123" (scalar) to 123 (INT), or "true" (scalar) to true (BOOL).
|
|
15
|
+
// This is called by snakeyaml Composer which converts parser events
|
|
16
|
+
// into an object.
|
|
17
|
+
//
|
|
18
|
+
// jackson-dataformat-yaml doesn't use this because it traverses parser
|
|
19
|
+
// events without using Composer.
|
|
20
|
+
|
|
21
|
+
public static final Pattern FLOAT_EXCEPTING_ZERO_START = Pattern
|
|
22
|
+
.compile("^([-+]?(\\.[0-9]+|[1-9][0-9_]*(\\.[0-9_]*)?)([eE][-+]?[0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");
|
|
23
|
+
|
|
24
|
+
@Override
|
|
25
|
+
public void addImplicitResolver(Tag tag, Pattern regexp, String first)
|
|
26
|
+
{
|
|
27
|
+
// This method is called by constructor through addImplicitResolvers
|
|
28
|
+
// to setup default implicit resolvers.
|
|
29
|
+
|
|
30
|
+
if (tag.equals(Tag.FLOAT)) {
|
|
31
|
+
super.addImplicitResolver(Tag.FLOAT, FLOAT_EXCEPTING_ZERO_START, "-+0123456789.");
|
|
32
|
+
}
|
|
33
|
+
else if (tag.equals(Tag.BOOL)) {
|
|
34
|
+
// use stricter rule (reject 'On', 'Off', 'Yes', 'No')
|
|
35
|
+
super.addImplicitResolver(Tag.BOOL, Pattern.compile("^(?:[Tt]rue|[Ff]alse)$"), "TtFf");
|
|
36
|
+
}
|
|
37
|
+
else if (tag.equals(Tag.TIMESTAMP)) {
|
|
38
|
+
// This solves some unexpected behavior that snakeyaml
|
|
39
|
+
// deserializes "2015-01-01 00:00:00" to java.util.Date
|
|
40
|
+
// but jackson serializes java.util.Date to an integer.
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
super.addImplicitResolver(tag, regexp, first);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
@Override
|
|
49
|
+
public Tag resolve(NodeId kind, String value, boolean implicit)
|
|
50
|
+
{
|
|
51
|
+
return super.resolve(kind, value, implicit); // checks implicit resolvers
|
|
52
|
+
}
|
|
53
|
+
}
|
|
@@ -36,7 +36,6 @@ public class ExecModule
|
|
|
36
36
|
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
|
37
37
|
|
|
38
38
|
// LocalExecutorPlugin
|
|
39
|
-
binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
|
|
40
39
|
registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
|
|
41
40
|
|
|
42
41
|
// serde
|
|
@@ -6,124 +6,534 @@ import java.util.concurrent.Callable;
|
|
|
6
6
|
import java.util.concurrent.Future;
|
|
7
7
|
import java.util.concurrent.ExecutorService;
|
|
8
8
|
import java.util.concurrent.ExecutionException;
|
|
9
|
+
import com.google.common.base.Throwables;
|
|
10
|
+
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
9
11
|
import org.slf4j.Logger;
|
|
10
12
|
import com.google.inject.Inject;
|
|
11
13
|
import org.embulk.config.ConfigSource;
|
|
14
|
+
import org.embulk.config.TaskSource;
|
|
12
15
|
import org.embulk.config.TaskReport;
|
|
13
16
|
import org.embulk.spi.Exec;
|
|
17
|
+
import org.embulk.spi.ExecSession;
|
|
14
18
|
import org.embulk.spi.ExecutorPlugin;
|
|
15
19
|
import org.embulk.spi.ProcessTask;
|
|
16
20
|
import org.embulk.spi.ProcessState;
|
|
17
21
|
import org.embulk.spi.Schema;
|
|
22
|
+
import org.embulk.spi.InputPlugin;
|
|
23
|
+
import org.embulk.spi.FilterPlugin;
|
|
24
|
+
import org.embulk.spi.OutputPlugin;
|
|
25
|
+
import org.embulk.spi.Page;
|
|
26
|
+
import org.embulk.spi.PageOutput;
|
|
27
|
+
import org.embulk.spi.AbortTransactionResource;
|
|
28
|
+
import org.embulk.spi.CloseResource;
|
|
29
|
+
import org.embulk.spi.TransactionalPageOutput;
|
|
30
|
+
import org.embulk.plugin.compat.PluginWrappers;
|
|
31
|
+
import org.embulk.spi.util.Filters;
|
|
18
32
|
import org.embulk.spi.util.Executors;
|
|
19
33
|
import org.embulk.spi.util.Executors.ProcessStateCallback;
|
|
20
34
|
|
|
21
35
|
public class LocalExecutorPlugin
|
|
22
36
|
implements ExecutorPlugin
|
|
23
37
|
{
|
|
24
|
-
private
|
|
38
|
+
private int defaultMaxThreads;
|
|
39
|
+
private int defaultMinThreads;
|
|
25
40
|
|
|
26
41
|
@Inject
|
|
27
|
-
public LocalExecutorPlugin(
|
|
42
|
+
public LocalExecutorPlugin(@ForSystemConfig ConfigSource systemConfig)
|
|
28
43
|
{
|
|
29
|
-
|
|
44
|
+
int cores = Runtime.getRuntime().availableProcessors();
|
|
45
|
+
this.defaultMaxThreads = systemConfig.get(Integer.class, "max_threads", cores * 2);
|
|
46
|
+
this.defaultMinThreads = systemConfig.get(Integer.class, "min_output_tasks", cores);
|
|
30
47
|
}
|
|
31
48
|
|
|
32
49
|
@Override
|
|
33
|
-
public void transaction(ConfigSource config, Schema outputSchema,
|
|
50
|
+
public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
|
|
34
51
|
ExecutorPlugin.Control control)
|
|
35
52
|
{
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
localExecute(task, inputTaskCount, state);
|
|
40
|
-
}
|
|
41
|
-
});
|
|
53
|
+
try (AbstractLocalExecutor exec = newExecutor(config, inputTaskCount)) {
|
|
54
|
+
control.transaction(outputSchema, exec.getOutputTaskCount(), exec);
|
|
55
|
+
}
|
|
42
56
|
}
|
|
43
57
|
|
|
44
|
-
private
|
|
58
|
+
private AbstractLocalExecutor newExecutor(ConfigSource config, int inputTaskCount)
|
|
45
59
|
{
|
|
46
60
|
Logger log = Exec.getLogger(LocalExecutorPlugin.class);
|
|
61
|
+
int maxThreads = config.get(Integer.class, "max_threads", defaultMaxThreads);
|
|
62
|
+
int minThreads = config.get(Integer.class, "min_output_tasks", defaultMinThreads);
|
|
63
|
+
if (inputTaskCount < minThreads) {
|
|
64
|
+
int scatterCount = (minThreads + inputTaskCount - 1) / inputTaskCount;
|
|
65
|
+
log.info("Using local thread executor with max_threads={} / output tasks {} = input tasks {} * {}",
|
|
66
|
+
maxThreads, inputTaskCount * scatterCount, inputTaskCount, scatterCount);
|
|
67
|
+
return new ScatterExecutor(maxThreads, inputTaskCount, scatterCount);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
log.info("Using local thread executor with max_threads={} / tasks={}", maxThreads, inputTaskCount);
|
|
71
|
+
return new DirectExecutor(maxThreads, inputTaskCount);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
47
74
|
|
|
48
|
-
|
|
75
|
+
private static abstract class AbstractLocalExecutor
|
|
76
|
+
implements Executor, AutoCloseable
|
|
77
|
+
{
|
|
78
|
+
protected final Logger log = Exec.getLogger(LocalExecutorPlugin.class);
|
|
49
79
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
80
|
+
protected final int inputTaskCount;
|
|
81
|
+
protected final int outputTaskCount;
|
|
82
|
+
|
|
83
|
+
public AbstractLocalExecutor(int inputTaskCount, int outputTaskCount)
|
|
84
|
+
{
|
|
85
|
+
this.inputTaskCount = inputTaskCount;
|
|
86
|
+
this.outputTaskCount = outputTaskCount;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
public int getOutputTaskCount()
|
|
90
|
+
{
|
|
91
|
+
return outputTaskCount;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
@Override
|
|
95
|
+
public void execute(ProcessTask task, ProcessState state)
|
|
96
|
+
{
|
|
97
|
+
state.initialize(inputTaskCount, outputTaskCount);
|
|
61
98
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
99
|
+
List<Future<Throwable>> futures = new ArrayList<>(inputTaskCount);
|
|
100
|
+
try {
|
|
101
|
+
for (int i = 0; i < inputTaskCount; i++) {
|
|
102
|
+
futures.add(startInputTask(task, state, i));
|
|
65
103
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
104
|
+
showProgress(state, inputTaskCount);
|
|
105
|
+
|
|
106
|
+
for (int i = 0; i < inputTaskCount; i++) {
|
|
107
|
+
if (futures.get(i) == null) {
|
|
108
|
+
continue;
|
|
109
|
+
}
|
|
110
|
+
try {
|
|
111
|
+
state.getInputTaskState(i).setException(futures.get(i).get());
|
|
112
|
+
}
|
|
113
|
+
catch (ExecutionException ex) {
|
|
114
|
+
state.getInputTaskState(i).setException(ex.getCause());
|
|
115
|
+
//Throwables.propagate(ex.getCause());
|
|
116
|
+
}
|
|
117
|
+
catch (InterruptedException ex) {
|
|
118
|
+
state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
|
|
119
|
+
}
|
|
120
|
+
showProgress(state, inputTaskCount);
|
|
73
121
|
}
|
|
74
|
-
showProgress(log, state, taskCount);
|
|
75
122
|
}
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
123
|
+
finally {
|
|
124
|
+
for (Future<Throwable> future : futures) {
|
|
125
|
+
if (future != null && !future.isDone()) {
|
|
126
|
+
future.cancel(true);
|
|
127
|
+
// TODO join?
|
|
128
|
+
}
|
|
81
129
|
}
|
|
82
130
|
}
|
|
83
131
|
}
|
|
132
|
+
|
|
133
|
+
@Override
|
|
134
|
+
public abstract void close();
|
|
135
|
+
|
|
136
|
+
private void showProgress(ProcessState state, int taskCount)
|
|
137
|
+
{
|
|
138
|
+
int started = 0;
|
|
139
|
+
int finished = 0;
|
|
140
|
+
for (int i = 0; i < taskCount; i++) {
|
|
141
|
+
if (state.getOutputTaskState(i).isStarted()) { started++; }
|
|
142
|
+
if (state.getOutputTaskState(i).isFinished()) { finished++; }
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
protected abstract Future<Throwable> startInputTask(ProcessTask task, ProcessState state, int taskIndex);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
public static class DirectExecutor
|
|
152
|
+
extends AbstractLocalExecutor
|
|
153
|
+
{
|
|
154
|
+
protected final ExecutorService executor;
|
|
155
|
+
|
|
156
|
+
public DirectExecutor(int maxThreads, int taskCount)
|
|
157
|
+
{
|
|
158
|
+
super(taskCount, taskCount);
|
|
159
|
+
this.executor = java.util.concurrent.Executors.newFixedThreadPool(maxThreads,
|
|
160
|
+
new ThreadFactoryBuilder()
|
|
161
|
+
.setNameFormat("embulk-executor-%d")
|
|
162
|
+
.setDaemon(true)
|
|
163
|
+
.build());
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
@Override
|
|
167
|
+
public void close()
|
|
168
|
+
{
|
|
169
|
+
executor.shutdown();
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
@Override
|
|
173
|
+
protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
|
|
174
|
+
{
|
|
175
|
+
if (state.getOutputTaskState(taskIndex).isCommitted()) {
|
|
176
|
+
log.warn("Skipped resumed task {}", taskIndex);
|
|
177
|
+
return null; // resumed
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return executor.submit(new Callable<Throwable>() {
|
|
181
|
+
public Throwable call()
|
|
182
|
+
{
|
|
183
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
|
|
184
|
+
Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
|
|
185
|
+
public void started()
|
|
186
|
+
{
|
|
187
|
+
state.getInputTaskState(taskIndex).start();
|
|
188
|
+
state.getOutputTaskState(taskIndex).start();
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
public void inputCommitted(TaskReport report)
|
|
192
|
+
{
|
|
193
|
+
state.getInputTaskState(taskIndex).setTaskReport(report);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
public void outputCommitted(TaskReport report)
|
|
197
|
+
{
|
|
198
|
+
state.getOutputTaskState(taskIndex).setTaskReport(report);
|
|
199
|
+
}
|
|
200
|
+
});
|
|
201
|
+
return null;
|
|
202
|
+
}
|
|
203
|
+
finally {
|
|
204
|
+
state.getInputTaskState(taskIndex).finish();
|
|
205
|
+
state.getOutputTaskState(taskIndex).finish();
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
});
|
|
209
|
+
}
|
|
84
210
|
}
|
|
85
211
|
|
|
86
|
-
|
|
212
|
+
public static class ScatterExecutor
|
|
213
|
+
extends AbstractLocalExecutor
|
|
87
214
|
{
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
215
|
+
private final int scatterCount;
|
|
216
|
+
private final int inputTaskCount;
|
|
217
|
+
private final ExecutorService inputExecutor;
|
|
218
|
+
private final ExecutorService outputExecutor;
|
|
219
|
+
|
|
220
|
+
public ScatterExecutor(int maxThreads, int inputTaskCount, int scatterCount)
|
|
221
|
+
{
|
|
222
|
+
super(inputTaskCount, inputTaskCount * scatterCount);
|
|
223
|
+
this.inputTaskCount = inputTaskCount;
|
|
224
|
+
this.scatterCount = scatterCount;
|
|
225
|
+
this.inputExecutor = java.util.concurrent.Executors.newFixedThreadPool(
|
|
226
|
+
Math.max(maxThreads / scatterCount, 1),
|
|
227
|
+
new ThreadFactoryBuilder()
|
|
228
|
+
.setNameFormat("embulk-input-executor-%d")
|
|
229
|
+
.setDaemon(true)
|
|
230
|
+
.build());
|
|
231
|
+
this.outputExecutor = java.util.concurrent.Executors.newCachedThreadPool(
|
|
232
|
+
new ThreadFactoryBuilder()
|
|
233
|
+
.setNameFormat("embulk-output-executor-%d")
|
|
234
|
+
.setDaemon(true)
|
|
235
|
+
.build());
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
@Override
|
|
239
|
+
public void close()
|
|
240
|
+
{
|
|
241
|
+
inputExecutor.shutdown();
|
|
242
|
+
outputExecutor.shutdown();
|
|
93
243
|
}
|
|
94
244
|
|
|
95
|
-
|
|
245
|
+
@Override
|
|
246
|
+
protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
|
|
247
|
+
{
|
|
248
|
+
if(isAllScatterOutputFinished(state, taskIndex)) {
|
|
249
|
+
log.warn("Skipped resumed input task {}", taskIndex);
|
|
250
|
+
return null; // resumed
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
return inputExecutor.submit(new Callable<Throwable>() {
|
|
254
|
+
public Throwable call()
|
|
255
|
+
{
|
|
256
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
|
|
257
|
+
runInputTask(Exec.session(), task, state, taskIndex);
|
|
258
|
+
return null;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
private boolean isAllScatterOutputFinished(ProcessState state, int taskIndex) {
|
|
265
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
266
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
|
267
|
+
if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
|
|
268
|
+
return false;
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
return true;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
private void runInputTask(ExecSession exec, ProcessTask task, ProcessState state, int taskIndex)
|
|
275
|
+
{
|
|
276
|
+
InputPlugin inputPlugin = exec.newPlugin(InputPlugin.class, task.getInputPluginType());
|
|
277
|
+
List<FilterPlugin> filterPlugins = Filters.newFilterPlugins(exec, task.getFilterPluginTypes());
|
|
278
|
+
OutputPlugin outputPlugin = exec.newPlugin(OutputPlugin.class, task.getOutputPluginType());
|
|
279
|
+
|
|
280
|
+
try (ScatterTransactionalPageOutput tran = new ScatterTransactionalPageOutput(state, taskIndex, scatterCount)) {
|
|
281
|
+
tran.openOutputs(outputPlugin, task.getOutputSchema(), task.getOutputTaskSource());
|
|
282
|
+
|
|
283
|
+
try (AbortTransactionResource aborter = new AbortTransactionResource(tran)) {
|
|
284
|
+
tran.openFilters(filterPlugins, task.getFilterSchemas(), task.getFilterTaskSources());
|
|
285
|
+
|
|
286
|
+
tran.startWorkers(outputExecutor);
|
|
287
|
+
|
|
288
|
+
// started
|
|
289
|
+
state.getInputTaskState(taskIndex).start();
|
|
290
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
291
|
+
state.getOutputTaskState(taskIndex * scatterCount + i).start();
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
TaskReport inputTaskReport = inputPlugin.run(task.getInputTaskSource(), task.getInputSchema(), taskIndex, tran);
|
|
295
|
+
|
|
296
|
+
// inputCommitted
|
|
297
|
+
if (inputTaskReport == null) {
|
|
298
|
+
inputTaskReport = exec.newTaskReport();
|
|
299
|
+
}
|
|
300
|
+
state.getInputTaskState(taskIndex).setTaskReport(inputTaskReport);
|
|
301
|
+
|
|
302
|
+
// outputCommitted
|
|
303
|
+
tran.commit();
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
finally {
|
|
307
|
+
state.getInputTaskState(taskIndex).finish();
|
|
308
|
+
state.getOutputTaskState(taskIndex).finish();
|
|
309
|
+
}
|
|
310
|
+
}
|
|
96
311
|
}
|
|
97
312
|
|
|
98
|
-
private
|
|
313
|
+
private static class ScatterTransactionalPageOutput
|
|
314
|
+
implements TransactionalPageOutput
|
|
99
315
|
{
|
|
100
|
-
|
|
101
|
-
|
|
316
|
+
private static final Page DONE_PAGE = Page.allocate(0);
|
|
317
|
+
|
|
318
|
+
private static class OutputWorker
|
|
319
|
+
implements Callable<Throwable>
|
|
320
|
+
{
|
|
321
|
+
private final PageOutput output;
|
|
322
|
+
private final Future<Throwable> future;
|
|
323
|
+
private boolean done;
|
|
324
|
+
private Page queued;
|
|
325
|
+
|
|
326
|
+
public OutputWorker(PageOutput output, ExecutorService executor)
|
|
102
327
|
{
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
state.getInputTaskState(taskIndex).start();
|
|
108
|
-
state.getOutputTaskState(taskIndex).start();
|
|
109
|
-
}
|
|
328
|
+
this.output = output;
|
|
329
|
+
this.done = done;
|
|
330
|
+
this.future = executor.submit(this);
|
|
331
|
+
}
|
|
110
332
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
333
|
+
public synchronized void add(Page page)
|
|
334
|
+
throws InterruptedException
|
|
335
|
+
{
|
|
336
|
+
while (true) {
|
|
337
|
+
if (queued == null) {
|
|
338
|
+
queued = page;
|
|
339
|
+
notifyAll();
|
|
340
|
+
return;
|
|
341
|
+
}
|
|
342
|
+
else if (queued == DONE_PAGE) {
|
|
343
|
+
page.release();
|
|
344
|
+
return;
|
|
345
|
+
}
|
|
346
|
+
wait();
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
public Throwable join()
|
|
351
|
+
throws InterruptedException
|
|
352
|
+
{
|
|
353
|
+
try {
|
|
354
|
+
return future.get();
|
|
355
|
+
}
|
|
356
|
+
catch (ExecutionException ex) {
|
|
357
|
+
return ex.getCause();
|
|
358
|
+
}
|
|
359
|
+
}
|
|
115
360
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
361
|
+
@Override
|
|
362
|
+
public synchronized Throwable call()
|
|
363
|
+
throws InterruptedException
|
|
364
|
+
{
|
|
365
|
+
try {
|
|
366
|
+
while (true) {
|
|
367
|
+
if (queued != null) {
|
|
368
|
+
if (queued == DONE_PAGE) {
|
|
369
|
+
return null;
|
|
370
|
+
}
|
|
371
|
+
output.add(queued);
|
|
372
|
+
queued = null;
|
|
373
|
+
notifyAll();
|
|
374
|
+
}
|
|
375
|
+
wait();
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
finally {
|
|
379
|
+
try {
|
|
380
|
+
if (queued != null && queued != DONE_PAGE) {
|
|
381
|
+
queued.release();
|
|
382
|
+
queued = null;
|
|
119
383
|
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
384
|
+
}
|
|
385
|
+
finally {
|
|
386
|
+
queued = DONE_PAGE;
|
|
387
|
+
}
|
|
388
|
+
notifyAll();
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
private final ProcessState state;
|
|
394
|
+
private final int taskIndex;
|
|
395
|
+
private final int scatterCount;
|
|
396
|
+
|
|
397
|
+
private final TransactionalPageOutput[] trans;
|
|
398
|
+
private final PageOutput[] filtereds;
|
|
399
|
+
private final CloseResource[] closeThese;
|
|
400
|
+
|
|
401
|
+
private final OutputWorker[] outputWorkers;
|
|
402
|
+
|
|
403
|
+
private long pageCount;
|
|
404
|
+
|
|
405
|
+
public ScatterTransactionalPageOutput(ProcessState state, int taskIndex, int scatterCount)
|
|
406
|
+
{
|
|
407
|
+
this.state = state;
|
|
408
|
+
this.taskIndex = taskIndex;
|
|
409
|
+
this.scatterCount = scatterCount;
|
|
410
|
+
|
|
411
|
+
this.trans = new TransactionalPageOutput[scatterCount];
|
|
412
|
+
this.filtereds = new PageOutput[scatterCount];
|
|
413
|
+
this.closeThese = new CloseResource[scatterCount];
|
|
414
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
415
|
+
closeThese[i] = new CloseResource();
|
|
416
|
+
}
|
|
417
|
+
this.outputWorkers = new OutputWorker[scatterCount];
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
public void openOutputs(OutputPlugin outputPlugin, Schema outputSchema, TaskSource outputTaskSource)
|
|
421
|
+
{
|
|
422
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
423
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
|
424
|
+
if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
|
|
425
|
+
TransactionalPageOutput tran = PluginWrappers.transactionalPageOutput(
|
|
426
|
+
outputPlugin.open(outputTaskSource, outputSchema, outputTaskIndex));
|
|
427
|
+
trans[i] = tran;
|
|
428
|
+
closeThese[i].closeThis(tran);
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
public void openFilters(List<FilterPlugin> filterPlugins, List<Schema> filterSchemas, List<TaskSource> filterTaskSources)
|
|
434
|
+
{
|
|
435
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
436
|
+
TransactionalPageOutput tran = trans[i];
|
|
437
|
+
if (tran != null) {
|
|
438
|
+
PageOutput filtered = Filters.open(filterPlugins, filterTaskSources, filterSchemas, trans[i]);
|
|
439
|
+
filtereds[i] = filtered;
|
|
440
|
+
closeThese[i].closeThis(filtered);
|
|
125
441
|
}
|
|
126
442
|
}
|
|
127
|
-
}
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
public void startWorkers(ExecutorService outputExecutor)
|
|
446
|
+
{
|
|
447
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
448
|
+
PageOutput filtered = filtereds[i];
|
|
449
|
+
if (filtered != null) {
|
|
450
|
+
outputWorkers[i] = new OutputWorker(filtered, outputExecutor);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
public void add(Page page)
|
|
456
|
+
{
|
|
457
|
+
OutputWorker worker = outputWorkers[(int) (pageCount % scatterCount)];
|
|
458
|
+
if (worker != null) {
|
|
459
|
+
try {
|
|
460
|
+
worker.add(page);
|
|
461
|
+
}
|
|
462
|
+
catch (InterruptedException ex) {
|
|
463
|
+
throw Throwables.propagate(ex);
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
pageCount++;
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
public void finish()
|
|
470
|
+
{
|
|
471
|
+
completeWorkers();
|
|
472
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
473
|
+
if (trans[i] != null) {
|
|
474
|
+
trans[i].finish();
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
public void close()
|
|
480
|
+
{
|
|
481
|
+
completeWorkers();
|
|
482
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
483
|
+
closeThese[i].close();
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
public void abort()
|
|
488
|
+
{
|
|
489
|
+
completeWorkers();
|
|
490
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
491
|
+
if (trans[i] != null) {
|
|
492
|
+
trans[i].abort();
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
public TaskReport commit()
|
|
498
|
+
{
|
|
499
|
+
completeWorkers();
|
|
500
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
501
|
+
if (trans[i] != null) {
|
|
502
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
|
503
|
+
TaskReport outputTaskReport = trans[i].commit();
|
|
504
|
+
trans[i] = null; // don't abort
|
|
505
|
+
if (outputTaskReport == null) {
|
|
506
|
+
outputTaskReport = Exec.newTaskReport();
|
|
507
|
+
}
|
|
508
|
+
state.getOutputTaskState(outputTaskIndex).setTaskReport(outputTaskReport);
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
return null;
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
public void completeWorkers()
|
|
515
|
+
{
|
|
516
|
+
for (int i = 0; i < scatterCount; i++) {
|
|
517
|
+
OutputWorker worker = outputWorkers[i];
|
|
518
|
+
if (worker != null) {
|
|
519
|
+
try {
|
|
520
|
+
worker.add(DONE_PAGE);
|
|
521
|
+
}
|
|
522
|
+
catch (InterruptedException ex) {
|
|
523
|
+
throw Throwables.propagate(ex);
|
|
524
|
+
}
|
|
525
|
+
Throwable error = null;
|
|
526
|
+
try {
|
|
527
|
+
error = worker.join();
|
|
528
|
+
}
|
|
529
|
+
catch (InterruptedException ex) {
|
|
530
|
+
error = ex;
|
|
531
|
+
}
|
|
532
|
+
if (error != null) {
|
|
533
|
+
throw Throwables.propagate(error);
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
128
538
|
}
|
|
129
539
|
}
|