embulk 0.7.11 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/build.gradle +2 -2
- data/embulk-core/build.gradle +2 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +11 -3
- data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +53 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +479 -69
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +12 -5
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +26 -5
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +125 -0
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +1 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +73 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +11 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +7 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +9 -1
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +7 -0
- data/embulk-docs/src/built-in.rst +40 -3
- data/embulk-docs/src/conf.py +2 -2
- data/embulk-docs/src/release.rst +1 -1
- data/embulk-docs/src/release/release-0.8.0.rst +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +12 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +18 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +1 -1
- data/embulk.gemspec +1 -1
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/gradlew +3 -7
- data/lib/embulk/column.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +76 -10
- data/lib/embulk/command/embulk_new_plugin.rb +2 -0
- data/lib/embulk/command/embulk_run.rb +17 -10
- data/lib/embulk/data/bundle/.ruby-version +1 -1
- data/lib/embulk/data/new/java/build.gradle.erb +21 -0
- data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +128 -0
- data/lib/embulk/data/new/java/config/checkstyle/default.xml +108 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/data/new/java/gradlew +3 -7
- data/lib/embulk/data/new/ruby/.ruby-version +1 -1
- data/lib/embulk/guess/csv.rb +1 -1
- data/lib/embulk/guess/schema_guess.rb +6 -0
- data/lib/embulk/guess_plugin.rb +1 -1
- data/lib/embulk/java/imports.rb +4 -0
- data/lib/embulk/plugin_registry.rb +8 -12
- data/lib/embulk/schema.rb +6 -0
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_csv_guess.rb +170 -0
- data/test/helper.rb +2 -0
- metadata +19 -17
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +0 -34
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +0 -157
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +0 -22
- data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +0 -145
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +0 -26
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +0 -61
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +0 -187
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +0 -89
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +0 -38
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +0 -97
- data/embulk-docs/src/release/release-0.7.11.rst +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b3174c7826cebe5dd528fb3aa5f577367f30320
|
4
|
+
data.tar.gz: a9d08926396181e890031f8e57cabf9a2db98e18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b25dbabad3667aa97ec211c65289f0668d4e9c38ca069012436748be8415368078051b0fffb0bf45b3c40e71473e75136bf1b2fb7b79ca05fb67440aa5ba12a
|
7
|
+
data.tar.gz: 5842d8686dcbf6e2e52a03a2f56d32a2830657d8b70f4101cb2294281db46b9bfc88bb6ef86fec9094e7edef1c2c3287fd240613d3cf887607049c85533ce4e3
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
embulk (0.
|
5
|
-
jruby-jars (= 9.0.
|
4
|
+
embulk (0.8.0)
|
5
|
+
jruby-jars (= 9.0.4.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
jruby-jars (9.0.
|
10
|
+
jruby-jars (9.0.4.0)
|
11
11
|
kramdown (1.5.0)
|
12
12
|
power_assert (0.2.2)
|
13
13
|
rake (10.4.2)
|
data/README.md
CHANGED
@@ -110,7 +110,7 @@ To use the bundle, add `-b <bundle_dir>` option to `guess`, `preview`, or `run`
|
|
110
110
|
See the generated \<bundle_dir>/Gemfile file for how plugin bundles work.
|
111
111
|
|
112
112
|
```
|
113
|
-
embulk mkbundle ./embulk_bundle
|
113
|
+
embulk mkbundle ./embulk_bundle # please edit ./embulk_bundle/Gemfile to add plugins. Detailed usage is written in the Gemfile
|
114
114
|
embulk guess -b ./embulk_bundle ...
|
115
115
|
embulk run -b ./embulk_bundle ...
|
116
116
|
```
|
data/build.gradle
CHANGED
@@ -16,10 +16,10 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
16
16
|
|
17
17
|
allprojects {
|
18
18
|
group = 'org.embulk'
|
19
|
-
version = '0.
|
19
|
+
version = '0.8.0'
|
20
20
|
|
21
21
|
ext {
|
22
|
-
jrubyVersion = '9.0.
|
22
|
+
jrubyVersion = '9.0.4.0'
|
23
23
|
}
|
24
24
|
|
25
25
|
apply plugin: 'java'
|
data/embulk-core/build.gradle
CHANGED
@@ -16,6 +16,7 @@ import com.github.jrubygradle.JRubyPrepare
|
|
16
16
|
|
17
17
|
// determine which dependencies have updates: $ gradle dependencyUpdates
|
18
18
|
dependencies {
|
19
|
+
compile 'org.embulk:guice-bootstrap:0.1.1'
|
19
20
|
compile 'com.google.guava:guava:18.0'
|
20
21
|
compile 'com.google.inject:guice:4.0'
|
21
22
|
compile 'com.google.inject.extensions:guice-multibindings:4.0'
|
@@ -37,6 +38,7 @@ dependencies {
|
|
37
38
|
compile 'joda-time:joda-time:2.8.1'
|
38
39
|
compile 'io.netty:netty-buffer:5.0.0.Alpha1'
|
39
40
|
compile 'org.fusesource.jansi:jansi:1.11'
|
41
|
+
compile 'org.msgpack:msgpack-core:0.8.1'
|
40
42
|
|
41
43
|
// For embulk/guess/charset.rb. See also embulk.gemspec
|
42
44
|
compile 'com.ibm.icu:icu4j:54.1.1'
|
@@ -15,6 +15,9 @@ import com.fasterxml.jackson.databind.RuntimeJsonMappingException;
|
|
15
15
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
16
16
|
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
|
17
17
|
import org.yaml.snakeyaml.Yaml;
|
18
|
+
import org.yaml.snakeyaml.DumperOptions;
|
19
|
+
import org.yaml.snakeyaml.representer.Representer;
|
20
|
+
import org.yaml.snakeyaml.constructor.SafeConstructor;
|
18
21
|
|
19
22
|
public class ConfigLoader
|
20
23
|
{
|
@@ -60,7 +63,7 @@ public class ConfigLoader
|
|
60
63
|
|
61
64
|
public ConfigSource fromYamlString(String string)
|
62
65
|
{
|
63
|
-
JsonNode node = objectToJson(
|
66
|
+
JsonNode node = objectToJson(newYaml().load(string));
|
64
67
|
validateJsonNode(node);
|
65
68
|
return new DataSourceImpl(model, (ObjectNode) node);
|
66
69
|
}
|
@@ -74,7 +77,7 @@ public class ConfigLoader
|
|
74
77
|
|
75
78
|
public ConfigSource fromYaml(InputStream stream) throws IOException
|
76
79
|
{
|
77
|
-
JsonNode node = objectToJson(
|
80
|
+
JsonNode node = objectToJson(newYaml().load(stream));
|
78
81
|
validateJsonNode(node);
|
79
82
|
return new DataSourceImpl(model, (ObjectNode) node);
|
80
83
|
}
|
@@ -107,7 +110,7 @@ public class ConfigLoader
|
|
107
110
|
{
|
108
111
|
ObjectNode source = new ObjectNode(JsonNodeFactory.instance);
|
109
112
|
DataSource ds = new DataSourceImpl(model, source);
|
110
|
-
Yaml yaml =
|
113
|
+
Yaml yaml = newYaml();
|
111
114
|
for (Map.Entry<String, String> pair : props.entrySet()) {
|
112
115
|
if (!pair.getKey().startsWith(keyPrefix)) {
|
113
116
|
continue;
|
@@ -138,4 +141,9 @@ public class ConfigLoader
|
|
138
141
|
throw new RuntimeException(ex);
|
139
142
|
}
|
140
143
|
}
|
144
|
+
|
145
|
+
private Yaml newYaml()
|
146
|
+
{
|
147
|
+
return new Yaml(new SafeConstructor(), new Representer(), new DumperOptions(), new YamlTagResolver());
|
148
|
+
}
|
141
149
|
}
|
@@ -0,0 +1,53 @@
|
|
1
|
+
package org.embulk.config;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.regex.Pattern;
|
5
|
+
import org.yaml.snakeyaml.resolver.Resolver;
|
6
|
+
import org.yaml.snakeyaml.nodes.Tag;
|
7
|
+
import org.yaml.snakeyaml.nodes.NodeId;
|
8
|
+
|
9
|
+
public class YamlTagResolver
|
10
|
+
extends Resolver
|
11
|
+
{
|
12
|
+
// Resolver converts a node (scalar, sequence, map, or !!tag with them)
|
13
|
+
// to a tag (INT, FLOAT, STR, SEQ, MAP, ...). For example, converting
|
14
|
+
// "123" (scalar) to 123 (INT), or "true" (scalar) to true (BOOL).
|
15
|
+
// This is called by snakeyaml Composer which converts parser events
|
16
|
+
// into an object.
|
17
|
+
//
|
18
|
+
// jackson-dataformat-yaml doesn't use this because it traverses parser
|
19
|
+
// events without using Composer.
|
20
|
+
|
21
|
+
public static final Pattern FLOAT_EXCEPTING_ZERO_START = Pattern
|
22
|
+
.compile("^([-+]?(\\.[0-9]+|[1-9][0-9_]*(\\.[0-9_]*)?)([eE][-+]?[0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public void addImplicitResolver(Tag tag, Pattern regexp, String first)
|
26
|
+
{
|
27
|
+
// This method is called by constructor through addImplicitResolvers
|
28
|
+
// to setup default implicit resolvers.
|
29
|
+
|
30
|
+
if (tag.equals(Tag.FLOAT)) {
|
31
|
+
super.addImplicitResolver(Tag.FLOAT, FLOAT_EXCEPTING_ZERO_START, "-+0123456789.");
|
32
|
+
}
|
33
|
+
else if (tag.equals(Tag.BOOL)) {
|
34
|
+
// use stricter rule (reject 'On', 'Off', 'Yes', 'No')
|
35
|
+
super.addImplicitResolver(Tag.BOOL, Pattern.compile("^(?:[Tt]rue|[Ff]alse)$"), "TtFf");
|
36
|
+
}
|
37
|
+
else if (tag.equals(Tag.TIMESTAMP)) {
|
38
|
+
// This solves some unexpected behavior that snakeyaml
|
39
|
+
// deserializes "2015-01-01 00:00:00" to java.util.Date
|
40
|
+
// but jackson serializes java.util.Date to an integer.
|
41
|
+
return;
|
42
|
+
}
|
43
|
+
else {
|
44
|
+
super.addImplicitResolver(tag, regexp, first);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
@Override
|
49
|
+
public Tag resolve(NodeId kind, String value, boolean implicit)
|
50
|
+
{
|
51
|
+
return super.resolve(kind, value, implicit); // checks implicit resolvers
|
52
|
+
}
|
53
|
+
}
|
@@ -36,7 +36,6 @@ public class ExecModule
|
|
36
36
|
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
37
37
|
|
38
38
|
// LocalExecutorPlugin
|
39
|
-
binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
|
40
39
|
registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
|
41
40
|
|
42
41
|
// serde
|
@@ -6,124 +6,534 @@ import java.util.concurrent.Callable;
|
|
6
6
|
import java.util.concurrent.Future;
|
7
7
|
import java.util.concurrent.ExecutorService;
|
8
8
|
import java.util.concurrent.ExecutionException;
|
9
|
+
import com.google.common.base.Throwables;
|
10
|
+
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
9
11
|
import org.slf4j.Logger;
|
10
12
|
import com.google.inject.Inject;
|
11
13
|
import org.embulk.config.ConfigSource;
|
14
|
+
import org.embulk.config.TaskSource;
|
12
15
|
import org.embulk.config.TaskReport;
|
13
16
|
import org.embulk.spi.Exec;
|
17
|
+
import org.embulk.spi.ExecSession;
|
14
18
|
import org.embulk.spi.ExecutorPlugin;
|
15
19
|
import org.embulk.spi.ProcessTask;
|
16
20
|
import org.embulk.spi.ProcessState;
|
17
21
|
import org.embulk.spi.Schema;
|
22
|
+
import org.embulk.spi.InputPlugin;
|
23
|
+
import org.embulk.spi.FilterPlugin;
|
24
|
+
import org.embulk.spi.OutputPlugin;
|
25
|
+
import org.embulk.spi.Page;
|
26
|
+
import org.embulk.spi.PageOutput;
|
27
|
+
import org.embulk.spi.AbortTransactionResource;
|
28
|
+
import org.embulk.spi.CloseResource;
|
29
|
+
import org.embulk.spi.TransactionalPageOutput;
|
30
|
+
import org.embulk.plugin.compat.PluginWrappers;
|
31
|
+
import org.embulk.spi.util.Filters;
|
18
32
|
import org.embulk.spi.util.Executors;
|
19
33
|
import org.embulk.spi.util.Executors.ProcessStateCallback;
|
20
34
|
|
21
35
|
public class LocalExecutorPlugin
|
22
36
|
implements ExecutorPlugin
|
23
37
|
{
|
24
|
-
private
|
38
|
+
private int defaultMaxThreads;
|
39
|
+
private int defaultMinThreads;
|
25
40
|
|
26
41
|
@Inject
|
27
|
-
public LocalExecutorPlugin(
|
42
|
+
public LocalExecutorPlugin(@ForSystemConfig ConfigSource systemConfig)
|
28
43
|
{
|
29
|
-
|
44
|
+
int cores = Runtime.getRuntime().availableProcessors();
|
45
|
+
this.defaultMaxThreads = systemConfig.get(Integer.class, "max_threads", cores * 2);
|
46
|
+
this.defaultMinThreads = systemConfig.get(Integer.class, "min_output_tasks", cores);
|
30
47
|
}
|
31
48
|
|
32
49
|
@Override
|
33
|
-
public void transaction(ConfigSource config, Schema outputSchema,
|
50
|
+
public void transaction(ConfigSource config, Schema outputSchema, int inputTaskCount,
|
34
51
|
ExecutorPlugin.Control control)
|
35
52
|
{
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
localExecute(task, inputTaskCount, state);
|
40
|
-
}
|
41
|
-
});
|
53
|
+
try (AbstractLocalExecutor exec = newExecutor(config, inputTaskCount)) {
|
54
|
+
control.transaction(outputSchema, exec.getOutputTaskCount(), exec);
|
55
|
+
}
|
42
56
|
}
|
43
57
|
|
44
|
-
private
|
58
|
+
private AbstractLocalExecutor newExecutor(ConfigSource config, int inputTaskCount)
|
45
59
|
{
|
46
60
|
Logger log = Exec.getLogger(LocalExecutorPlugin.class);
|
61
|
+
int maxThreads = config.get(Integer.class, "max_threads", defaultMaxThreads);
|
62
|
+
int minThreads = config.get(Integer.class, "min_output_tasks", defaultMinThreads);
|
63
|
+
if (inputTaskCount < minThreads) {
|
64
|
+
int scatterCount = (minThreads + inputTaskCount - 1) / inputTaskCount;
|
65
|
+
log.info("Using local thread executor with max_threads={} / output tasks {} = input tasks {} * {}",
|
66
|
+
maxThreads, inputTaskCount * scatterCount, inputTaskCount, scatterCount);
|
67
|
+
return new ScatterExecutor(maxThreads, inputTaskCount, scatterCount);
|
68
|
+
}
|
69
|
+
else {
|
70
|
+
log.info("Using local thread executor with max_threads={} / tasks={}", maxThreads, inputTaskCount);
|
71
|
+
return new DirectExecutor(maxThreads, inputTaskCount);
|
72
|
+
}
|
73
|
+
}
|
47
74
|
|
48
|
-
|
75
|
+
private static abstract class AbstractLocalExecutor
|
76
|
+
implements Executor, AutoCloseable
|
77
|
+
{
|
78
|
+
protected final Logger log = Exec.getLogger(LocalExecutorPlugin.class);
|
49
79
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
80
|
+
protected final int inputTaskCount;
|
81
|
+
protected final int outputTaskCount;
|
82
|
+
|
83
|
+
public AbstractLocalExecutor(int inputTaskCount, int outputTaskCount)
|
84
|
+
{
|
85
|
+
this.inputTaskCount = inputTaskCount;
|
86
|
+
this.outputTaskCount = outputTaskCount;
|
87
|
+
}
|
88
|
+
|
89
|
+
public int getOutputTaskCount()
|
90
|
+
{
|
91
|
+
return outputTaskCount;
|
92
|
+
}
|
93
|
+
|
94
|
+
@Override
|
95
|
+
public void execute(ProcessTask task, ProcessState state)
|
96
|
+
{
|
97
|
+
state.initialize(inputTaskCount, outputTaskCount);
|
61
98
|
|
62
|
-
|
63
|
-
|
64
|
-
|
99
|
+
List<Future<Throwable>> futures = new ArrayList<>(inputTaskCount);
|
100
|
+
try {
|
101
|
+
for (int i = 0; i < inputTaskCount; i++) {
|
102
|
+
futures.add(startInputTask(task, state, i));
|
65
103
|
}
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
104
|
+
showProgress(state, inputTaskCount);
|
105
|
+
|
106
|
+
for (int i = 0; i < inputTaskCount; i++) {
|
107
|
+
if (futures.get(i) == null) {
|
108
|
+
continue;
|
109
|
+
}
|
110
|
+
try {
|
111
|
+
state.getInputTaskState(i).setException(futures.get(i).get());
|
112
|
+
}
|
113
|
+
catch (ExecutionException ex) {
|
114
|
+
state.getInputTaskState(i).setException(ex.getCause());
|
115
|
+
//Throwables.propagate(ex.getCause());
|
116
|
+
}
|
117
|
+
catch (InterruptedException ex) {
|
118
|
+
state.getInputTaskState(i).setException(new ExecutionInterruptedException(ex));
|
119
|
+
}
|
120
|
+
showProgress(state, inputTaskCount);
|
73
121
|
}
|
74
|
-
showProgress(log, state, taskCount);
|
75
122
|
}
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
123
|
+
finally {
|
124
|
+
for (Future<Throwable> future : futures) {
|
125
|
+
if (future != null && !future.isDone()) {
|
126
|
+
future.cancel(true);
|
127
|
+
// TODO join?
|
128
|
+
}
|
81
129
|
}
|
82
130
|
}
|
83
131
|
}
|
132
|
+
|
133
|
+
@Override
|
134
|
+
public abstract void close();
|
135
|
+
|
136
|
+
private void showProgress(ProcessState state, int taskCount)
|
137
|
+
{
|
138
|
+
int started = 0;
|
139
|
+
int finished = 0;
|
140
|
+
for (int i = 0; i < taskCount; i++) {
|
141
|
+
if (state.getOutputTaskState(i).isStarted()) { started++; }
|
142
|
+
if (state.getOutputTaskState(i).isFinished()) { finished++; }
|
143
|
+
}
|
144
|
+
|
145
|
+
log.info(String.format("{done:%3d / %d, running: %d}", finished, taskCount, started - finished));
|
146
|
+
}
|
147
|
+
|
148
|
+
protected abstract Future<Throwable> startInputTask(ProcessTask task, ProcessState state, int taskIndex);
|
149
|
+
}
|
150
|
+
|
151
|
+
public static class DirectExecutor
|
152
|
+
extends AbstractLocalExecutor
|
153
|
+
{
|
154
|
+
protected final ExecutorService executor;
|
155
|
+
|
156
|
+
public DirectExecutor(int maxThreads, int taskCount)
|
157
|
+
{
|
158
|
+
super(taskCount, taskCount);
|
159
|
+
this.executor = java.util.concurrent.Executors.newFixedThreadPool(maxThreads,
|
160
|
+
new ThreadFactoryBuilder()
|
161
|
+
.setNameFormat("embulk-executor-%d")
|
162
|
+
.setDaemon(true)
|
163
|
+
.build());
|
164
|
+
}
|
165
|
+
|
166
|
+
@Override
|
167
|
+
public void close()
|
168
|
+
{
|
169
|
+
executor.shutdown();
|
170
|
+
}
|
171
|
+
|
172
|
+
@Override
|
173
|
+
protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
|
174
|
+
{
|
175
|
+
if (state.getOutputTaskState(taskIndex).isCommitted()) {
|
176
|
+
log.warn("Skipped resumed task {}", taskIndex);
|
177
|
+
return null; // resumed
|
178
|
+
}
|
179
|
+
|
180
|
+
return executor.submit(new Callable<Throwable>() {
|
181
|
+
public Throwable call()
|
182
|
+
{
|
183
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
|
184
|
+
Executors.process(Exec.session(), task, taskIndex, new ProcessStateCallback() {
|
185
|
+
public void started()
|
186
|
+
{
|
187
|
+
state.getInputTaskState(taskIndex).start();
|
188
|
+
state.getOutputTaskState(taskIndex).start();
|
189
|
+
}
|
190
|
+
|
191
|
+
public void inputCommitted(TaskReport report)
|
192
|
+
{
|
193
|
+
state.getInputTaskState(taskIndex).setTaskReport(report);
|
194
|
+
}
|
195
|
+
|
196
|
+
public void outputCommitted(TaskReport report)
|
197
|
+
{
|
198
|
+
state.getOutputTaskState(taskIndex).setTaskReport(report);
|
199
|
+
}
|
200
|
+
});
|
201
|
+
return null;
|
202
|
+
}
|
203
|
+
finally {
|
204
|
+
state.getInputTaskState(taskIndex).finish();
|
205
|
+
state.getOutputTaskState(taskIndex).finish();
|
206
|
+
}
|
207
|
+
}
|
208
|
+
});
|
209
|
+
}
|
84
210
|
}
|
85
211
|
|
86
|
-
|
212
|
+
public static class ScatterExecutor
|
213
|
+
extends AbstractLocalExecutor
|
87
214
|
{
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
215
|
+
private final int scatterCount;
|
216
|
+
private final int inputTaskCount;
|
217
|
+
private final ExecutorService inputExecutor;
|
218
|
+
private final ExecutorService outputExecutor;
|
219
|
+
|
220
|
+
public ScatterExecutor(int maxThreads, int inputTaskCount, int scatterCount)
|
221
|
+
{
|
222
|
+
super(inputTaskCount, inputTaskCount * scatterCount);
|
223
|
+
this.inputTaskCount = inputTaskCount;
|
224
|
+
this.scatterCount = scatterCount;
|
225
|
+
this.inputExecutor = java.util.concurrent.Executors.newFixedThreadPool(
|
226
|
+
Math.max(maxThreads / scatterCount, 1),
|
227
|
+
new ThreadFactoryBuilder()
|
228
|
+
.setNameFormat("embulk-input-executor-%d")
|
229
|
+
.setDaemon(true)
|
230
|
+
.build());
|
231
|
+
this.outputExecutor = java.util.concurrent.Executors.newCachedThreadPool(
|
232
|
+
new ThreadFactoryBuilder()
|
233
|
+
.setNameFormat("embulk-output-executor-%d")
|
234
|
+
.setDaemon(true)
|
235
|
+
.build());
|
236
|
+
}
|
237
|
+
|
238
|
+
@Override
|
239
|
+
public void close()
|
240
|
+
{
|
241
|
+
inputExecutor.shutdown();
|
242
|
+
outputExecutor.shutdown();
|
93
243
|
}
|
94
244
|
|
95
|
-
|
245
|
+
@Override
|
246
|
+
protected Future<Throwable> startInputTask(final ProcessTask task, final ProcessState state, final int taskIndex)
|
247
|
+
{
|
248
|
+
if(isAllScatterOutputFinished(state, taskIndex)) {
|
249
|
+
log.warn("Skipped resumed input task {}", taskIndex);
|
250
|
+
return null; // resumed
|
251
|
+
}
|
252
|
+
|
253
|
+
return inputExecutor.submit(new Callable<Throwable>() {
|
254
|
+
public Throwable call()
|
255
|
+
{
|
256
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName(String.format("task-%04d", taskIndex))) {
|
257
|
+
runInputTask(Exec.session(), task, state, taskIndex);
|
258
|
+
return null;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
});
|
262
|
+
}
|
263
|
+
|
264
|
+
private boolean isAllScatterOutputFinished(ProcessState state, int taskIndex) {
|
265
|
+
for (int i = 0; i < scatterCount; i++) {
|
266
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
267
|
+
if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
|
268
|
+
return false;
|
269
|
+
}
|
270
|
+
}
|
271
|
+
return true;
|
272
|
+
}
|
273
|
+
|
274
|
+
private void runInputTask(ExecSession exec, ProcessTask task, ProcessState state, int taskIndex)
|
275
|
+
{
|
276
|
+
InputPlugin inputPlugin = exec.newPlugin(InputPlugin.class, task.getInputPluginType());
|
277
|
+
List<FilterPlugin> filterPlugins = Filters.newFilterPlugins(exec, task.getFilterPluginTypes());
|
278
|
+
OutputPlugin outputPlugin = exec.newPlugin(OutputPlugin.class, task.getOutputPluginType());
|
279
|
+
|
280
|
+
try (ScatterTransactionalPageOutput tran = new ScatterTransactionalPageOutput(state, taskIndex, scatterCount)) {
|
281
|
+
tran.openOutputs(outputPlugin, task.getOutputSchema(), task.getOutputTaskSource());
|
282
|
+
|
283
|
+
try (AbortTransactionResource aborter = new AbortTransactionResource(tran)) {
|
284
|
+
tran.openFilters(filterPlugins, task.getFilterSchemas(), task.getFilterTaskSources());
|
285
|
+
|
286
|
+
tran.startWorkers(outputExecutor);
|
287
|
+
|
288
|
+
// started
|
289
|
+
state.getInputTaskState(taskIndex).start();
|
290
|
+
for (int i = 0; i < scatterCount; i++) {
|
291
|
+
state.getOutputTaskState(taskIndex * scatterCount + i).start();
|
292
|
+
}
|
293
|
+
|
294
|
+
TaskReport inputTaskReport = inputPlugin.run(task.getInputTaskSource(), task.getInputSchema(), taskIndex, tran);
|
295
|
+
|
296
|
+
// inputCommitted
|
297
|
+
if (inputTaskReport == null) {
|
298
|
+
inputTaskReport = exec.newTaskReport();
|
299
|
+
}
|
300
|
+
state.getInputTaskState(taskIndex).setTaskReport(inputTaskReport);
|
301
|
+
|
302
|
+
// outputCommitted
|
303
|
+
tran.commit();
|
304
|
+
}
|
305
|
+
}
|
306
|
+
finally {
|
307
|
+
state.getInputTaskState(taskIndex).finish();
|
308
|
+
state.getOutputTaskState(taskIndex).finish();
|
309
|
+
}
|
310
|
+
}
|
96
311
|
}
|
97
312
|
|
98
|
-
private
|
313
|
+
private static class ScatterTransactionalPageOutput
|
314
|
+
implements TransactionalPageOutput
|
99
315
|
{
|
100
|
-
|
101
|
-
|
316
|
+
private static final Page DONE_PAGE = Page.allocate(0);
|
317
|
+
|
318
|
+
private static class OutputWorker
|
319
|
+
implements Callable<Throwable>
|
320
|
+
{
|
321
|
+
private final PageOutput output;
|
322
|
+
private final Future<Throwable> future;
|
323
|
+
private boolean done;
|
324
|
+
private Page queued;
|
325
|
+
|
326
|
+
public OutputWorker(PageOutput output, ExecutorService executor)
|
102
327
|
{
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
state.getInputTaskState(taskIndex).start();
|
108
|
-
state.getOutputTaskState(taskIndex).start();
|
109
|
-
}
|
328
|
+
this.output = output;
|
329
|
+
this.done = done;
|
330
|
+
this.future = executor.submit(this);
|
331
|
+
}
|
110
332
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
333
|
+
public synchronized void add(Page page)
|
334
|
+
throws InterruptedException
|
335
|
+
{
|
336
|
+
while (true) {
|
337
|
+
if (queued == null) {
|
338
|
+
queued = page;
|
339
|
+
notifyAll();
|
340
|
+
return;
|
341
|
+
}
|
342
|
+
else if (queued == DONE_PAGE) {
|
343
|
+
page.release();
|
344
|
+
return;
|
345
|
+
}
|
346
|
+
wait();
|
347
|
+
}
|
348
|
+
}
|
349
|
+
|
350
|
+
public Throwable join()
|
351
|
+
throws InterruptedException
|
352
|
+
{
|
353
|
+
try {
|
354
|
+
return future.get();
|
355
|
+
}
|
356
|
+
catch (ExecutionException ex) {
|
357
|
+
return ex.getCause();
|
358
|
+
}
|
359
|
+
}
|
115
360
|
|
116
|
-
|
117
|
-
|
118
|
-
|
361
|
+
@Override
|
362
|
+
public synchronized Throwable call()
|
363
|
+
throws InterruptedException
|
364
|
+
{
|
365
|
+
try {
|
366
|
+
while (true) {
|
367
|
+
if (queued != null) {
|
368
|
+
if (queued == DONE_PAGE) {
|
369
|
+
return null;
|
370
|
+
}
|
371
|
+
output.add(queued);
|
372
|
+
queued = null;
|
373
|
+
notifyAll();
|
374
|
+
}
|
375
|
+
wait();
|
376
|
+
}
|
377
|
+
}
|
378
|
+
finally {
|
379
|
+
try {
|
380
|
+
if (queued != null && queued != DONE_PAGE) {
|
381
|
+
queued.release();
|
382
|
+
queued = null;
|
119
383
|
}
|
120
|
-
}
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
384
|
+
}
|
385
|
+
finally {
|
386
|
+
queued = DONE_PAGE;
|
387
|
+
}
|
388
|
+
notifyAll();
|
389
|
+
}
|
390
|
+
}
|
391
|
+
}
|
392
|
+
|
393
|
+
private final ProcessState state;
|
394
|
+
private final int taskIndex;
|
395
|
+
private final int scatterCount;
|
396
|
+
|
397
|
+
private final TransactionalPageOutput[] trans;
|
398
|
+
private final PageOutput[] filtereds;
|
399
|
+
private final CloseResource[] closeThese;
|
400
|
+
|
401
|
+
private final OutputWorker[] outputWorkers;
|
402
|
+
|
403
|
+
private long pageCount;
|
404
|
+
|
405
|
+
public ScatterTransactionalPageOutput(ProcessState state, int taskIndex, int scatterCount)
|
406
|
+
{
|
407
|
+
this.state = state;
|
408
|
+
this.taskIndex = taskIndex;
|
409
|
+
this.scatterCount = scatterCount;
|
410
|
+
|
411
|
+
this.trans = new TransactionalPageOutput[scatterCount];
|
412
|
+
this.filtereds = new PageOutput[scatterCount];
|
413
|
+
this.closeThese = new CloseResource[scatterCount];
|
414
|
+
for (int i = 0; i < scatterCount; i++) {
|
415
|
+
closeThese[i] = new CloseResource();
|
416
|
+
}
|
417
|
+
this.outputWorkers = new OutputWorker[scatterCount];
|
418
|
+
}
|
419
|
+
|
420
|
+
public void openOutputs(OutputPlugin outputPlugin, Schema outputSchema, TaskSource outputTaskSource)
|
421
|
+
{
|
422
|
+
for (int i = 0; i < scatterCount; i++) {
|
423
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
424
|
+
if (!state.getOutputTaskState(outputTaskIndex).isCommitted()) {
|
425
|
+
TransactionalPageOutput tran = PluginWrappers.transactionalPageOutput(
|
426
|
+
outputPlugin.open(outputTaskSource, outputSchema, outputTaskIndex));
|
427
|
+
trans[i] = tran;
|
428
|
+
closeThese[i].closeThis(tran);
|
429
|
+
}
|
430
|
+
}
|
431
|
+
}
|
432
|
+
|
433
|
+
public void openFilters(List<FilterPlugin> filterPlugins, List<Schema> filterSchemas, List<TaskSource> filterTaskSources)
|
434
|
+
{
|
435
|
+
for (int i = 0; i < scatterCount; i++) {
|
436
|
+
TransactionalPageOutput tran = trans[i];
|
437
|
+
if (tran != null) {
|
438
|
+
PageOutput filtered = Filters.open(filterPlugins, filterTaskSources, filterSchemas, trans[i]);
|
439
|
+
filtereds[i] = filtered;
|
440
|
+
closeThese[i].closeThis(filtered);
|
125
441
|
}
|
126
442
|
}
|
127
|
-
}
|
443
|
+
}
|
444
|
+
|
445
|
+
public void startWorkers(ExecutorService outputExecutor)
|
446
|
+
{
|
447
|
+
for (int i = 0; i < scatterCount; i++) {
|
448
|
+
PageOutput filtered = filtereds[i];
|
449
|
+
if (filtered != null) {
|
450
|
+
outputWorkers[i] = new OutputWorker(filtered, outputExecutor);
|
451
|
+
}
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
455
|
+
public void add(Page page)
|
456
|
+
{
|
457
|
+
OutputWorker worker = outputWorkers[(int) (pageCount % scatterCount)];
|
458
|
+
if (worker != null) {
|
459
|
+
try {
|
460
|
+
worker.add(page);
|
461
|
+
}
|
462
|
+
catch (InterruptedException ex) {
|
463
|
+
throw Throwables.propagate(ex);
|
464
|
+
}
|
465
|
+
}
|
466
|
+
pageCount++;
|
467
|
+
}
|
468
|
+
|
469
|
+
public void finish()
|
470
|
+
{
|
471
|
+
completeWorkers();
|
472
|
+
for (int i = 0; i < scatterCount; i++) {
|
473
|
+
if (trans[i] != null) {
|
474
|
+
trans[i].finish();
|
475
|
+
}
|
476
|
+
}
|
477
|
+
}
|
478
|
+
|
479
|
+
public void close()
|
480
|
+
{
|
481
|
+
completeWorkers();
|
482
|
+
for (int i = 0; i < scatterCount; i++) {
|
483
|
+
closeThese[i].close();
|
484
|
+
}
|
485
|
+
}
|
486
|
+
|
487
|
+
public void abort()
|
488
|
+
{
|
489
|
+
completeWorkers();
|
490
|
+
for (int i = 0; i < scatterCount; i++) {
|
491
|
+
if (trans[i] != null) {
|
492
|
+
trans[i].abort();
|
493
|
+
}
|
494
|
+
}
|
495
|
+
}
|
496
|
+
|
497
|
+
public TaskReport commit()
|
498
|
+
{
|
499
|
+
completeWorkers();
|
500
|
+
for (int i = 0; i < scatterCount; i++) {
|
501
|
+
if (trans[i] != null) {
|
502
|
+
int outputTaskIndex = taskIndex * scatterCount + i;
|
503
|
+
TaskReport outputTaskReport = trans[i].commit();
|
504
|
+
trans[i] = null; // don't abort
|
505
|
+
if (outputTaskReport == null) {
|
506
|
+
outputTaskReport = Exec.newTaskReport();
|
507
|
+
}
|
508
|
+
state.getOutputTaskState(outputTaskIndex).setTaskReport(outputTaskReport);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
return null;
|
512
|
+
}
|
513
|
+
|
514
|
+
public void completeWorkers()
|
515
|
+
{
|
516
|
+
for (int i = 0; i < scatterCount; i++) {
|
517
|
+
OutputWorker worker = outputWorkers[i];
|
518
|
+
if (worker != null) {
|
519
|
+
try {
|
520
|
+
worker.add(DONE_PAGE);
|
521
|
+
}
|
522
|
+
catch (InterruptedException ex) {
|
523
|
+
throw Throwables.propagate(ex);
|
524
|
+
}
|
525
|
+
Throwable error = null;
|
526
|
+
try {
|
527
|
+
error = worker.join();
|
528
|
+
}
|
529
|
+
catch (InterruptedException ex) {
|
530
|
+
error = ex;
|
531
|
+
}
|
532
|
+
if (error != null) {
|
533
|
+
throw Throwables.propagate(error);
|
534
|
+
}
|
535
|
+
}
|
536
|
+
}
|
537
|
+
}
|
128
538
|
}
|
129
539
|
}
|