embulk 0.5.5 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -7
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +664 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +5 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +130 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +3 -3
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +7 -6
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +35 -3
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +4 -1
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +73 -1
- data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +70 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +17 -3
- data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.6.0.rst +34 -0
- data/lib/embulk/executor_plugin.rb +23 -0
- data/lib/embulk/java_plugin.rb +5 -0
- data/lib/embulk/plugin.rb +13 -2
- data/lib/embulk/version.rb +1 -1
- metadata +15 -5
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +0 -660
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 346027a3a74803953c01e36cd76e41f84aba43db
|
4
|
+
data.tar.gz: 03b5af51c648d4ef5b7f8b358bc0d8539986dbd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 622006bd7fc66fa5e3552654e6a30ee546ecae71058c64ec7410444d10e2507dee8e4e21953fadf39a87bc1566cc0c4ed22d40584da5796f0409d676a7958684
|
7
|
+
data.tar.gz: f26fcdb9e686007d0eca61c3fbe1d507c0f20720ff62462c648a8ddd5b85043478a4686d2adeeddd1dc20c4b8b2b041f2a437b8a63779ad8b6c9adbc56207dcb
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
30
30
|
Following 4 commands install embulk to your home directory:
|
31
31
|
|
32
32
|
```
|
33
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
|
33
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar
|
34
34
|
chmod +x ~/.embulk/bin/embulk
|
35
35
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
36
36
|
source ~/.bashrc
|
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
|
|
45
45
|
You can assume the jar file is a .bat file.
|
46
46
|
|
47
47
|
```
|
48
|
-
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
|
48
|
+
PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar -OutFile embulk.bat}"
|
49
49
|
```
|
50
50
|
|
51
51
|
Next step: [Trying examples](#trying-examples)
|
data/build.gradle
CHANGED
data/embulk-core/src/main/java/org/embulk/command/Runner.java
CHANGED
@@ -21,7 +21,7 @@ import org.embulk.config.ConfigDiff;
|
|
21
21
|
import org.embulk.config.ModelManager;
|
22
22
|
import org.embulk.config.ConfigException;
|
23
23
|
import org.embulk.plugin.PluginType;
|
24
|
-
import org.embulk.exec.LocalExecutor;
|
24
|
+
import org.embulk.exec.BulkLoader;
|
25
25
|
import org.embulk.exec.ExecutionResult;
|
26
26
|
import org.embulk.exec.GuessExecutor;
|
27
27
|
import org.embulk.exec.PreviewExecutor;
|
@@ -129,20 +129,20 @@ public class Runner
|
|
129
129
|
}
|
130
130
|
|
131
131
|
ExecSession exec = newExecSession(config);
|
132
|
-
|
132
|
+
BulkLoader loader = injector.getInstance(BulkLoader.class);
|
133
133
|
ExecutionResult result;
|
134
134
|
try {
|
135
135
|
if (resume != null) {
|
136
|
-
result =
|
136
|
+
result = loader.resume(config, resume);
|
137
137
|
} else {
|
138
|
-
result =
|
138
|
+
result = loader.run(exec, config);
|
139
139
|
}
|
140
140
|
} catch (PartialExecutionException partial) {
|
141
141
|
if (options.getResumeStatePath() == null) {
|
142
142
|
// resume state path is not set. cleanup the transaction
|
143
143
|
exec.getLogger(Runner.class).info("Transaction partially failed. Cleaning up the intermediate data. Use -r option to make it resumable.");
|
144
144
|
try {
|
145
|
-
|
145
|
+
loader.cleanup(config, partial.getResumeState());
|
146
146
|
} catch (Throwable ex) {
|
147
147
|
partial.addSuppressed(ex);
|
148
148
|
}
|
@@ -178,8 +178,8 @@ public class Runner
|
|
178
178
|
ResumeState resume = resumeConfig.loadConfig(ResumeState.class);
|
179
179
|
|
180
180
|
//ExecSession exec = newExecSession(config); // not necessary
|
181
|
-
|
182
|
-
|
181
|
+
BulkLoader loader = injector.getInstance(BulkLoader.class);
|
182
|
+
loader.cleanup(config, resume);
|
183
183
|
|
184
184
|
// delete resume file
|
185
185
|
boolean dontCare = new File(options.getResumeStatePath()).delete();
|
data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java
ADDED
@@ -0,0 +1,664 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Arrays;
|
5
|
+
import java.util.concurrent.ExecutionException;
|
6
|
+
import com.google.common.base.Optional;
|
7
|
+
import com.google.common.collect.ImmutableList;
|
8
|
+
import com.google.inject.Inject;
|
9
|
+
import com.google.inject.Injector;
|
10
|
+
import com.google.common.base.Throwables;
|
11
|
+
import com.google.common.base.Predicates;
|
12
|
+
import com.google.common.collect.Iterables;
|
13
|
+
import org.embulk.config.Task;
|
14
|
+
import org.embulk.config.Config;
|
15
|
+
import org.embulk.config.ConfigDefault;
|
16
|
+
import org.embulk.config.ConfigSource;
|
17
|
+
import org.embulk.config.ConfigException;
|
18
|
+
import org.embulk.config.TaskSource;
|
19
|
+
import org.embulk.config.ConfigDiff;
|
20
|
+
import org.embulk.config.CommitReport;
|
21
|
+
import org.embulk.plugin.PluginType;
|
22
|
+
import org.embulk.spi.Schema;
|
23
|
+
import org.embulk.spi.Exec;
|
24
|
+
import org.embulk.spi.ExecSession;
|
25
|
+
import org.embulk.spi.ExecAction;
|
26
|
+
import org.embulk.spi.ExecutorPlugin;
|
27
|
+
import org.embulk.spi.ProcessTask;
|
28
|
+
import org.embulk.spi.ProcessState;
|
29
|
+
import org.embulk.spi.TaskState;
|
30
|
+
import org.embulk.spi.InputPlugin;
|
31
|
+
import org.embulk.spi.FilterPlugin;
|
32
|
+
import org.embulk.spi.OutputPlugin;
|
33
|
+
import org.embulk.spi.util.Filters;
|
34
|
+
import org.slf4j.Logger;
|
35
|
+
|
36
|
+
public class BulkLoader
|
37
|
+
{
|
38
|
+
private final Injector injector;
|
39
|
+
|
40
|
+
public interface BulkLoaderTask
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("exec")
|
44
|
+
@ConfigDefault("{}")
|
45
|
+
public ConfigSource getExecConfig();
|
46
|
+
|
47
|
+
@Config("in")
|
48
|
+
public ConfigSource getInputConfig();
|
49
|
+
|
50
|
+
@Config("filters")
|
51
|
+
@ConfigDefault("[]")
|
52
|
+
public List<ConfigSource> getFilterConfigs();
|
53
|
+
|
54
|
+
@Config("out")
|
55
|
+
public ConfigSource getOutputConfig();
|
56
|
+
|
57
|
+
public TaskSource getOutputTask();
|
58
|
+
public void setOutputTask(TaskSource taskSource);
|
59
|
+
}
|
60
|
+
|
61
|
+
@Inject
|
62
|
+
public BulkLoader(Injector injector,
|
63
|
+
@ForSystemConfig ConfigSource systemConfig)
|
64
|
+
{
|
65
|
+
this.injector = injector;
|
66
|
+
}
|
67
|
+
|
68
|
+
private static class LoaderState
|
69
|
+
implements ProcessState
|
70
|
+
{
|
71
|
+
private final Logger logger;
|
72
|
+
|
73
|
+
private final ProcessPluginSet plugins;
|
74
|
+
|
75
|
+
private volatile TaskSource inputTaskSource;
|
76
|
+
private volatile TaskSource outputTaskSource;
|
77
|
+
private volatile List<TaskSource> filterTaskSources;
|
78
|
+
private volatile List<Schema> schemas;
|
79
|
+
private volatile Schema executorSchema;
|
80
|
+
|
81
|
+
private volatile ConfigDiff inputConfigDiff;
|
82
|
+
private volatile ConfigDiff outputConfigDiff;
|
83
|
+
|
84
|
+
private volatile List<TaskState> inputTaskStates;
|
85
|
+
private volatile List<TaskState> outputTaskStates;
|
86
|
+
|
87
|
+
public LoaderState(Logger logger, ProcessPluginSet plugins)
|
88
|
+
{
|
89
|
+
this.logger = logger;
|
90
|
+
this.plugins = plugins;
|
91
|
+
}
|
92
|
+
|
93
|
+
public Logger getLogger()
|
94
|
+
{
|
95
|
+
return logger;
|
96
|
+
}
|
97
|
+
|
98
|
+
public void setSchemas(List<Schema> schemas)
|
99
|
+
{
|
100
|
+
this.schemas = schemas;
|
101
|
+
}
|
102
|
+
|
103
|
+
public void setExecutorSchema(Schema executorSchema)
|
104
|
+
{
|
105
|
+
this.executorSchema = executorSchema;
|
106
|
+
}
|
107
|
+
|
108
|
+
public void setInputTaskSource(TaskSource inputTaskSource)
|
109
|
+
{
|
110
|
+
this.inputTaskSource = inputTaskSource;
|
111
|
+
}
|
112
|
+
|
113
|
+
public void setOutputTaskSource(TaskSource outputTaskSource)
|
114
|
+
{
|
115
|
+
this.outputTaskSource = outputTaskSource;
|
116
|
+
}
|
117
|
+
|
118
|
+
public void setFilterTaskSources(List<TaskSource> filterTaskSources)
|
119
|
+
{
|
120
|
+
this.filterTaskSources = filterTaskSources;
|
121
|
+
}
|
122
|
+
|
123
|
+
public ProcessTask buildProcessTask()
|
124
|
+
{
|
125
|
+
return new ProcessTask(
|
126
|
+
plugins.getInputPluginType(), plugins.getOutputPluginType(), plugins.getFilterPluginTypes(),
|
127
|
+
inputTaskSource, outputTaskSource, filterTaskSources,
|
128
|
+
schemas, executorSchema, Exec.newTaskSource());
|
129
|
+
}
|
130
|
+
|
131
|
+
@Override
|
132
|
+
public void initialize(int inputTaskCount, int outputTaskCount)
|
133
|
+
{
|
134
|
+
if (inputTaskStates != null || outputTaskStates != null) {
|
135
|
+
// initialize is called twice if resume (by restoreResumedCommitReports and ExecutorPlugin.execute)
|
136
|
+
if (inputTaskStates.size() != inputTaskCount || outputTaskStates.size() != outputTaskCount) {
|
137
|
+
throw new ConfigException(String.format(
|
138
|
+
"input task count and output task (%d and %d) must be same with the first execution (%d and %d) whenre resumed",
|
139
|
+
inputTaskCount, outputTaskCount, inputTaskStates.size(), outputTaskStates.size()));
|
140
|
+
}
|
141
|
+
} else {
|
142
|
+
ImmutableList.Builder<TaskState> inputTaskStates = ImmutableList.builder();
|
143
|
+
ImmutableList.Builder<TaskState> outputTaskStates = ImmutableList.builder();
|
144
|
+
for (int i=0; i < inputTaskCount; i++) {
|
145
|
+
inputTaskStates.add(new TaskState());
|
146
|
+
}
|
147
|
+
for (int i=0; i < outputTaskCount; i++) {
|
148
|
+
outputTaskStates.add(new TaskState());
|
149
|
+
}
|
150
|
+
this.inputTaskStates = inputTaskStates.build();
|
151
|
+
this.outputTaskStates = outputTaskStates.build();
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
@Override
|
156
|
+
public TaskState getInputTaskState(int inputTaskIndex)
|
157
|
+
{
|
158
|
+
return inputTaskStates.get(inputTaskIndex);
|
159
|
+
}
|
160
|
+
|
161
|
+
@Override
|
162
|
+
public TaskState getOutputTaskState(int outputTaskIndex)
|
163
|
+
{
|
164
|
+
return outputTaskStates.get(outputTaskIndex);
|
165
|
+
}
|
166
|
+
|
167
|
+
public boolean isAllCommitted()
|
168
|
+
{
|
169
|
+
if (outputTaskStates == null) {
|
170
|
+
// not initialized
|
171
|
+
return false;
|
172
|
+
}
|
173
|
+
for (TaskState outputTaskState : outputTaskStates) {
|
174
|
+
if (!outputTaskState.isCommitted()) {
|
175
|
+
return false;
|
176
|
+
}
|
177
|
+
}
|
178
|
+
return true;
|
179
|
+
}
|
180
|
+
|
181
|
+
public boolean isAnyStarted()
|
182
|
+
{
|
183
|
+
if (inputTaskStates == null) {
|
184
|
+
// not initialized
|
185
|
+
return false;
|
186
|
+
}
|
187
|
+
for (TaskState inputTaskState : inputTaskStates) {
|
188
|
+
if (inputTaskState.isStarted()) {
|
189
|
+
return true;
|
190
|
+
}
|
191
|
+
}
|
192
|
+
return false;
|
193
|
+
}
|
194
|
+
|
195
|
+
public void setOutputConfigDiff(ConfigDiff outputConfigDiff)
|
196
|
+
{
|
197
|
+
if (outputConfigDiff == null) {
|
198
|
+
outputConfigDiff = Exec.newConfigDiff();
|
199
|
+
}
|
200
|
+
this.outputConfigDiff = outputConfigDiff;
|
201
|
+
}
|
202
|
+
|
203
|
+
public void setInputConfigDiff(ConfigDiff inputConfigDiff)
|
204
|
+
{
|
205
|
+
if (inputConfigDiff == null) {
|
206
|
+
inputConfigDiff = Exec.newConfigDiff();
|
207
|
+
}
|
208
|
+
this.inputConfigDiff = inputConfigDiff;
|
209
|
+
}
|
210
|
+
|
211
|
+
private List<Optional<CommitReport>> getInputCommitReports()
|
212
|
+
{
|
213
|
+
ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
|
214
|
+
for (TaskState inputTaskState : inputTaskStates) {
|
215
|
+
builder.add(inputTaskState.getCommitReport());
|
216
|
+
}
|
217
|
+
return builder.build();
|
218
|
+
}
|
219
|
+
|
220
|
+
private List<Optional<CommitReport>> getOutputCommitReports()
|
221
|
+
{
|
222
|
+
ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
|
223
|
+
for (TaskState outputTaskState : outputTaskStates) {
|
224
|
+
builder.add(outputTaskState.getCommitReport());
|
225
|
+
}
|
226
|
+
return builder.build();
|
227
|
+
}
|
228
|
+
|
229
|
+
public List<CommitReport> getAllInputCommitReports()
|
230
|
+
{
|
231
|
+
ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
|
232
|
+
for (TaskState inputTaskState : inputTaskStates) {
|
233
|
+
builder.add(inputTaskState.getCommitReport().get());
|
234
|
+
}
|
235
|
+
return builder.build();
|
236
|
+
}
|
237
|
+
|
238
|
+
public List<CommitReport> getAllOutputCommitReports()
|
239
|
+
{
|
240
|
+
ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
|
241
|
+
for (TaskState outputTaskState : outputTaskStates) {
|
242
|
+
builder.add(outputTaskState.getCommitReport().get());
|
243
|
+
}
|
244
|
+
return builder.build();
|
245
|
+
}
|
246
|
+
|
247
|
+
public List<Throwable> getExceptions()
|
248
|
+
{
|
249
|
+
ImmutableList.Builder<Throwable> builder = ImmutableList.builder();
|
250
|
+
if (inputTaskStates != null) { // null if not initialized yet
|
251
|
+
for (TaskState inputTaskState : inputTaskStates) {
|
252
|
+
Optional<Throwable> exception = inputTaskState.getException();
|
253
|
+
if (exception.isPresent()) {
|
254
|
+
builder.add(exception.get());
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|
258
|
+
if (outputTaskStates != null) { // null if not initialized yet
|
259
|
+
for (TaskState outputTaskState : outputTaskStates) {
|
260
|
+
Optional<Throwable> exception = outputTaskState.getException();
|
261
|
+
if (exception.isPresent()) {
|
262
|
+
builder.add(exception.get());
|
263
|
+
}
|
264
|
+
}
|
265
|
+
}
|
266
|
+
return builder.build();
|
267
|
+
}
|
268
|
+
|
269
|
+
public RuntimeException getRepresentativeException()
|
270
|
+
{
|
271
|
+
RuntimeException top = null;
|
272
|
+
for (Throwable ex : getExceptions()) {
|
273
|
+
if (top != null) {
|
274
|
+
top.addSuppressed(ex);
|
275
|
+
} else {
|
276
|
+
if (ex instanceof RuntimeException) {
|
277
|
+
top = (RuntimeException) ex;
|
278
|
+
} else {
|
279
|
+
top = new RuntimeException(ex);
|
280
|
+
}
|
281
|
+
}
|
282
|
+
}
|
283
|
+
if (top == null) {
|
284
|
+
top = new RuntimeException("Some transactions are not committed");
|
285
|
+
}
|
286
|
+
return top;
|
287
|
+
}
|
288
|
+
|
289
|
+
public ExecutionResult buildExecuteResult()
|
290
|
+
{
|
291
|
+
return buildExecuteResultWithWarningException(null);
|
292
|
+
}
|
293
|
+
|
294
|
+
public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
|
295
|
+
{
|
296
|
+
ConfigDiff configDiff = Exec.newConfigDiff();
|
297
|
+
if (inputConfigDiff != null) {
|
298
|
+
configDiff.getNestedOrSetEmpty("in").merge(inputConfigDiff);
|
299
|
+
}
|
300
|
+
if (outputConfigDiff != null) {
|
301
|
+
configDiff.getNestedOrSetEmpty("out").merge(outputConfigDiff);
|
302
|
+
}
|
303
|
+
|
304
|
+
ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
|
305
|
+
for (Throwable e : getExceptions()) {
|
306
|
+
ignoredExceptions.add(e);
|
307
|
+
}
|
308
|
+
if (ex != null) {
|
309
|
+
ignoredExceptions.add(ex);
|
310
|
+
}
|
311
|
+
|
312
|
+
return new ExecutionResult(configDiff, ignoredExceptions.build());
|
313
|
+
}
|
314
|
+
|
315
|
+
public ResumeState buildResumeState(ExecSession exec)
|
316
|
+
{
|
317
|
+
return new ResumeState(
|
318
|
+
exec.getSessionConfigSource(),
|
319
|
+
inputTaskSource, outputTaskSource,
|
320
|
+
first(schemas), executorSchema,
|
321
|
+
getInputCommitReports(), getOutputCommitReports());
|
322
|
+
}
|
323
|
+
|
324
|
+
public PartialExecutionException buildPartialExecuteException(Throwable cause, ExecSession exec)
|
325
|
+
{
|
326
|
+
return new PartialExecutionException(cause, buildResumeState(exec));
|
327
|
+
}
|
328
|
+
}
|
329
|
+
|
330
|
+
protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
|
331
|
+
{
|
332
|
+
return Exec.newPlugin(ExecutorPlugin.class,
|
333
|
+
task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
|
334
|
+
}
|
335
|
+
|
336
|
+
protected InputPlugin newInputPlugin(BulkLoaderTask task)
|
337
|
+
{
|
338
|
+
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
339
|
+
}
|
340
|
+
|
341
|
+
protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
|
342
|
+
{
|
343
|
+
return Filters.newFilterPlugins(Exec.session(),
|
344
|
+
Filters.getPluginTypes(task.getFilterConfigs()));
|
345
|
+
}
|
346
|
+
|
347
|
+
protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
|
348
|
+
{
|
349
|
+
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
350
|
+
}
|
351
|
+
|
352
|
+
public ExecutionResult run(ExecSession exec, final ConfigSource config)
|
353
|
+
{
|
354
|
+
try {
|
355
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
356
|
+
public ExecutionResult run()
|
357
|
+
{
|
358
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName("transaction")) {
|
359
|
+
return doRun(config);
|
360
|
+
}
|
361
|
+
}
|
362
|
+
});
|
363
|
+
} catch (ExecutionException ex) {
|
364
|
+
throw Throwables.propagate(ex.getCause());
|
365
|
+
}
|
366
|
+
}
|
367
|
+
|
368
|
+
public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
|
369
|
+
{
|
370
|
+
try {
|
371
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
372
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
373
|
+
public ExecutionResult run()
|
374
|
+
{
|
375
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName("resume")) {
|
376
|
+
return doResume(config, resume);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
});
|
380
|
+
} catch (ExecutionException ex) {
|
381
|
+
throw Throwables.propagate(ex.getCause());
|
382
|
+
}
|
383
|
+
}
|
384
|
+
|
385
|
+
public void cleanup(final ConfigSource config, final ResumeState resume)
|
386
|
+
{
|
387
|
+
try {
|
388
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
389
|
+
Exec.doWith(exec, new ExecAction<Void>() {
|
390
|
+
public Void run()
|
391
|
+
{
|
392
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName("cleanup")) {
|
393
|
+
doCleanup(config, resume);
|
394
|
+
return null;
|
395
|
+
}
|
396
|
+
}
|
397
|
+
});
|
398
|
+
} catch (ExecutionException ex) {
|
399
|
+
throw Throwables.propagate(ex.getCause());
|
400
|
+
}
|
401
|
+
}
|
402
|
+
|
403
|
+
public void doCleanup(ConfigSource config, ResumeState resume)
|
404
|
+
{
|
405
|
+
BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
406
|
+
InputPlugin inputPlugin = newInputPlugin(task);
|
407
|
+
OutputPlugin outputPlugin = newOutputPlugin(task);
|
408
|
+
|
409
|
+
ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
|
410
|
+
ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
|
411
|
+
for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
|
412
|
+
if (inputCommitReport.isPresent()) {
|
413
|
+
successfulInputCommitReports.add(inputCommitReport.get());
|
414
|
+
}
|
415
|
+
}
|
416
|
+
for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
|
417
|
+
if (outputCommitReport.isPresent()) {
|
418
|
+
successfulOutputCommitReports.add(outputCommitReport.get());
|
419
|
+
}
|
420
|
+
}
|
421
|
+
|
422
|
+
inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
423
|
+
resume.getInputCommitReports().size(), successfulInputCommitReports.build());
|
424
|
+
|
425
|
+
outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
426
|
+
resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
|
427
|
+
}
|
428
|
+
|
429
|
+
private static class ProcessPluginSet
|
430
|
+
{
|
431
|
+
private final PluginType inputPluginType;
|
432
|
+
private final PluginType outputPluginType;
|
433
|
+
private final List<PluginType> filterPluginTypes;
|
434
|
+
|
435
|
+
private final InputPlugin inputPlugin;
|
436
|
+
private final OutputPlugin outputPlugin;
|
437
|
+
private final List<FilterPlugin> filterPlugins;
|
438
|
+
|
439
|
+
public ProcessPluginSet(BulkLoaderTask task)
|
440
|
+
{
|
441
|
+
this.inputPluginType = task.getInputConfig().get(PluginType.class, "type");
|
442
|
+
this.outputPluginType = task.getOutputConfig().get(PluginType.class, "type");
|
443
|
+
this.filterPluginTypes = Filters.getPluginTypes(task.getFilterConfigs());
|
444
|
+
this.inputPlugin = Exec.newPlugin(InputPlugin.class, inputPluginType);
|
445
|
+
this.outputPlugin = Exec.newPlugin(OutputPlugin.class, outputPluginType);
|
446
|
+
this.filterPlugins = Filters.newFilterPlugins(Exec.session(), filterPluginTypes);
|
447
|
+
}
|
448
|
+
|
449
|
+
public PluginType getInputPluginType()
|
450
|
+
{
|
451
|
+
return inputPluginType;
|
452
|
+
}
|
453
|
+
|
454
|
+
public PluginType getOutputPluginType()
|
455
|
+
{
|
456
|
+
return outputPluginType;
|
457
|
+
}
|
458
|
+
|
459
|
+
public List<PluginType> getFilterPluginTypes()
|
460
|
+
{
|
461
|
+
return filterPluginTypes;
|
462
|
+
}
|
463
|
+
|
464
|
+
public InputPlugin getInputPlugin()
|
465
|
+
{
|
466
|
+
return inputPlugin;
|
467
|
+
}
|
468
|
+
|
469
|
+
public OutputPlugin getOutputPlugin()
|
470
|
+
{
|
471
|
+
return outputPlugin;
|
472
|
+
}
|
473
|
+
|
474
|
+
public List<FilterPlugin> getFilterPlugins()
|
475
|
+
{
|
476
|
+
return filterPlugins;
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
private ExecutionResult doRun(ConfigSource config)
|
481
|
+
{
|
482
|
+
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
483
|
+
|
484
|
+
final ExecutorPlugin exec = newExecutorPlugin(task);
|
485
|
+
final ProcessPluginSet plugins = new ProcessPluginSet(task);
|
486
|
+
|
487
|
+
final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
|
488
|
+
try {
|
489
|
+
ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
|
490
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
|
491
|
+
{
|
492
|
+
state.setInputTaskSource(inputTask);
|
493
|
+
Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
494
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
|
495
|
+
{
|
496
|
+
state.setSchemas(schemas);
|
497
|
+
state.setFilterTaskSources(filterTasks);
|
498
|
+
exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
|
499
|
+
public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
|
500
|
+
{
|
501
|
+
state.setExecutorSchema(executorSchema);
|
502
|
+
ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
|
503
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
504
|
+
{
|
505
|
+
state.setOutputTaskSource(outputTask);
|
506
|
+
|
507
|
+
state.initialize(inputTaskCount, outputTaskCount);
|
508
|
+
|
509
|
+
if (!state.isAllCommitted()) { // inputTaskCount == 0
|
510
|
+
execute(task, executor, state);
|
511
|
+
}
|
512
|
+
|
513
|
+
return state.getAllOutputCommitReports();
|
514
|
+
}
|
515
|
+
});
|
516
|
+
state.setOutputConfigDiff(outputConfigDiff);
|
517
|
+
}
|
518
|
+
});
|
519
|
+
}
|
520
|
+
});
|
521
|
+
return state.getAllInputCommitReports();
|
522
|
+
}
|
523
|
+
});
|
524
|
+
state.setInputConfigDiff(inputConfigDiff);
|
525
|
+
|
526
|
+
cleanupCommittedTransaction(config, state);
|
527
|
+
|
528
|
+
return state.buildExecuteResult();
|
529
|
+
|
530
|
+
} catch (Throwable ex) {
|
531
|
+
if (state.isAllCommitted()) {
|
532
|
+
// ignore the exception
|
533
|
+
return state.buildExecuteResultWithWarningException(ex);
|
534
|
+
}
|
535
|
+
if (!state.isAnyStarted()) {
|
536
|
+
throw ex;
|
537
|
+
}
|
538
|
+
throw state.buildPartialExecuteException(ex, Exec.session());
|
539
|
+
}
|
540
|
+
}
|
541
|
+
|
542
|
+
private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
|
543
|
+
{
|
544
|
+
final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
|
545
|
+
|
546
|
+
final ExecutorPlugin exec = newExecutorPlugin(task);
|
547
|
+
final ProcessPluginSet plugins = new ProcessPluginSet(task);
|
548
|
+
|
549
|
+
final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
|
550
|
+
try {
|
551
|
+
ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
|
552
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
|
553
|
+
{
|
554
|
+
// TODO validate inputTask?
|
555
|
+
// TODO validate inputSchema
|
556
|
+
state.setInputTaskSource(inputTask);
|
557
|
+
Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
558
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
|
559
|
+
{
|
560
|
+
state.setSchemas(schemas);
|
561
|
+
state.setFilterTaskSources(filterTasks);
|
562
|
+
exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
|
563
|
+
public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
|
564
|
+
{
|
565
|
+
// TODO validate executorSchema
|
566
|
+
state.setExecutorSchema(executorSchema);
|
567
|
+
ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
|
568
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
569
|
+
{
|
570
|
+
// TODO validate outputTask?
|
571
|
+
state.setOutputTaskSource(outputTask);
|
572
|
+
|
573
|
+
restoreResumedCommitReports(resume, state);
|
574
|
+
if (!state.isAllCommitted()) {
|
575
|
+
execute(task, executor, state);
|
576
|
+
}
|
577
|
+
|
578
|
+
return state.getAllOutputCommitReports();
|
579
|
+
}
|
580
|
+
});
|
581
|
+
state.setOutputConfigDiff(outputConfigDiff);
|
582
|
+
}
|
583
|
+
});
|
584
|
+
}
|
585
|
+
});
|
586
|
+
return state.getAllInputCommitReports();
|
587
|
+
}
|
588
|
+
});
|
589
|
+
state.setInputConfigDiff(inputConfigDiff);
|
590
|
+
|
591
|
+
cleanupCommittedTransaction(config, state);
|
592
|
+
|
593
|
+
return state.buildExecuteResult();
|
594
|
+
|
595
|
+
} catch (Throwable ex) {
|
596
|
+
if (state.isAllCommitted()) {
|
597
|
+
// ignore the exception
|
598
|
+
return state.buildExecuteResultWithWarningException(ex);
|
599
|
+
}
|
600
|
+
if (!state.isAnyStarted()) {
|
601
|
+
throw ex;
|
602
|
+
}
|
603
|
+
throw state.buildPartialExecuteException(ex, Exec.session());
|
604
|
+
}
|
605
|
+
}
|
606
|
+
|
607
|
+
private static void restoreResumedCommitReports(ResumeState resume, LoaderState state)
|
608
|
+
{
|
609
|
+
int inputTaskCount = resume.getInputCommitReports().size();
|
610
|
+
int outputTaskCount = resume.getOutputCommitReports().size();
|
611
|
+
|
612
|
+
state.initialize(inputTaskCount, outputTaskCount);
|
613
|
+
|
614
|
+
for (int i=0; i < inputTaskCount; i++) {
|
615
|
+
Optional<CommitReport> report = resume.getInputCommitReports().get(i);
|
616
|
+
if (report.isPresent()) {
|
617
|
+
TaskState task = state.getInputTaskState(i);
|
618
|
+
task.start();
|
619
|
+
task.setCommitReport(report.get());
|
620
|
+
task.finish();
|
621
|
+
}
|
622
|
+
}
|
623
|
+
|
624
|
+
for (int i=0; i < outputTaskCount; i++) {
|
625
|
+
Optional<CommitReport> report = resume.getOutputCommitReports().get(i);
|
626
|
+
if (report.isPresent()) {
|
627
|
+
TaskState task = state.getOutputTaskState(i);
|
628
|
+
task.start();
|
629
|
+
task.setCommitReport(report.get());
|
630
|
+
task.finish();
|
631
|
+
}
|
632
|
+
}
|
633
|
+
}
|
634
|
+
|
635
|
+
private void execute(BulkLoaderTask task, ExecutorPlugin.Executor executor, LoaderState state)
|
636
|
+
{
|
637
|
+
ProcessTask procTask = state.buildProcessTask();
|
638
|
+
|
639
|
+
executor.execute(procTask, state);
|
640
|
+
|
641
|
+
if (!state.isAllCommitted()) {
|
642
|
+
throw state.getRepresentativeException();
|
643
|
+
}
|
644
|
+
}
|
645
|
+
|
646
|
+
private void cleanupCommittedTransaction(ConfigSource config, LoaderState state)
|
647
|
+
{
|
648
|
+
try {
|
649
|
+
doCleanup(config, state.buildResumeState(Exec.session()));
|
650
|
+
} catch (Exception ex) {
|
651
|
+
state.getLogger().warn("Commit succeeded but cleanup failed. Ignoring this exception.", ex); // TODO
|
652
|
+
}
|
653
|
+
}
|
654
|
+
|
655
|
+
private static Schema first(List<Schema> schemas)
|
656
|
+
{
|
657
|
+
return schemas.get(0);
|
658
|
+
}
|
659
|
+
|
660
|
+
private static Schema last(List<Schema> schemas)
|
661
|
+
{
|
662
|
+
return schemas.get(schemas.size() - 1);
|
663
|
+
}
|
664
|
+
}
|