embulk 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/build.gradle +1 -1
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -7
  5. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +664 -0
  6. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +5 -0
  7. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +130 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
  9. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +3 -3
  10. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +7 -6
  12. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +35 -3
  14. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +4 -1
  15. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +1 -1
  16. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/Page.java +6 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +73 -1
  19. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +118 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +70 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +92 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +17 -3
  24. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
  25. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
  26. data/embulk-docs/src/release.rst +1 -0
  27. data/embulk-docs/src/release/release-0.6.0.rst +34 -0
  28. data/lib/embulk/executor_plugin.rb +23 -0
  29. data/lib/embulk/java_plugin.rb +5 -0
  30. data/lib/embulk/plugin.rb +13 -2
  31. data/lib/embulk/version.rb +1 -1
  32. metadata +15 -5
  33. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +0 -660
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
4
- data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
3
+ metadata.gz: 346027a3a74803953c01e36cd76e41f84aba43db
4
+ data.tar.gz: 03b5af51c648d4ef5b7f8b358bc0d8539986dbd3
5
5
  SHA512:
6
- metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
7
- data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
6
+ metadata.gz: 622006bd7fc66fa5e3552654e6a30ee546ecae71058c64ec7410444d10e2507dee8e4e21953fadf39a87bc1566cc0c4ed22d40584da5796f0409d676a7958684
7
+ data.tar.gz: f26fcdb9e686007d0eca61c3fbe1d507c0f20720ff62462c648a8ddd5b85043478a4686d2adeeddd1dc20c4b8b2b041f2a437b8a63779ad8b6c9adbc56207dcb
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.5'
15
+ version = '0.6.0'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
data/embulk-core/src/main/java/org/embulk/command/Runner.java CHANGED
@@ -21,7 +21,7 @@ import org.embulk.config.ConfigDiff;
21
21
  import org.embulk.config.ModelManager;
22
22
  import org.embulk.config.ConfigException;
23
23
  import org.embulk.plugin.PluginType;
24
- import org.embulk.exec.LocalExecutor;
24
+ import org.embulk.exec.BulkLoader;
25
25
  import org.embulk.exec.ExecutionResult;
26
26
  import org.embulk.exec.GuessExecutor;
27
27
  import org.embulk.exec.PreviewExecutor;
@@ -129,20 +129,20 @@ public class Runner
129
129
  }
130
130
 
131
131
  ExecSession exec = newExecSession(config);
132
- LocalExecutor local = injector.getInstance(LocalExecutor.class);
132
+ BulkLoader loader = injector.getInstance(BulkLoader.class);
133
133
  ExecutionResult result;
134
134
  try {
135
135
  if (resume != null) {
136
- result = local.resume(config, resume);
136
+ result = loader.resume(config, resume);
137
137
  } else {
138
- result = local.run(exec, config);
138
+ result = loader.run(exec, config);
139
139
  }
140
140
  } catch (PartialExecutionException partial) {
141
141
  if (options.getResumeStatePath() == null) {
142
142
  // resume state path is not set. cleanup the transaction
143
143
  exec.getLogger(Runner.class).info("Transaction partially failed. Cleaning up the intermediate data. Use -r option to make it resumable.");
144
144
  try {
145
- local.cleanup(config, partial.getResumeState());
145
+ loader.cleanup(config, partial.getResumeState());
146
146
  } catch (Throwable ex) {
147
147
  partial.addSuppressed(ex);
148
148
  }
@@ -178,8 +178,8 @@ public class Runner
178
178
  ResumeState resume = resumeConfig.loadConfig(ResumeState.class);
179
179
 
180
180
  //ExecSession exec = newExecSession(config); // not necessary
181
- LocalExecutor local = injector.getInstance(LocalExecutor.class);
182
- local.cleanup(config, resume);
181
+ BulkLoader loader = injector.getInstance(BulkLoader.class);
182
+ loader.cleanup(config, resume);
183
183
 
184
184
  // delete resume file
185
185
  boolean dontCare = new File(options.getResumeStatePath()).delete();
data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java ADDED
@@ -0,0 +1,664 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.Arrays;
5
+ import java.util.concurrent.ExecutionException;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.collect.ImmutableList;
8
+ import com.google.inject.Inject;
9
+ import com.google.inject.Injector;
10
+ import com.google.common.base.Throwables;
11
+ import com.google.common.base.Predicates;
12
+ import com.google.common.collect.Iterables;
13
+ import org.embulk.config.Task;
14
+ import org.embulk.config.Config;
15
+ import org.embulk.config.ConfigDefault;
16
+ import org.embulk.config.ConfigSource;
17
+ import org.embulk.config.ConfigException;
18
+ import org.embulk.config.TaskSource;
19
+ import org.embulk.config.ConfigDiff;
20
+ import org.embulk.config.CommitReport;
21
+ import org.embulk.plugin.PluginType;
22
+ import org.embulk.spi.Schema;
23
+ import org.embulk.spi.Exec;
24
+ import org.embulk.spi.ExecSession;
25
+ import org.embulk.spi.ExecAction;
26
+ import org.embulk.spi.ExecutorPlugin;
27
+ import org.embulk.spi.ProcessTask;
28
+ import org.embulk.spi.ProcessState;
29
+ import org.embulk.spi.TaskState;
30
+ import org.embulk.spi.InputPlugin;
31
+ import org.embulk.spi.FilterPlugin;
32
+ import org.embulk.spi.OutputPlugin;
33
+ import org.embulk.spi.util.Filters;
34
+ import org.slf4j.Logger;
35
+
36
+ public class BulkLoader
37
+ {
38
+ private final Injector injector;
39
+
40
+ public interface BulkLoaderTask
41
+ extends Task
42
+ {
43
+ @Config("exec")
44
+ @ConfigDefault("{}")
45
+ public ConfigSource getExecConfig();
46
+
47
+ @Config("in")
48
+ public ConfigSource getInputConfig();
49
+
50
+ @Config("filters")
51
+ @ConfigDefault("[]")
52
+ public List<ConfigSource> getFilterConfigs();
53
+
54
+ @Config("out")
55
+ public ConfigSource getOutputConfig();
56
+
57
+ public TaskSource getOutputTask();
58
+ public void setOutputTask(TaskSource taskSource);
59
+ }
60
+
61
+ @Inject
62
+ public BulkLoader(Injector injector,
63
+ @ForSystemConfig ConfigSource systemConfig)
64
+ {
65
+ this.injector = injector;
66
+ }
67
+
68
+ private static class LoaderState
69
+ implements ProcessState
70
+ {
71
+ private final Logger logger;
72
+
73
+ private final ProcessPluginSet plugins;
74
+
75
+ private volatile TaskSource inputTaskSource;
76
+ private volatile TaskSource outputTaskSource;
77
+ private volatile List<TaskSource> filterTaskSources;
78
+ private volatile List<Schema> schemas;
79
+ private volatile Schema executorSchema;
80
+
81
+ private volatile ConfigDiff inputConfigDiff;
82
+ private volatile ConfigDiff outputConfigDiff;
83
+
84
+ private volatile List<TaskState> inputTaskStates;
85
+ private volatile List<TaskState> outputTaskStates;
86
+
87
+ public LoaderState(Logger logger, ProcessPluginSet plugins)
88
+ {
89
+ this.logger = logger;
90
+ this.plugins = plugins;
91
+ }
92
+
93
+ public Logger getLogger()
94
+ {
95
+ return logger;
96
+ }
97
+
98
+ public void setSchemas(List<Schema> schemas)
99
+ {
100
+ this.schemas = schemas;
101
+ }
102
+
103
+ public void setExecutorSchema(Schema executorSchema)
104
+ {
105
+ this.executorSchema = executorSchema;
106
+ }
107
+
108
+ public void setInputTaskSource(TaskSource inputTaskSource)
109
+ {
110
+ this.inputTaskSource = inputTaskSource;
111
+ }
112
+
113
+ public void setOutputTaskSource(TaskSource outputTaskSource)
114
+ {
115
+ this.outputTaskSource = outputTaskSource;
116
+ }
117
+
118
+ public void setFilterTaskSources(List<TaskSource> filterTaskSources)
119
+ {
120
+ this.filterTaskSources = filterTaskSources;
121
+ }
122
+
123
+ public ProcessTask buildProcessTask()
124
+ {
125
+ return new ProcessTask(
126
+ plugins.getInputPluginType(), plugins.getOutputPluginType(), plugins.getFilterPluginTypes(),
127
+ inputTaskSource, outputTaskSource, filterTaskSources,
128
+ schemas, executorSchema, Exec.newTaskSource());
129
+ }
130
+
131
+ @Override
132
+ public void initialize(int inputTaskCount, int outputTaskCount)
133
+ {
134
+ if (inputTaskStates != null || outputTaskStates != null) {
135
+ // Already initialized: when resuming, initialize is called twice (by restoreResumedCommitReports and ExecutorPlugin.execute), so only verify that the task counts are unchanged.
136
+ if (inputTaskStates.size() != inputTaskCount || outputTaskStates.size() != outputTaskCount) {
137
+ throw new ConfigException(String.format(
138
+ "input task count and output task count (%d and %d) must be the same as the first execution (%d and %d) when resumed",
139
+ inputTaskCount, outputTaskCount, inputTaskStates.size(), outputTaskStates.size()));
140
+ }
141
+ } else {
142
+ ImmutableList.Builder<TaskState> inputBuilder = ImmutableList.builder(); // first call: one fresh TaskState per task
143
+ ImmutableList.Builder<TaskState> outputBuilder = ImmutableList.builder(); // builders no longer shadow the fields
144
+ for (int i=0; i < inputTaskCount; i++) {
145
+ inputBuilder.add(new TaskState());
146
+ }
147
+ for (int i=0; i < outputTaskCount; i++) {
148
+ outputBuilder.add(new TaskState());
149
+ }
150
+ this.inputTaskStates = inputBuilder.build();
151
+ this.outputTaskStates = outputBuilder.build();
152
+ }
153
+ }
154
+
155
+ @Override
156
+ public TaskState getInputTaskState(int inputTaskIndex)
157
+ {
158
+ return inputTaskStates.get(inputTaskIndex);
159
+ }
160
+
161
+ @Override
162
+ public TaskState getOutputTaskState(int outputTaskIndex)
163
+ {
164
+ return outputTaskStates.get(outputTaskIndex);
165
+ }
166
+
167
+ public boolean isAllCommitted()
168
+ {
169
+ if (outputTaskStates == null) {
170
+ // not initialized
171
+ return false;
172
+ }
173
+ for (TaskState outputTaskState : outputTaskStates) {
174
+ if (!outputTaskState.isCommitted()) {
175
+ return false;
176
+ }
177
+ }
178
+ return true;
179
+ }
180
+
181
+ public boolean isAnyStarted()
182
+ {
183
+ if (inputTaskStates == null) {
184
+ // not initialized
185
+ return false;
186
+ }
187
+ for (TaskState inputTaskState : inputTaskStates) {
188
+ if (inputTaskState.isStarted()) {
189
+ return true;
190
+ }
191
+ }
192
+ return false;
193
+ }
194
+
195
+ public void setOutputConfigDiff(ConfigDiff outputConfigDiff)
196
+ {
197
+ if (outputConfigDiff == null) {
198
+ outputConfigDiff = Exec.newConfigDiff();
199
+ }
200
+ this.outputConfigDiff = outputConfigDiff;
201
+ }
202
+
203
+ public void setInputConfigDiff(ConfigDiff inputConfigDiff)
204
+ {
205
+ if (inputConfigDiff == null) {
206
+ inputConfigDiff = Exec.newConfigDiff();
207
+ }
208
+ this.inputConfigDiff = inputConfigDiff;
209
+ }
210
+
211
+ private List<Optional<CommitReport>> getInputCommitReports()
212
+ {
213
+ ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
214
+ for (TaskState inputTaskState : inputTaskStates) {
215
+ builder.add(inputTaskState.getCommitReport());
216
+ }
217
+ return builder.build();
218
+ }
219
+
220
+ private List<Optional<CommitReport>> getOutputCommitReports()
221
+ {
222
+ ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
223
+ for (TaskState outputTaskState : outputTaskStates) {
224
+ builder.add(outputTaskState.getCommitReport());
225
+ }
226
+ return builder.build();
227
+ }
228
+
229
+ public List<CommitReport> getAllInputCommitReports()
230
+ {
231
+ ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
232
+ for (TaskState inputTaskState : inputTaskStates) {
233
+ builder.add(inputTaskState.getCommitReport().get());
234
+ }
235
+ return builder.build();
236
+ }
237
+
238
+ public List<CommitReport> getAllOutputCommitReports()
239
+ {
240
+ ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
241
+ for (TaskState outputTaskState : outputTaskStates) {
242
+ builder.add(outputTaskState.getCommitReport().get());
243
+ }
244
+ return builder.build();
245
+ }
246
+
247
+ public List<Throwable> getExceptions()
248
+ {
249
+ ImmutableList.Builder<Throwable> builder = ImmutableList.builder();
250
+ if (inputTaskStates != null) { // null if not initialized yet
251
+ for (TaskState inputTaskState : inputTaskStates) {
252
+ Optional<Throwable> exception = inputTaskState.getException();
253
+ if (exception.isPresent()) {
254
+ builder.add(exception.get());
255
+ }
256
+ }
257
+ }
258
+ if (outputTaskStates != null) { // null if not initialized yet
259
+ for (TaskState outputTaskState : outputTaskStates) {
260
+ Optional<Throwable> exception = outputTaskState.getException();
261
+ if (exception.isPresent()) {
262
+ builder.add(exception.get());
263
+ }
264
+ }
265
+ }
266
+ return builder.build();
267
+ }
268
+
269
+ public RuntimeException getRepresentativeException()
270
+ {
271
+ // Returns the first exception from getExceptions() (wrapped in a RuntimeException unless it
272
+ // already is one) with every other recorded exception attached to it as suppressed.
273
+ RuntimeException top = null;
274
+ for (Throwable ex : getExceptions()) {
275
+ if (top == null) {
276
+ top = (ex instanceof RuntimeException) ? (RuntimeException) ex : new RuntimeException(ex);
277
+ } else if (ex != top) {
278
+ // Throwable.addSuppressed(this) throws IllegalArgumentException, so skip the same instance if it was recorded on more than one task state.
279
+ top.addSuppressed(ex);
280
+ }
281
+ }
282
+ if (top == null) {
283
+ // no task state recorded an exception; fall back to a generic failure
284
+ top = new RuntimeException("Some transactions are not committed");
285
+ }
286
+ return top;
287
+ }
288
+
289
+ public ExecutionResult buildExecuteResult()
290
+ {
291
+ return buildExecuteResultWithWarningException(null);
292
+ }
293
+
294
+ public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
295
+ {
296
+ ConfigDiff configDiff = Exec.newConfigDiff();
297
+ if (inputConfigDiff != null) {
298
+ configDiff.getNestedOrSetEmpty("in").merge(inputConfigDiff);
299
+ }
300
+ if (outputConfigDiff != null) {
301
+ configDiff.getNestedOrSetEmpty("out").merge(outputConfigDiff);
302
+ }
303
+
304
+ ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
305
+ for (Throwable e : getExceptions()) {
306
+ ignoredExceptions.add(e);
307
+ }
308
+ if (ex != null) {
309
+ ignoredExceptions.add(ex);
310
+ }
311
+
312
+ return new ExecutionResult(configDiff, ignoredExceptions.build());
313
+ }
314
+
315
+ public ResumeState buildResumeState(ExecSession exec)
316
+ {
317
+ return new ResumeState(
318
+ exec.getSessionConfigSource(),
319
+ inputTaskSource, outputTaskSource,
320
+ first(schemas), executorSchema,
321
+ getInputCommitReports(), getOutputCommitReports());
322
+ }
323
+
324
+ public PartialExecutionException buildPartialExecuteException(Throwable cause, ExecSession exec)
325
+ {
326
+ return new PartialExecutionException(cause, buildResumeState(exec));
327
+ }
328
+ }
329
+
330
+ protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
331
+ {
332
+ return Exec.newPlugin(ExecutorPlugin.class,
333
+ task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
334
+ }
335
+
336
+ protected InputPlugin newInputPlugin(BulkLoaderTask task)
337
+ {
338
+ return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
339
+ }
340
+
341
+ protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
342
+ {
343
+ return Filters.newFilterPlugins(Exec.session(),
344
+ Filters.getPluginTypes(task.getFilterConfigs()));
345
+ }
346
+
347
+ protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
348
+ {
349
+ return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
350
+ }
351
+
352
+ public ExecutionResult run(ExecSession exec, final ConfigSource config)
353
+ {
354
+ try {
355
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
356
+ public ExecutionResult run()
357
+ {
358
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("transaction")) {
359
+ return doRun(config);
360
+ }
361
+ }
362
+ });
363
+ } catch (ExecutionException ex) {
364
+ throw Throwables.propagate(ex.getCause());
365
+ }
366
+ }
367
+
368
+ public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
369
+ {
370
+ try {
371
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
372
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
373
+ public ExecutionResult run()
374
+ {
375
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("resume")) {
376
+ return doResume(config, resume);
377
+ }
378
+ }
379
+ });
380
+ } catch (ExecutionException ex) {
381
+ throw Throwables.propagate(ex.getCause());
382
+ }
383
+ }
384
+
385
+ public void cleanup(final ConfigSource config, final ResumeState resume)
386
+ {
387
+ try {
388
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
389
+ Exec.doWith(exec, new ExecAction<Void>() {
390
+ public Void run()
391
+ {
392
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("cleanup")) {
393
+ doCleanup(config, resume);
394
+ return null;
395
+ }
396
+ }
397
+ });
398
+ } catch (ExecutionException ex) {
399
+ throw Throwables.propagate(ex.getCause());
400
+ }
401
+ }
402
+
403
+ public void doCleanup(ConfigSource config, ResumeState resume)
404
+ {
405
+ BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
406
+ InputPlugin inputPlugin = newInputPlugin(task);
407
+ OutputPlugin outputPlugin = newOutputPlugin(task);
408
+
409
+ ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
410
+ ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
411
+ for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
412
+ if (inputCommitReport.isPresent()) {
413
+ successfulInputCommitReports.add(inputCommitReport.get());
414
+ }
415
+ }
416
+ for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
417
+ if (outputCommitReport.isPresent()) {
418
+ successfulOutputCommitReports.add(outputCommitReport.get());
419
+ }
420
+ }
421
+
422
+ inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
423
+ resume.getInputCommitReports().size(), successfulInputCommitReports.build());
424
+
425
+ outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
426
+ resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
427
+ }
428
+
429
+ private static class ProcessPluginSet
430
+ {
431
+ private final PluginType inputPluginType;
432
+ private final PluginType outputPluginType;
433
+ private final List<PluginType> filterPluginTypes;
434
+
435
+ private final InputPlugin inputPlugin;
436
+ private final OutputPlugin outputPlugin;
437
+ private final List<FilterPlugin> filterPlugins;
438
+
439
+ public ProcessPluginSet(BulkLoaderTask task)
440
+ {
441
+ this.inputPluginType = task.getInputConfig().get(PluginType.class, "type");
442
+ this.outputPluginType = task.getOutputConfig().get(PluginType.class, "type");
443
+ this.filterPluginTypes = Filters.getPluginTypes(task.getFilterConfigs());
444
+ this.inputPlugin = Exec.newPlugin(InputPlugin.class, inputPluginType);
445
+ this.outputPlugin = Exec.newPlugin(OutputPlugin.class, outputPluginType);
446
+ this.filterPlugins = Filters.newFilterPlugins(Exec.session(), filterPluginTypes);
447
+ }
448
+
449
+ public PluginType getInputPluginType()
450
+ {
451
+ return inputPluginType;
452
+ }
453
+
454
+ public PluginType getOutputPluginType()
455
+ {
456
+ return outputPluginType;
457
+ }
458
+
459
+ public List<PluginType> getFilterPluginTypes()
460
+ {
461
+ return filterPluginTypes;
462
+ }
463
+
464
+ public InputPlugin getInputPlugin()
465
+ {
466
+ return inputPlugin;
467
+ }
468
+
469
+ public OutputPlugin getOutputPlugin()
470
+ {
471
+ return outputPlugin;
472
+ }
473
+
474
+ public List<FilterPlugin> getFilterPlugins()
475
+ {
476
+ return filterPlugins;
477
+ }
478
+ }
479
+
480
+ private ExecutionResult doRun(ConfigSource config)
481
+ {
482
+ final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
483
+
484
+ final ExecutorPlugin exec = newExecutorPlugin(task);
485
+ final ProcessPluginSet plugins = new ProcessPluginSet(task);
486
+
487
+ final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
488
+ try {
489
+ ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
490
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
491
+ {
492
+ state.setInputTaskSource(inputTask);
493
+ Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
494
+ public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
495
+ {
496
+ state.setSchemas(schemas);
497
+ state.setFilterTaskSources(filterTasks);
498
+ exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
499
+ public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
500
+ {
501
+ state.setExecutorSchema(executorSchema);
502
+ ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
503
+ public List<CommitReport> run(final TaskSource outputTask)
504
+ {
505
+ state.setOutputTaskSource(outputTask);
506
+
507
+ state.initialize(inputTaskCount, outputTaskCount);
508
+
509
+ if (!state.isAllCommitted()) { // inputTaskCount == 0
510
+ execute(task, executor, state);
511
+ }
512
+
513
+ return state.getAllOutputCommitReports();
514
+ }
515
+ });
516
+ state.setOutputConfigDiff(outputConfigDiff);
517
+ }
518
+ });
519
+ }
520
+ });
521
+ return state.getAllInputCommitReports();
522
+ }
523
+ });
524
+ state.setInputConfigDiff(inputConfigDiff);
525
+
526
+ cleanupCommittedTransaction(config, state);
527
+
528
+ return state.buildExecuteResult();
529
+
530
+ } catch (Throwable ex) {
531
+ if (state.isAllCommitted()) {
532
+ // ignore the exception
533
+ return state.buildExecuteResultWithWarningException(ex);
534
+ }
535
+ if (!state.isAnyStarted()) {
536
+ throw ex;
537
+ }
538
+ throw state.buildPartialExecuteException(ex, Exec.session());
539
+ }
540
+ }
541
+
542
+ private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
543
+ {
544
+ final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
545
+
546
+ final ExecutorPlugin exec = newExecutorPlugin(task);
547
+ final ProcessPluginSet plugins = new ProcessPluginSet(task);
548
+
549
+ final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
550
+ try {
551
+ ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
552
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
553
+ {
554
+ // TODO validate inputTask?
555
+ // TODO validate inputSchema
556
+ state.setInputTaskSource(inputTask);
557
+ Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
558
+ public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
559
+ {
560
+ state.setSchemas(schemas);
561
+ state.setFilterTaskSources(filterTasks);
562
+ exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
563
+ public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
564
+ {
565
+ // TODO validate executorSchema
566
+ state.setExecutorSchema(executorSchema);
567
+ ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
568
+ public List<CommitReport> run(final TaskSource outputTask)
569
+ {
570
+ // TODO validate outputTask?
571
+ state.setOutputTaskSource(outputTask);
572
+
573
+ restoreResumedCommitReports(resume, state);
574
+ if (!state.isAllCommitted()) {
575
+ execute(task, executor, state);
576
+ }
577
+
578
+ return state.getAllOutputCommitReports();
579
+ }
580
+ });
581
+ state.setOutputConfigDiff(outputConfigDiff);
582
+ }
583
+ });
584
+ }
585
+ });
586
+ return state.getAllInputCommitReports();
587
+ }
588
+ });
589
+ state.setInputConfigDiff(inputConfigDiff);
590
+
591
+ cleanupCommittedTransaction(config, state);
592
+
593
+ return state.buildExecuteResult();
594
+
595
+ } catch (Throwable ex) {
596
+ if (state.isAllCommitted()) {
597
+ // ignore the exception
598
+ return state.buildExecuteResultWithWarningException(ex);
599
+ }
600
+ if (!state.isAnyStarted()) {
601
+ throw ex;
602
+ }
603
+ throw state.buildPartialExecuteException(ex, Exec.session());
604
+ }
605
+ }
606
+
607
+ private static void restoreResumedCommitReports(ResumeState resume, LoaderState state)
608
+ {
609
+ int inputTaskCount = resume.getInputCommitReports().size();
610
+ int outputTaskCount = resume.getOutputCommitReports().size();
611
+
612
+ state.initialize(inputTaskCount, outputTaskCount);
613
+
614
+ for (int i=0; i < inputTaskCount; i++) {
615
+ Optional<CommitReport> report = resume.getInputCommitReports().get(i);
616
+ if (report.isPresent()) {
617
+ TaskState task = state.getInputTaskState(i);
618
+ task.start();
619
+ task.setCommitReport(report.get());
620
+ task.finish();
621
+ }
622
+ }
623
+
624
+ for (int i=0; i < outputTaskCount; i++) {
625
+ Optional<CommitReport> report = resume.getOutputCommitReports().get(i);
626
+ if (report.isPresent()) {
627
+ TaskState task = state.getOutputTaskState(i);
628
+ task.start();
629
+ task.setCommitReport(report.get());
630
+ task.finish();
631
+ }
632
+ }
633
+ }
634
+
635
+ private void execute(BulkLoaderTask task, ExecutorPlugin.Executor executor, LoaderState state)
636
+ {
637
+ ProcessTask procTask = state.buildProcessTask();
638
+
639
+ executor.execute(procTask, state);
640
+
641
+ if (!state.isAllCommitted()) {
642
+ throw state.getRepresentativeException();
643
+ }
644
+ }
645
+
646
+ private void cleanupCommittedTransaction(ConfigSource config, LoaderState state)
647
+ {
648
+ try {
649
+ doCleanup(config, state.buildResumeState(Exec.session()));
650
+ } catch (Exception ex) {
651
+ state.getLogger().warn("Commit succeeded but cleanup failed. Ignoring this exception.", ex); // TODO
652
+ }
653
+ }
654
+
655
+ private static Schema first(List<Schema> schemas)
656
+ {
657
+ return schemas.get(0);
658
+ }
659
+
660
+ private static Schema last(List<Schema> schemas)
661
+ {
662
+ return schemas.get(schemas.size() - 1);
663
+ }
664
+ }