embulk 0.5.5 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/build.gradle +1 -1
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -7
  5. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +664 -0
  6. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +5 -0
  7. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +130 -0
  8. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
  9. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +3 -3
  10. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +7 -6
  12. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +35 -3
  14. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +4 -1
  15. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +1 -1
  16. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
  17. data/embulk-core/src/main/java/org/embulk/spi/Page.java +6 -0
  18. data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +73 -1
  19. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
  20. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +118 -0
  21. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +70 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +92 -0
  23. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +17 -3
  24. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
  25. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +1 -1
  26. data/embulk-docs/src/release.rst +1 -0
  27. data/embulk-docs/src/release/release-0.6.0.rst +34 -0
  28. data/lib/embulk/executor_plugin.rb +23 -0
  29. data/lib/embulk/java_plugin.rb +5 -0
  30. data/lib/embulk/plugin.rb +13 -2
  31. data/lib/embulk/version.rb +1 -1
  32. metadata +15 -5
  33. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +0 -660
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f78ef9df69c9d408b6a7df450706a54cb646c596
4
- data.tar.gz: 79a55069daa0ec3f952fd3e6d66f9536b9b6eac5
3
+ metadata.gz: 346027a3a74803953c01e36cd76e41f84aba43db
4
+ data.tar.gz: 03b5af51c648d4ef5b7f8b358bc0d8539986dbd3
5
5
  SHA512:
6
- metadata.gz: 51c83cefab8712f70e350ccc446ecab9c11779447c94ddc5c7b4c10597e1c41fe9d6853e7c7b3ff0f7d132082a604901cf7344b0382711c42d101b20f210a359
7
- data.tar.gz: 17d4ab5552e98c6bc0fa3368cbf22ab87814e035dbae5d8b6e6c91aa6778f9d89ab2a666935b382e0dd0e7252a68460c10ea54ac9d18bf4664dfc4b00d5cf84e
6
+ metadata.gz: 622006bd7fc66fa5e3552654e6a30ee546ecae71058c64ec7410444d10e2507dee8e4e21953fadf39a87bc1566cc0c4ed22d40584da5796f0409d676a7958684
7
+ data.tar.gz: f26fcdb9e686007d0eca61c3fbe1d507c0f20720ff62462c648a8ddd5b85043478a4686d2adeeddd1dc20c4b8b2b041f2a437b8a63779ad8b6c9adbc56207dcb
data/README.md CHANGED
@@ -30,7 +30,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
30
30
  Following 4 commands install embulk to your home directory:
31
31
 
32
32
  ```
33
- curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar
33
+ curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar
34
34
  chmod +x ~/.embulk/bin/embulk
35
35
  echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
36
36
  source ~/.bashrc
@@ -45,7 +45,7 @@ Embulk is a Java application. Please make sure that you installed [Java](http://
45
45
  You can assume the jar file is a .bat file.
46
46
 
47
47
  ```
48
- PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.5.5.jar -OutFile embulk.bat}"
48
+ PowerShell -Command "& {Invoke-WebRequest https://bintray.com/artifact/download/embulk/maven/embulk-0.6.0.jar -OutFile embulk.bat}"
49
49
  ```
50
50
 
51
51
  Next step: [Trying examples](#trying-examples)
data/build.gradle CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
12
12
 
13
13
  allprojects {
14
14
  group = 'org.embulk'
15
- version = '0.5.5'
15
+ version = '0.6.0'
16
16
 
17
17
  apply plugin: 'java'
18
18
  apply plugin: 'maven-publish'
data/embulk-core/src/main/java/org/embulk/command/Runner.java CHANGED
@@ -21,7 +21,7 @@ import org.embulk.config.ConfigDiff;
21
21
  import org.embulk.config.ModelManager;
22
22
  import org.embulk.config.ConfigException;
23
23
  import org.embulk.plugin.PluginType;
24
- import org.embulk.exec.LocalExecutor;
24
+ import org.embulk.exec.BulkLoader;
25
25
  import org.embulk.exec.ExecutionResult;
26
26
  import org.embulk.exec.GuessExecutor;
27
27
  import org.embulk.exec.PreviewExecutor;
@@ -129,20 +129,20 @@ public class Runner
129
129
  }
130
130
 
131
131
  ExecSession exec = newExecSession(config);
132
- LocalExecutor local = injector.getInstance(LocalExecutor.class);
132
+ BulkLoader loader = injector.getInstance(BulkLoader.class);
133
133
  ExecutionResult result;
134
134
  try {
135
135
  if (resume != null) {
136
- result = local.resume(config, resume);
136
+ result = loader.resume(config, resume);
137
137
  } else {
138
- result = local.run(exec, config);
138
+ result = loader.run(exec, config);
139
139
  }
140
140
  } catch (PartialExecutionException partial) {
141
141
  if (options.getResumeStatePath() == null) {
142
142
  // resume state path is not set. cleanup the transaction
143
143
  exec.getLogger(Runner.class).info("Transaction partially failed. Cleaning up the intermediate data. Use -r option to make it resumable.");
144
144
  try {
145
- local.cleanup(config, partial.getResumeState());
145
+ loader.cleanup(config, partial.getResumeState());
146
146
  } catch (Throwable ex) {
147
147
  partial.addSuppressed(ex);
148
148
  }
@@ -178,8 +178,8 @@ public class Runner
178
178
  ResumeState resume = resumeConfig.loadConfig(ResumeState.class);
179
179
 
180
180
  //ExecSession exec = newExecSession(config); // not necessary
181
- LocalExecutor local = injector.getInstance(LocalExecutor.class);
182
- local.cleanup(config, resume);
181
+ BulkLoader loader = injector.getInstance(BulkLoader.class);
182
+ loader.cleanup(config, resume);
183
183
 
184
184
  // delete resume file
185
185
  boolean dontCare = new File(options.getResumeStatePath()).delete();
data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java ADDED
@@ -0,0 +1,664 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.Arrays;
5
+ import java.util.concurrent.ExecutionException;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.collect.ImmutableList;
8
+ import com.google.inject.Inject;
9
+ import com.google.inject.Injector;
10
+ import com.google.common.base.Throwables;
11
+ import com.google.common.base.Predicates;
12
+ import com.google.common.collect.Iterables;
13
+ import org.embulk.config.Task;
14
+ import org.embulk.config.Config;
15
+ import org.embulk.config.ConfigDefault;
16
+ import org.embulk.config.ConfigSource;
17
+ import org.embulk.config.ConfigException;
18
+ import org.embulk.config.TaskSource;
19
+ import org.embulk.config.ConfigDiff;
20
+ import org.embulk.config.CommitReport;
21
+ import org.embulk.plugin.PluginType;
22
+ import org.embulk.spi.Schema;
23
+ import org.embulk.spi.Exec;
24
+ import org.embulk.spi.ExecSession;
25
+ import org.embulk.spi.ExecAction;
26
+ import org.embulk.spi.ExecutorPlugin;
27
+ import org.embulk.spi.ProcessTask;
28
+ import org.embulk.spi.ProcessState;
29
+ import org.embulk.spi.TaskState;
30
+ import org.embulk.spi.InputPlugin;
31
+ import org.embulk.spi.FilterPlugin;
32
+ import org.embulk.spi.OutputPlugin;
33
+ import org.embulk.spi.util.Filters;
34
+ import org.slf4j.Logger;
35
+
36
+ public class BulkLoader
37
+ {
38
+ private final Injector injector;
39
+
40
+ public interface BulkLoaderTask
41
+ extends Task
42
+ {
43
+ @Config("exec")
44
+ @ConfigDefault("{}")
45
+ public ConfigSource getExecConfig();
46
+
47
+ @Config("in")
48
+ public ConfigSource getInputConfig();
49
+
50
+ @Config("filters")
51
+ @ConfigDefault("[]")
52
+ public List<ConfigSource> getFilterConfigs();
53
+
54
+ @Config("out")
55
+ public ConfigSource getOutputConfig();
56
+
57
+ public TaskSource getOutputTask();
58
+ public void setOutputTask(TaskSource taskSource);
59
+ }
60
+
61
+ @Inject
62
+ public BulkLoader(Injector injector,
63
+ @ForSystemConfig ConfigSource systemConfig)
64
+ {
65
+ this.injector = injector;
66
+ }
67
+
68
+ private static class LoaderState
69
+ implements ProcessState
70
+ {
71
+ private final Logger logger;
72
+
73
+ private final ProcessPluginSet plugins;
74
+
75
+ private volatile TaskSource inputTaskSource;
76
+ private volatile TaskSource outputTaskSource;
77
+ private volatile List<TaskSource> filterTaskSources;
78
+ private volatile List<Schema> schemas;
79
+ private volatile Schema executorSchema;
80
+
81
+ private volatile ConfigDiff inputConfigDiff;
82
+ private volatile ConfigDiff outputConfigDiff;
83
+
84
+ private volatile List<TaskState> inputTaskStates;
85
+ private volatile List<TaskState> outputTaskStates;
86
+
87
+ public LoaderState(Logger logger, ProcessPluginSet plugins)
88
+ {
89
+ this.logger = logger;
90
+ this.plugins = plugins;
91
+ }
92
+
93
+ public Logger getLogger()
94
+ {
95
+ return logger;
96
+ }
97
+
98
+ public void setSchemas(List<Schema> schemas)
99
+ {
100
+ this.schemas = schemas;
101
+ }
102
+
103
+ public void setExecutorSchema(Schema executorSchema)
104
+ {
105
+ this.executorSchema = executorSchema;
106
+ }
107
+
108
+ public void setInputTaskSource(TaskSource inputTaskSource)
109
+ {
110
+ this.inputTaskSource = inputTaskSource;
111
+ }
112
+
113
+ public void setOutputTaskSource(TaskSource outputTaskSource)
114
+ {
115
+ this.outputTaskSource = outputTaskSource;
116
+ }
117
+
118
+ public void setFilterTaskSources(List<TaskSource> filterTaskSources)
119
+ {
120
+ this.filterTaskSources = filterTaskSources;
121
+ }
122
+
123
+ public ProcessTask buildProcessTask()
124
+ {
125
+ return new ProcessTask(
126
+ plugins.getInputPluginType(), plugins.getOutputPluginType(), plugins.getFilterPluginTypes(),
127
+ inputTaskSource, outputTaskSource, filterTaskSources,
128
+ schemas, executorSchema, Exec.newTaskSource());
129
+ }
130
+
131
+ @Override
132
+ public void initialize(int inputTaskCount, int outputTaskCount)
133
+ {
134
+ if (inputTaskStates != null || outputTaskStates != null) {
135
+ // initialize is called twice if resume (by restoreResumedCommitReports and ExecutorPlugin.execute)
136
+ if (inputTaskStates.size() != inputTaskCount || outputTaskStates.size() != outputTaskCount) {
137
+ throw new ConfigException(String.format(
138
+ "input task count and output task (%d and %d) must be same with the first execution (%d and %d) whenre resumed",
139
+ inputTaskCount, outputTaskCount, inputTaskStates.size(), outputTaskStates.size()));
140
+ }
141
+ } else {
142
+ ImmutableList.Builder<TaskState> inputTaskStates = ImmutableList.builder();
143
+ ImmutableList.Builder<TaskState> outputTaskStates = ImmutableList.builder();
144
+ for (int i=0; i < inputTaskCount; i++) {
145
+ inputTaskStates.add(new TaskState());
146
+ }
147
+ for (int i=0; i < outputTaskCount; i++) {
148
+ outputTaskStates.add(new TaskState());
149
+ }
150
+ this.inputTaskStates = inputTaskStates.build();
151
+ this.outputTaskStates = outputTaskStates.build();
152
+ }
153
+ }
154
+
155
+ @Override
156
+ public TaskState getInputTaskState(int inputTaskIndex)
157
+ {
158
+ return inputTaskStates.get(inputTaskIndex);
159
+ }
160
+
161
+ @Override
162
+ public TaskState getOutputTaskState(int outputTaskIndex)
163
+ {
164
+ return outputTaskStates.get(outputTaskIndex);
165
+ }
166
+
167
+ public boolean isAllCommitted()
168
+ {
169
+ if (outputTaskStates == null) {
170
+ // not initialized
171
+ return false;
172
+ }
173
+ for (TaskState outputTaskState : outputTaskStates) {
174
+ if (!outputTaskState.isCommitted()) {
175
+ return false;
176
+ }
177
+ }
178
+ return true;
179
+ }
180
+
181
+ public boolean isAnyStarted()
182
+ {
183
+ if (inputTaskStates == null) {
184
+ // not initialized
185
+ return false;
186
+ }
187
+ for (TaskState inputTaskState : inputTaskStates) {
188
+ if (inputTaskState.isStarted()) {
189
+ return true;
190
+ }
191
+ }
192
+ return false;
193
+ }
194
+
195
+ public void setOutputConfigDiff(ConfigDiff outputConfigDiff)
196
+ {
197
+ if (outputConfigDiff == null) {
198
+ outputConfigDiff = Exec.newConfigDiff();
199
+ }
200
+ this.outputConfigDiff = outputConfigDiff;
201
+ }
202
+
203
+ public void setInputConfigDiff(ConfigDiff inputConfigDiff)
204
+ {
205
+ if (inputConfigDiff == null) {
206
+ inputConfigDiff = Exec.newConfigDiff();
207
+ }
208
+ this.inputConfigDiff = inputConfigDiff;
209
+ }
210
+
211
+ private List<Optional<CommitReport>> getInputCommitReports()
212
+ {
213
+ ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
214
+ for (TaskState inputTaskState : inputTaskStates) {
215
+ builder.add(inputTaskState.getCommitReport());
216
+ }
217
+ return builder.build();
218
+ }
219
+
220
+ private List<Optional<CommitReport>> getOutputCommitReports()
221
+ {
222
+ ImmutableList.Builder<Optional<CommitReport>> builder = ImmutableList.builder();
223
+ for (TaskState outputTaskState : outputTaskStates) {
224
+ builder.add(outputTaskState.getCommitReport());
225
+ }
226
+ return builder.build();
227
+ }
228
+
229
+ public List<CommitReport> getAllInputCommitReports()
230
+ {
231
+ ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
232
+ for (TaskState inputTaskState : inputTaskStates) {
233
+ builder.add(inputTaskState.getCommitReport().get());
234
+ }
235
+ return builder.build();
236
+ }
237
+
238
+ public List<CommitReport> getAllOutputCommitReports()
239
+ {
240
+ ImmutableList.Builder<CommitReport> builder = ImmutableList.builder();
241
+ for (TaskState outputTaskState : outputTaskStates) {
242
+ builder.add(outputTaskState.getCommitReport().get());
243
+ }
244
+ return builder.build();
245
+ }
246
+
247
+ public List<Throwable> getExceptions()
248
+ {
249
+ ImmutableList.Builder<Throwable> builder = ImmutableList.builder();
250
+ if (inputTaskStates != null) { // null if not initialized yet
251
+ for (TaskState inputTaskState : inputTaskStates) {
252
+ Optional<Throwable> exception = inputTaskState.getException();
253
+ if (exception.isPresent()) {
254
+ builder.add(exception.get());
255
+ }
256
+ }
257
+ }
258
+ if (outputTaskStates != null) { // null if not initialized yet
259
+ for (TaskState outputTaskState : outputTaskStates) {
260
+ Optional<Throwable> exception = outputTaskState.getException();
261
+ if (exception.isPresent()) {
262
+ builder.add(exception.get());
263
+ }
264
+ }
265
+ }
266
+ return builder.build();
267
+ }
268
+
269
+ public RuntimeException getRepresentativeException()
270
+ {
271
+ RuntimeException top = null;
272
+ for (Throwable ex : getExceptions()) {
273
+ if (top != null) {
274
+ top.addSuppressed(ex);
275
+ } else {
276
+ if (ex instanceof RuntimeException) {
277
+ top = (RuntimeException) ex;
278
+ } else {
279
+ top = new RuntimeException(ex);
280
+ }
281
+ }
282
+ }
283
+ if (top == null) {
284
+ top = new RuntimeException("Some transactions are not committed");
285
+ }
286
+ return top;
287
+ }
288
+
289
+ public ExecutionResult buildExecuteResult()
290
+ {
291
+ return buildExecuteResultWithWarningException(null);
292
+ }
293
+
294
+ public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
295
+ {
296
+ ConfigDiff configDiff = Exec.newConfigDiff();
297
+ if (inputConfigDiff != null) {
298
+ configDiff.getNestedOrSetEmpty("in").merge(inputConfigDiff);
299
+ }
300
+ if (outputConfigDiff != null) {
301
+ configDiff.getNestedOrSetEmpty("out").merge(outputConfigDiff);
302
+ }
303
+
304
+ ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
305
+ for (Throwable e : getExceptions()) {
306
+ ignoredExceptions.add(e);
307
+ }
308
+ if (ex != null) {
309
+ ignoredExceptions.add(ex);
310
+ }
311
+
312
+ return new ExecutionResult(configDiff, ignoredExceptions.build());
313
+ }
314
+
315
+ public ResumeState buildResumeState(ExecSession exec)
316
+ {
317
+ return new ResumeState(
318
+ exec.getSessionConfigSource(),
319
+ inputTaskSource, outputTaskSource,
320
+ first(schemas), executorSchema,
321
+ getInputCommitReports(), getOutputCommitReports());
322
+ }
323
+
324
+ public PartialExecutionException buildPartialExecuteException(Throwable cause, ExecSession exec)
325
+ {
326
+ return new PartialExecutionException(cause, buildResumeState(exec));
327
+ }
328
+ }
329
+
330
+ protected ExecutorPlugin newExecutorPlugin(BulkLoaderTask task)
331
+ {
332
+ return Exec.newPlugin(ExecutorPlugin.class,
333
+ task.getExecConfig().get(PluginType.class, "type", new PluginType("local")));
334
+ }
335
+
336
+ protected InputPlugin newInputPlugin(BulkLoaderTask task)
337
+ {
338
+ return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
339
+ }
340
+
341
+ protected List<FilterPlugin> newFilterPlugins(BulkLoaderTask task)
342
+ {
343
+ return Filters.newFilterPlugins(Exec.session(),
344
+ Filters.getPluginTypes(task.getFilterConfigs()));
345
+ }
346
+
347
+ protected OutputPlugin newOutputPlugin(BulkLoaderTask task)
348
+ {
349
+ return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
350
+ }
351
+
352
+ public ExecutionResult run(ExecSession exec, final ConfigSource config)
353
+ {
354
+ try {
355
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
356
+ public ExecutionResult run()
357
+ {
358
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("transaction")) {
359
+ return doRun(config);
360
+ }
361
+ }
362
+ });
363
+ } catch (ExecutionException ex) {
364
+ throw Throwables.propagate(ex.getCause());
365
+ }
366
+ }
367
+
368
+ public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
369
+ {
370
+ try {
371
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
372
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
373
+ public ExecutionResult run()
374
+ {
375
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("resume")) {
376
+ return doResume(config, resume);
377
+ }
378
+ }
379
+ });
380
+ } catch (ExecutionException ex) {
381
+ throw Throwables.propagate(ex.getCause());
382
+ }
383
+ }
384
+
385
+ public void cleanup(final ConfigSource config, final ResumeState resume)
386
+ {
387
+ try {
388
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
389
+ Exec.doWith(exec, new ExecAction<Void>() {
390
+ public Void run()
391
+ {
392
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("cleanup")) {
393
+ doCleanup(config, resume);
394
+ return null;
395
+ }
396
+ }
397
+ });
398
+ } catch (ExecutionException ex) {
399
+ throw Throwables.propagate(ex.getCause());
400
+ }
401
+ }
402
+
403
+ public void doCleanup(ConfigSource config, ResumeState resume)
404
+ {
405
+ BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
406
+ InputPlugin inputPlugin = newInputPlugin(task);
407
+ OutputPlugin outputPlugin = newOutputPlugin(task);
408
+
409
+ ImmutableList.Builder<CommitReport> successfulInputCommitReports = ImmutableList.builder();
410
+ ImmutableList.Builder<CommitReport> successfulOutputCommitReports = ImmutableList.builder();
411
+ for (Optional<CommitReport> inputCommitReport : resume.getInputCommitReports()) {
412
+ if (inputCommitReport.isPresent()) {
413
+ successfulInputCommitReports.add(inputCommitReport.get());
414
+ }
415
+ }
416
+ for (Optional<CommitReport> outputCommitReport : resume.getOutputCommitReports()) {
417
+ if (outputCommitReport.isPresent()) {
418
+ successfulOutputCommitReports.add(outputCommitReport.get());
419
+ }
420
+ }
421
+
422
+ inputPlugin.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
423
+ resume.getInputCommitReports().size(), successfulInputCommitReports.build());
424
+
425
+ outputPlugin.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
426
+ resume.getOutputCommitReports().size(), successfulOutputCommitReports.build());
427
+ }
428
+
429
+ private static class ProcessPluginSet
430
+ {
431
+ private final PluginType inputPluginType;
432
+ private final PluginType outputPluginType;
433
+ private final List<PluginType> filterPluginTypes;
434
+
435
+ private final InputPlugin inputPlugin;
436
+ private final OutputPlugin outputPlugin;
437
+ private final List<FilterPlugin> filterPlugins;
438
+
439
+ public ProcessPluginSet(BulkLoaderTask task)
440
+ {
441
+ this.inputPluginType = task.getInputConfig().get(PluginType.class, "type");
442
+ this.outputPluginType = task.getOutputConfig().get(PluginType.class, "type");
443
+ this.filterPluginTypes = Filters.getPluginTypes(task.getFilterConfigs());
444
+ this.inputPlugin = Exec.newPlugin(InputPlugin.class, inputPluginType);
445
+ this.outputPlugin = Exec.newPlugin(OutputPlugin.class, outputPluginType);
446
+ this.filterPlugins = Filters.newFilterPlugins(Exec.session(), filterPluginTypes);
447
+ }
448
+
449
+ public PluginType getInputPluginType()
450
+ {
451
+ return inputPluginType;
452
+ }
453
+
454
+ public PluginType getOutputPluginType()
455
+ {
456
+ return outputPluginType;
457
+ }
458
+
459
+ public List<PluginType> getFilterPluginTypes()
460
+ {
461
+ return filterPluginTypes;
462
+ }
463
+
464
+ public InputPlugin getInputPlugin()
465
+ {
466
+ return inputPlugin;
467
+ }
468
+
469
+ public OutputPlugin getOutputPlugin()
470
+ {
471
+ return outputPlugin;
472
+ }
473
+
474
+ public List<FilterPlugin> getFilterPlugins()
475
+ {
476
+ return filterPlugins;
477
+ }
478
+ }
479
+
480
+ private ExecutionResult doRun(ConfigSource config)
481
+ {
482
+ final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
483
+
484
+ final ExecutorPlugin exec = newExecutorPlugin(task);
485
+ final ProcessPluginSet plugins = new ProcessPluginSet(task);
486
+
487
+ final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
488
+ try {
489
+ ConfigDiff inputConfigDiff = plugins.getInputPlugin().transaction(task.getInputConfig(), new InputPlugin.Control() {
490
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
491
+ {
492
+ state.setInputTaskSource(inputTask);
493
+ Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
494
+ public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
495
+ {
496
+ state.setSchemas(schemas);
497
+ state.setFilterTaskSources(filterTasks);
498
+ exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
499
+ public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
500
+ {
501
+ state.setExecutorSchema(executorSchema);
502
+ ConfigDiff outputConfigDiff = plugins.getOutputPlugin().transaction(task.getOutputConfig(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
503
+ public List<CommitReport> run(final TaskSource outputTask)
504
+ {
505
+ state.setOutputTaskSource(outputTask);
506
+
507
+ state.initialize(inputTaskCount, outputTaskCount);
508
+
509
+ if (!state.isAllCommitted()) { // inputTaskCount == 0
510
+ execute(task, executor, state);
511
+ }
512
+
513
+ return state.getAllOutputCommitReports();
514
+ }
515
+ });
516
+ state.setOutputConfigDiff(outputConfigDiff);
517
+ }
518
+ });
519
+ }
520
+ });
521
+ return state.getAllInputCommitReports();
522
+ }
523
+ });
524
+ state.setInputConfigDiff(inputConfigDiff);
525
+
526
+ cleanupCommittedTransaction(config, state);
527
+
528
+ return state.buildExecuteResult();
529
+
530
+ } catch (Throwable ex) {
531
+ if (state.isAllCommitted()) {
532
+ // ignore the exception
533
+ return state.buildExecuteResultWithWarningException(ex);
534
+ }
535
+ if (!state.isAnyStarted()) {
536
+ throw ex;
537
+ }
538
+ throw state.buildPartialExecuteException(ex, Exec.session());
539
+ }
540
+ }
541
+
542
+ private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
543
+ {
544
+ final BulkLoaderTask task = config.loadConfig(BulkLoaderTask.class);
545
+
546
+ final ExecutorPlugin exec = newExecutorPlugin(task);
547
+ final ProcessPluginSet plugins = new ProcessPluginSet(task);
548
+
549
+ final LoaderState state = new LoaderState(Exec.getLogger(BulkLoader.class), plugins);
550
+ try {
551
+ ConfigDiff inputConfigDiff = plugins.getInputPlugin().resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
552
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int inputTaskCount)
553
+ {
554
+ // TODO validate inputTask?
555
+ // TODO validate inputSchema
556
+ state.setInputTaskSource(inputTask);
557
+ Filters.transaction(plugins.getFilterPlugins(), task.getFilterConfigs(), inputSchema, new Filters.Control() {
558
+ public void run(final List<TaskSource> filterTasks, final List<Schema> schemas)
559
+ {
560
+ state.setSchemas(schemas);
561
+ state.setFilterTaskSources(filterTasks);
562
+ exec.transaction(task.getExecConfig(), last(schemas), inputTaskCount, new ExecutorPlugin.Control() {
563
+ public void transaction(final Schema executorSchema, final int outputTaskCount, final ExecutorPlugin.Executor executor)
564
+ {
565
+ // TODO validate executorSchema
566
+ state.setExecutorSchema(executorSchema);
567
+ ConfigDiff outputConfigDiff = plugins.getOutputPlugin().resume(resume.getOutputTaskSource(), executorSchema, outputTaskCount, new OutputPlugin.Control() {
568
+ public List<CommitReport> run(final TaskSource outputTask)
569
+ {
570
+ // TODO validate outputTask?
571
+ state.setOutputTaskSource(outputTask);
572
+
573
+ restoreResumedCommitReports(resume, state);
574
+ if (!state.isAllCommitted()) {
575
+ execute(task, executor, state);
576
+ }
577
+
578
+ return state.getAllOutputCommitReports();
579
+ }
580
+ });
581
+ state.setOutputConfigDiff(outputConfigDiff);
582
+ }
583
+ });
584
+ }
585
+ });
586
+ return state.getAllInputCommitReports();
587
+ }
588
+ });
589
+ state.setInputConfigDiff(inputConfigDiff);
590
+
591
+ cleanupCommittedTransaction(config, state);
592
+
593
+ return state.buildExecuteResult();
594
+
595
+ } catch (Throwable ex) {
596
+ if (state.isAllCommitted()) {
597
+ // ignore the exception
598
+ return state.buildExecuteResultWithWarningException(ex);
599
+ }
600
+ if (!state.isAnyStarted()) {
601
+ throw ex;
602
+ }
603
+ throw state.buildPartialExecuteException(ex, Exec.session());
604
+ }
605
+ }
606
+
607
+ private static void restoreResumedCommitReports(ResumeState resume, LoaderState state)
608
+ {
609
+ int inputTaskCount = resume.getInputCommitReports().size();
610
+ int outputTaskCount = resume.getOutputCommitReports().size();
611
+
612
+ state.initialize(inputTaskCount, outputTaskCount);
613
+
614
+ for (int i=0; i < inputTaskCount; i++) {
615
+ Optional<CommitReport> report = resume.getInputCommitReports().get(i);
616
+ if (report.isPresent()) {
617
+ TaskState task = state.getInputTaskState(i);
618
+ task.start();
619
+ task.setCommitReport(report.get());
620
+ task.finish();
621
+ }
622
+ }
623
+
624
+ for (int i=0; i < outputTaskCount; i++) {
625
+ Optional<CommitReport> report = resume.getOutputCommitReports().get(i);
626
+ if (report.isPresent()) {
627
+ TaskState task = state.getOutputTaskState(i);
628
+ task.start();
629
+ task.setCommitReport(report.get());
630
+ task.finish();
631
+ }
632
+ }
633
+ }
634
+
635
+ private void execute(BulkLoaderTask task, ExecutorPlugin.Executor executor, LoaderState state)
636
+ {
637
+ ProcessTask procTask = state.buildProcessTask();
638
+
639
+ executor.execute(procTask, state);
640
+
641
+ if (!state.isAllCommitted()) {
642
+ throw state.getRepresentativeException();
643
+ }
644
+ }
645
+
646
+ private void cleanupCommittedTransaction(ConfigSource config, LoaderState state)
647
+ {
648
+ try {
649
+ doCleanup(config, state.buildResumeState(Exec.session()));
650
+ } catch (Exception ex) {
651
+ state.getLogger().warn("Commit succeeded but cleanup failed. Ignoring this exception.", ex); // TODO
652
+ }
653
+ }
654
+
655
+ private static Schema first(List<Schema> schemas)
656
+ {
657
+ return schemas.get(0);
658
+ }
659
+
660
+ private static Schema last(List<Schema> schemas)
661
+ {
662
+ return schemas.get(schemas.size() - 1);
663
+ }
664
+ }