embulk 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -1,6 +1,7 @@
1
1
  package org.embulk.exec;
2
2
 
3
3
  import java.util.List;
4
+ import java.util.Arrays;
4
5
  import java.util.ArrayList;
5
6
  import java.util.concurrent.Callable;
6
7
  import java.util.concurrent.Future;
@@ -13,9 +14,12 @@ import com.google.common.collect.ImmutableList;
13
14
  import com.google.inject.Inject;
14
15
  import com.google.inject.Injector;
15
16
  import com.google.common.base.Throwables;
17
+ import com.google.common.base.Predicates;
18
+ import com.google.common.collect.Iterables;
16
19
  import com.google.common.util.concurrent.ThreadFactoryBuilder;
17
20
  import org.embulk.config.Task;
18
21
  import org.embulk.config.Config;
22
+ import org.embulk.config.ConfigDefault;
19
23
  import org.embulk.config.ConfigSource;
20
24
  import org.embulk.config.TaskSource;
21
25
  import org.embulk.config.NextConfig;
@@ -26,8 +30,11 @@ import org.embulk.spi.Exec;
26
30
  import org.embulk.spi.ExecSession;
27
31
  import org.embulk.spi.ExecAction;
28
32
  import org.embulk.spi.InputPlugin;
33
+ import org.embulk.spi.FilterPlugin;
29
34
  import org.embulk.spi.OutputPlugin;
35
+ import org.embulk.spi.PageOutput;
30
36
  import org.embulk.spi.TransactionalPageOutput;
37
+ import org.embulk.spi.util.Filters;
31
38
  import org.slf4j.Logger;
32
39
 
33
40
  public class LocalExecutor
@@ -37,22 +44,25 @@ public class LocalExecutor
37
44
  private final int maxThreads;
38
45
  private final ExecutorService executor;
39
46
 
40
- private Logger log;
41
- private final AtomicInteger runningTaskCount;
42
- private final AtomicInteger completedTaskCount;
43
-
44
47
  public interface ExecutorTask
45
48
  extends Task
46
49
  {
47
50
  @Config("in")
48
51
  public ConfigSource getInputConfig();
49
52
 
53
+ @Config("filters")
54
+ @ConfigDefault("[]")
55
+ public List<ConfigSource> getFilterConfigs();
56
+
50
57
  @Config("out")
51
58
  public ConfigSource getOutputConfig();
52
59
 
53
60
  public TaskSource getInputTask();
54
61
  public void setInputTask(TaskSource taskSource);
55
62
 
63
+ public List<TaskSource> getFilterTasks();
64
+ public void setFilterTasks(List<TaskSource> taskSources);
65
+
56
66
  public TaskSource getOutputTask();
57
67
  public void setOutputTask(TaskSource taskSource);
58
68
  }
@@ -71,70 +81,251 @@ public class LocalExecutor
71
81
  .setNameFormat("embulk-executor-%d")
72
82
  .setDaemon(true)
73
83
  .build());
74
-
75
- this.runningTaskCount = new AtomicInteger(0);
76
- this.completedTaskCount = new AtomicInteger(0);
77
84
  }
78
85
 
79
- private static class ExecuteResultBuilder
86
+ private static class ProcessState
80
87
  {
81
- private NextConfig inputNextConfig;
82
- private NextConfig outputNextConfig;
88
+ private final Logger logger;
89
+ private volatile boolean[] started;
90
+ private volatile boolean[] finished;
91
+ private volatile Schema inputSchema;
92
+ private volatile Schema outputSchema;
93
+ private volatile Throwable[] exceptions;
94
+ private volatile CommitReport[] inputCommitReports;
95
+ private volatile CommitReport[] outputCommitReports;
96
+ private volatile NextConfig inputNextConfig;
97
+ private volatile NextConfig outputNextConfig;
98
+ private int processorCount;
99
+
100
+ public ProcessState(Logger logger)
101
+ {
102
+ this.logger = logger;
103
+ }
83
104
 
84
- public void setInputNextConfig(NextConfig inputNextConfig)
105
+ public Logger getLogger()
85
106
  {
86
- this.inputNextConfig = inputNextConfig;
107
+ return logger;
87
108
  }
88
109
 
89
- public void setOutputNextConfig(NextConfig outputNextConfig)
110
+ public void initialize(int count)
90
111
  {
91
- this.outputNextConfig = outputNextConfig;
112
+ this.started = new boolean[count];
113
+ this.finished = new boolean[count];
114
+ this.exceptions = new Throwable[count];
115
+ this.inputCommitReports = new CommitReport[count];
116
+ this.outputCommitReports = new CommitReport[count];
117
+ this.processorCount = count;
92
118
  }
93
119
 
94
- public NextConfig getInputNextConfig()
120
+ public void setInputSchema(Schema inputSchema)
95
121
  {
96
- return inputNextConfig;
122
+ this.inputSchema = inputSchema;
97
123
  }
98
124
 
99
- public NextConfig getOutputNextConfig()
125
+ public void setOutputSchema(Schema outputSchema)
100
126
  {
101
- return outputNextConfig;
127
+ this.outputSchema = outputSchema;
102
128
  }
103
129
 
104
- public ExecuteResult build()
130
+ public Schema getInputSchema()
105
131
  {
106
- if (inputNextConfig == null) {
107
- inputNextConfig = Exec.newNextConfig();
132
+ return inputSchema;
133
+ }
134
+
135
+ public Schema getOutputSchema()
136
+ {
137
+ return outputSchema;
138
+ }
139
+
140
+ public boolean isAnyStarted()
141
+ {
142
+ if (started == null) {
143
+ return false;
108
144
  }
145
+ for (boolean b : started) {
146
+ if (b) { return true; }
147
+ }
148
+ return false;
149
+ }
150
+
151
+ public void start(int i)
152
+ {
153
+ started[i] = true;
154
+ }
155
+
156
+ public void finish(int i)
157
+ {
158
+ finished[i] = true;
159
+ }
160
+
161
+ public int getProcessorCount()
162
+ {
163
+ return processorCount;
164
+ }
165
+
166
+ public int getStartedCount()
167
+ {
168
+ int count = 0;
169
+ for (int i=0; i < started.length; i++) {
170
+ if (started[i]) { count++; }
171
+ }
172
+ return count;
173
+ }
174
+
175
+ public int getFinishedCount()
176
+ {
177
+ int count = 0;
178
+ for (int i=0; i < finished.length; i++) {
179
+ if (finished[i]) { count++; }
180
+ }
181
+ return count;
182
+ }
183
+
184
+ public void setInputCommitReport(int i, CommitReport inputCommitReport)
185
+ {
186
+ if (inputCommitReport == null) {
187
+ inputCommitReport = Exec.newCommitReport();
188
+ }
189
+ this.inputCommitReports[i] = inputCommitReport;
190
+ }
191
+
192
+ public void setOutputCommitReport(int i, CommitReport outputCommitReport)
193
+ {
194
+ if (outputCommitReport == null) {
195
+ outputCommitReport = Exec.newCommitReport();
196
+ }
197
+ this.outputCommitReports[i] = outputCommitReport;
198
+ }
199
+
200
+ public boolean isOutputCommitted(int i)
201
+ {
202
+ return outputCommitReports[i] != null;
203
+ }
204
+
205
+ public void setException(int i, Throwable exception)
206
+ {
207
+ this.exceptions[i] = exception;
208
+ }
209
+
210
+ public boolean isAllCommitted()
211
+ {
212
+ if (processorCount <= 0) {
213
+ // not initialized
214
+ return false;
215
+ }
216
+ for (int i=0; i < processorCount; i++) {
217
+ if (!isOutputCommitted(i)) {
218
+ return false;
219
+ }
220
+ }
221
+ return true;
222
+ }
223
+
224
+ public boolean isAnyCommitted()
225
+ {
226
+ for (int i=0; i < processorCount; i++) {
227
+ if (isOutputCommitted(i)) {
228
+ return true;
229
+ }
230
+ }
231
+ return false;
232
+ }
233
+
234
+ public void setOutputNextConfig(NextConfig outputNextConfig)
235
+ {
109
236
  if (outputNextConfig == null) {
110
237
  outputNextConfig = Exec.newNextConfig();
111
238
  }
112
- NextConfig nextConfig = Exec.newNextConfig();
113
- nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
114
- nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
115
- return new ExecuteResult(nextConfig);
239
+ this.outputNextConfig = outputNextConfig;
116
240
  }
117
- }
118
241
 
119
- private static class ProcessResult
120
- {
121
- private final CommitReport inputCommitReport;
122
- private final CommitReport outputCommitReport;
242
+ public void setInputNextConfig(NextConfig inputNextConfig)
243
+ {
244
+ if (inputNextConfig == null) {
245
+ inputNextConfig = Exec.newNextConfig();
246
+ }
247
+ this.inputNextConfig = inputNextConfig;
248
+ }
123
249
 
124
- public ProcessResult(CommitReport inputCommitReport, CommitReport outputCommitReport)
250
+ public List<CommitReport> getInputCommitReports()
125
251
  {
126
- this.inputCommitReport = inputCommitReport;
127
- this.outputCommitReport = outputCommitReport;
252
+ return ImmutableList.copyOf(inputCommitReports);
128
253
  }
129
254
 
130
- public CommitReport getInputCommitReport()
255
+ public List<CommitReport> getOutputCommitReports()
131
256
  {
132
- return inputCommitReport;
257
+ return ImmutableList.copyOf(outputCommitReports);
133
258
  }
134
259
 
135
- public CommitReport getOutputCommitReport()
260
+ public RuntimeException getRepresentativeException()
136
261
  {
137
- return outputCommitReport;
262
+ RuntimeException top = null;
263
+ for (Throwable ex : exceptions) {
264
+ if (ex != null) {
265
+ if (top != null) {
266
+ top.addSuppressed(ex);
267
+ } else {
268
+ if (ex instanceof RuntimeException) {
269
+ top = (RuntimeException) ex;
270
+ } else {
271
+ top = new RuntimeException(ex);
272
+ }
273
+ }
274
+ }
275
+ }
276
+ if (top == null) {
277
+ top = new RuntimeException("Some transactions are not committed");
278
+ }
279
+ return top;
280
+ }
281
+
282
+ public int getCommittedUnclosedCount()
283
+ {
284
+ int count = 0;
285
+ for (int i=0; i < exceptions.length; i++) {
286
+ if (exceptions[i] != null && isOutputCommitted(i)) {
287
+ count++;
288
+ }
289
+ }
290
+ return count;
291
+ }
292
+
293
+ public ExecutionResult buildExecuteResult()
294
+ {
295
+ return buildExecuteResultWithWarningException(null);
296
+ }
297
+
298
+ public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
299
+ {
300
+ NextConfig nextConfig = Exec.newNextConfig();
301
+ if (inputNextConfig != null) {
302
+ nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
303
+ }
304
+ if (outputNextConfig != null) {
305
+ nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
306
+ }
307
+
308
+ ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
309
+ for (Throwable e : exceptions) {
310
+ if (e != null) {
311
+ ignoredExceptions.add(e);
312
+ }
313
+ }
314
+ if (ex != null) {
315
+ ignoredExceptions.add(ex);
316
+ }
317
+
318
+ return new ExecutionResult(nextConfig, ignoredExceptions.build());
319
+ }
320
+
321
+ public PartialExecutionException buildPartialExecuteException(Throwable cause,
322
+ ExecutorTask task, ExecSession exec)
323
+ {
324
+ return new PartialExecutionException(cause, new ResumeState(
325
+ exec.getSessionConfigSource(),
326
+ task.getInputTask(), task.getOutputTask(),
327
+ inputSchema, outputSchema,
328
+ Arrays.asList(inputCommitReports), Arrays.asList(outputCommitReports)));
138
329
  }
139
330
  }
140
331
 
@@ -143,17 +334,21 @@ public class LocalExecutor
143
334
  return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
144
335
  }
145
336
 
337
+ protected List<FilterPlugin> newFilterPlugins(ExecutorTask task)
338
+ {
339
+ return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
340
+ }
341
+
146
342
  protected OutputPlugin newOutputPlugin(ExecutorTask task)
147
343
  {
148
344
  return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
149
345
  }
150
346
 
151
- public ExecuteResult run(ExecSession exec, final ConfigSource config)
347
+ public ExecutionResult run(ExecSession exec, final ConfigSource config)
152
348
  {
153
- log = exec.getLogger(LocalExecutor.class);
154
349
  try {
155
- return Exec.doWith(exec, new ExecAction<ExecuteResult>() {
156
- public ExecuteResult run()
350
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
351
+ public ExecutionResult run()
157
352
  {
158
353
  return doRun(config);
159
354
  }
@@ -163,114 +358,270 @@ public class LocalExecutor
163
358
  }
164
359
  }
165
360
 
166
- private ExecuteResult doRun(ConfigSource config)
361
+ public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
362
+ {
363
+ try {
364
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
365
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
366
+ public ExecutionResult run()
367
+ {
368
+ return doResume(config, resume);
369
+ }
370
+ });
371
+ } catch (Exception ex) {
372
+ throw Throwables.propagate(ex);
373
+ }
374
+ }
375
+
376
+ public void cleanup(final ConfigSource config, final ResumeState resume)
377
+ {
378
+ try {
379
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
380
+ Exec.doWith(exec, new ExecAction<Void>() {
381
+ public Void run()
382
+ {
383
+ doCleanup(config, resume);
384
+ return null;
385
+ }
386
+ });
387
+ } catch (Exception ex) {
388
+ throw Throwables.propagate(ex);
389
+ }
390
+ }
391
+
392
+ public void doCleanup(ConfigSource config, ResumeState resume)
393
+ {
394
+ ExecutorTask task = config.loadConfig(ExecutorTask.class);
395
+ InputPlugin in = newInputPlugin(task);
396
+ OutputPlugin out = newOutputPlugin(task);
397
+
398
+ List<CommitReport> successInputCommitReports = ImmutableList.copyOf(
399
+ Iterables.filter(resume.getInputCommitReports(), Predicates.notNull()));
400
+ List<CommitReport> successOutputCommitReports = ImmutableList.copyOf(
401
+ Iterables.filter(resume.getOutputCommitReports(), Predicates.notNull()));
402
+
403
+ in.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
404
+ resume.getInputCommitReports().size(), successInputCommitReports);
405
+
406
+ out.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
407
+ resume.getOutputCommitReports().size(), successOutputCommitReports);
408
+ }
409
+
410
+ private ExecutionResult doRun(ConfigSource config)
167
411
  {
168
412
  final ExecutorTask task = config.loadConfig(ExecutorTask.class);
169
413
 
170
414
  final InputPlugin in = newInputPlugin(task);
415
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
171
416
  final OutputPlugin out = newOutputPlugin(task);
172
417
 
173
- final ExecuteResultBuilder execResult = new ExecuteResultBuilder();
174
-
175
- NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
176
- public List<CommitReport> run(final TaskSource inputTask, final Schema schema, final int processorCount)
177
- {
178
- final ImmutableList.Builder<CommitReport> inputCommitReports = ImmutableList.builder();
179
- NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), schema, processorCount, new OutputPlugin.Control() {
180
- public List<CommitReport> run(final TaskSource outputTask)
181
- {
182
- final ImmutableList.Builder<CommitReport> outputCommitReports = ImmutableList.builder();
183
- task.setInputTask(inputTask);
184
- task.setOutputTask(outputTask);
185
-
186
- //log.debug("input: %s", task.getInputTask());
187
- //log.debug("output: %s", task.getOutputTask());
188
-
189
- List<ProcessResult> results = process(task.dump(), schema, processorCount);
190
- for (ProcessResult result : results) {
191
- inputCommitReports.add(result.getInputCommitReport());
192
- outputCommitReports.add(result.getOutputCommitReport());
418
+ final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
419
+ try {
420
+ NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
421
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
422
+ {
423
+ state.initialize(processorCount);
424
+ state.setInputSchema(inputSchema);
425
+ Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
426
+ public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
427
+ {
428
+ Schema outputSchema = last(filterSchemas);
429
+ state.setOutputSchema(outputSchema);
430
+ NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), outputSchema, processorCount, new OutputPlugin.Control() {
431
+ public List<CommitReport> run(final TaskSource outputTask)
432
+ {
433
+ task.setInputTask(inputTask);
434
+ task.setFilterTasks(filterTasks);
435
+ task.setOutputTask(outputTask);
436
+
437
+ process(task.dump(), filterSchemas, processorCount, state);
438
+ if (!state.isAllCommitted()) {
439
+ throw state.getRepresentativeException();
440
+ }
441
+ return state.getOutputCommitReports();
442
+ }
443
+ });
444
+ state.setOutputNextConfig(outputNextConfig);
193
445
  }
446
+ });
447
+ return state.getInputCommitReports();
448
+ }
449
+ });
450
+ state.setInputNextConfig(inputNextConfig);
194
451
 
195
- return outputCommitReports.build();
196
- }
197
- });
198
- execResult.setOutputNextConfig(outputNextConfig);
199
- return inputCommitReports.build();
452
+ return state.buildExecuteResult();
453
+
454
+ } catch (Throwable ex) {
455
+ if (state.isAllCommitted()) {
456
+ // ignore the exception
457
+ return state.buildExecuteResultWithWarningException(ex);
200
458
  }
201
- });
202
- execResult.setInputNextConfig(inputNextConfig);
459
+ if (!state.isAnyStarted()) {
460
+ throw ex;
461
+ }
462
+ throw state.buildPartialExecuteException(ex, task, Exec.session());
463
+ }
464
+ }
465
+
466
+ private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
467
+ {
468
+ final ExecutorTask task = config.loadConfig(ExecutorTask.class);
469
+
470
+ final InputPlugin in = newInputPlugin(task);
471
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
472
+ final OutputPlugin out = newOutputPlugin(task);
473
+
474
+ final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
475
+ try {
476
+ NextConfig inputNextConfig = in.resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
477
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
478
+ {
479
+ // TODO validate inputTask?
480
+ // TODO validate inputSchema
481
+ // TODO validate processorCount
482
+ state.initialize(processorCount);
483
+ Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
484
+ public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
485
+ {
486
+ Schema outputSchema = last(filterSchemas);
487
+ state.setOutputSchema(outputSchema);
488
+ NextConfig outputNextConfig = out.resume(resume.getOutputTaskSource(), outputSchema, processorCount, new OutputPlugin.Control() {
489
+ public List<CommitReport> run(final TaskSource outputTask)
490
+ {
491
+ // TODO validate outputTask?
492
+ task.setInputTask(inputTask);
493
+ task.setFilterTasks(filterTasks);
494
+ task.setOutputTask(outputTask);
495
+
496
+ for (int i=0; i < resume.getOutputCommitReports().size(); i++) {
497
+ if (resume.getOutputCommitReports().get(i) != null) {
498
+ state.start(i);
499
+ state.setInputCommitReport(i, resume.getInputCommitReports().get(i));
500
+ state.setOutputCommitReport(i, resume.getOutputCommitReports().get(i));
501
+ state.finish(i);
502
+ }
503
+ }
504
+
505
+ process(task.dump(), filterSchemas, processorCount, state);
506
+ if (!state.isAllCommitted()) {
507
+ throw state.getRepresentativeException();
508
+ }
509
+ return state.getOutputCommitReports();
510
+ }
511
+ });
512
+ state.setOutputNextConfig(outputNextConfig);
513
+ }
514
+ });
515
+ return state.getInputCommitReports();
516
+ }
517
+ });
518
+ state.setInputNextConfig(inputNextConfig);
203
519
 
204
- return execResult.build();
520
+ return state.buildExecuteResult();
521
+
522
+ } catch (Throwable ex) {
523
+ if (state.isAllCommitted()) {
524
+ // ignore the exception
525
+ return state.buildExecuteResultWithWarningException(ex);
526
+ }
527
+ if (!state.isAnyStarted()) {
528
+ throw ex;
529
+ }
530
+ throw state.buildPartialExecuteException(ex, task, Exec.session());
531
+ }
205
532
  }
206
533
 
207
- private List<ProcessResult> process(TaskSource taskSource, Schema schema, int processorCount)
534
+ private void process(TaskSource taskSource, List<Schema> filterSchemas, int processorCount,
535
+ ProcessState state)
208
536
  {
209
- List<Future<ProcessResult>> futures = new ArrayList<>();
210
- List<ProcessResult> joined = new ArrayList<>();
537
+ List<Future<Throwable>> futures = new ArrayList<>(processorCount);
211
538
  try {
212
- log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
213
- showProgress(processorCount);
214
539
  for (int i=0; i < processorCount; i++) {
215
- futures.add(startProcessor(taskSource, schema, i));
540
+ if (state.isOutputCommitted(i)) {
541
+ state.getLogger().warn("Skipped resumed task {}", i);
542
+ futures.add(null); // resumed
543
+ } else {
544
+ futures.add(startProcessor(taskSource, filterSchemas, i, state));
545
+ }
216
546
  }
547
+ showProgress(state);
217
548
 
218
549
  for (int i=0; i < processorCount; i++) {
550
+ if (futures.get(i) == null) {
551
+ continue;
552
+ }
219
553
  try {
220
- joined.add(futures.get(i).get());
221
- showProgress(processorCount);
222
-
554
+ state.setException(i, futures.get(i).get());
223
555
  } catch (ExecutionException ex) {
224
- throw Throwables.propagate(ex.getCause());
556
+ state.setException(i, ex.getCause());
557
+ //Throwables.propagate(ex.getCause());
225
558
  } catch (InterruptedException ex) {
226
- throw new ExecuteInterruptedException(ex);
559
+ state.setException(i, new ExecutionInterruptedException(ex));
227
560
  }
561
+ showProgress(state);
228
562
  }
229
- return joined;
230
563
  } finally {
231
- for (int i=joined.size(); i < futures.size(); i++) {
232
- futures.get(i).cancel(true);
233
- // TODO join?
564
+ for (Future<Throwable> future : futures) {
565
+ if (future != null && !future.isDone()) {
566
+ future.cancel(true);
567
+ // TODO join?
568
+ }
234
569
  }
235
570
  }
236
571
  }
237
572
 
238
- private void showProgress(int total)
573
+ private void showProgress(ProcessState state)
239
574
  {
240
- int running = runningTaskCount.get();
241
- int done = completedTaskCount.get();
242
- log.info(String.format("{done:%3d / %d, running: %d}", done, total, running));
575
+ int total = state.getProcessorCount();
576
+ int finished = state.getFinishedCount();
577
+ int started = state.getStartedCount();
578
+ state.getLogger().info(String.format("{done:%3d / %d, running: %d}", finished, total, started - finished));
243
579
  }
244
580
 
245
- private Future<ProcessResult> startProcessor(final TaskSource taskSource, final Schema schema, final int index)
581
+ private Future<Throwable> startProcessor(final TaskSource taskSource,
582
+ final List<Schema> filterSchemas, final int index,
583
+ final ProcessState state)
246
584
  {
247
- return executor.submit(new Callable<ProcessResult>() {
248
- public ProcessResult call()
585
+ return executor.submit(new Callable<Throwable>() {
586
+ public Throwable call()
249
587
  {
588
+ final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
589
+ final InputPlugin in = newInputPlugin(task);
590
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
591
+ final OutputPlugin out = newOutputPlugin(task);
592
+
593
+ TransactionalPageOutput tran = out.open(task.getOutputTask(), last(filterSchemas), index);
594
+ PageOutput closeThis = tran;
595
+ state.start(index);
250
596
  try {
251
- runningTaskCount.getAndIncrement();
252
- final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
253
- final InputPlugin in = newInputPlugin(task);
254
- final OutputPlugin out = newOutputPlugin(task);
255
-
256
- TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
257
- boolean committed = false;
597
+ PageOutput filtered = closeThis = Filters.open(filterPlugins, task.getFilterTasks(), filterSchemas, tran);
598
+ state.setInputCommitReport(index, in.run(task.getInputTask(), first(filterSchemas), index, filtered));
599
+ state.setOutputCommitReport(index, tran.commit()); // TODO check output.finish() is called. wrap or abstract
600
+ return null;
601
+ } finally {
258
602
  try {
259
- CommitReport inReport = in.run(task.getInputTask(), schema, index, tran);
260
- CommitReport outReport = tran.commit(); // TODO check output.finish() is called. wrap or abstract
261
- committed = true;
262
- return new ProcessResult(inReport, outReport);
263
- } finally {
264
- if (!committed) {
265
- tran.abort();
603
+ try {
604
+ if (!state.isOutputCommitted(index)) {
605
+ tran.abort();
606
+ }
607
+ } finally {
608
+ closeThis.close();
266
609
  }
267
- tran.close();
610
+ } finally {
611
+ state.finish(index);
268
612
  }
269
- } finally {
270
- runningTaskCount.getAndDecrement();
271
- completedTaskCount.getAndIncrement();
272
613
  }
273
614
  }
274
615
  });
275
616
  }
617
+
618
+ private static Schema first(List<Schema> filterSchemas)
619
+ {
620
+ return filterSchemas.get(0);
621
+ }
622
+
623
+ private static Schema last(List<Schema> filterSchemas)
624
+ {
625
+ return filterSchemas.get(filterSchemas.size() - 1);
626
+ }
276
627
  }