embulk 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -1,6 +1,7 @@
1
1
  package org.embulk.exec;
2
2
 
3
3
  import java.util.List;
4
+ import java.util.Arrays;
4
5
  import java.util.ArrayList;
5
6
  import java.util.concurrent.Callable;
6
7
  import java.util.concurrent.Future;
@@ -13,9 +14,12 @@ import com.google.common.collect.ImmutableList;
13
14
  import com.google.inject.Inject;
14
15
  import com.google.inject.Injector;
15
16
  import com.google.common.base.Throwables;
17
+ import com.google.common.base.Predicates;
18
+ import com.google.common.collect.Iterables;
16
19
  import com.google.common.util.concurrent.ThreadFactoryBuilder;
17
20
  import org.embulk.config.Task;
18
21
  import org.embulk.config.Config;
22
+ import org.embulk.config.ConfigDefault;
19
23
  import org.embulk.config.ConfigSource;
20
24
  import org.embulk.config.TaskSource;
21
25
  import org.embulk.config.NextConfig;
@@ -26,8 +30,11 @@ import org.embulk.spi.Exec;
26
30
  import org.embulk.spi.ExecSession;
27
31
  import org.embulk.spi.ExecAction;
28
32
  import org.embulk.spi.InputPlugin;
33
+ import org.embulk.spi.FilterPlugin;
29
34
  import org.embulk.spi.OutputPlugin;
35
+ import org.embulk.spi.PageOutput;
30
36
  import org.embulk.spi.TransactionalPageOutput;
37
+ import org.embulk.spi.util.Filters;
31
38
  import org.slf4j.Logger;
32
39
 
33
40
  public class LocalExecutor
@@ -37,22 +44,25 @@ public class LocalExecutor
37
44
  private final int maxThreads;
38
45
  private final ExecutorService executor;
39
46
 
40
- private Logger log;
41
- private final AtomicInteger runningTaskCount;
42
- private final AtomicInteger completedTaskCount;
43
-
44
47
  public interface ExecutorTask
45
48
  extends Task
46
49
  {
47
50
  @Config("in")
48
51
  public ConfigSource getInputConfig();
49
52
 
53
+ @Config("filters")
54
+ @ConfigDefault("[]")
55
+ public List<ConfigSource> getFilterConfigs();
56
+
50
57
  @Config("out")
51
58
  public ConfigSource getOutputConfig();
52
59
 
53
60
  public TaskSource getInputTask();
54
61
  public void setInputTask(TaskSource taskSource);
55
62
 
63
+ public List<TaskSource> getFilterTasks();
64
+ public void setFilterTasks(List<TaskSource> taskSources);
65
+
56
66
  public TaskSource getOutputTask();
57
67
  public void setOutputTask(TaskSource taskSource);
58
68
  }
@@ -71,70 +81,251 @@ public class LocalExecutor
71
81
  .setNameFormat("embulk-executor-%d")
72
82
  .setDaemon(true)
73
83
  .build());
74
-
75
- this.runningTaskCount = new AtomicInteger(0);
76
- this.completedTaskCount = new AtomicInteger(0);
77
84
  }
78
85
 
79
- private static class ExecuteResultBuilder
86
+ private static class ProcessState
80
87
  {
81
- private NextConfig inputNextConfig;
82
- private NextConfig outputNextConfig;
88
+ private final Logger logger;
89
+ private volatile boolean[] started;
90
+ private volatile boolean[] finished;
91
+ private volatile Schema inputSchema;
92
+ private volatile Schema outputSchema;
93
+ private volatile Throwable[] exceptions;
94
+ private volatile CommitReport[] inputCommitReports;
95
+ private volatile CommitReport[] outputCommitReports;
96
+ private volatile NextConfig inputNextConfig;
97
+ private volatile NextConfig outputNextConfig;
98
+ private int processorCount;
99
+
100
+ public ProcessState(Logger logger)
101
+ {
102
+ this.logger = logger;
103
+ }
83
104
 
84
- public void setInputNextConfig(NextConfig inputNextConfig)
105
+ public Logger getLogger()
85
106
  {
86
- this.inputNextConfig = inputNextConfig;
107
+ return logger;
87
108
  }
88
109
 
89
- public void setOutputNextConfig(NextConfig outputNextConfig)
110
+ public void initialize(int count)
90
111
  {
91
- this.outputNextConfig = outputNextConfig;
112
+ this.started = new boolean[count];
113
+ this.finished = new boolean[count];
114
+ this.exceptions = new Throwable[count];
115
+ this.inputCommitReports = new CommitReport[count];
116
+ this.outputCommitReports = new CommitReport[count];
117
+ this.processorCount = count;
92
118
  }
93
119
 
94
- public NextConfig getInputNextConfig()
120
+ public void setInputSchema(Schema inputSchema)
95
121
  {
96
- return inputNextConfig;
122
+ this.inputSchema = inputSchema;
97
123
  }
98
124
 
99
- public NextConfig getOutputNextConfig()
125
+ public void setOutputSchema(Schema outputSchema)
100
126
  {
101
- return outputNextConfig;
127
+ this.outputSchema = outputSchema;
102
128
  }
103
129
 
104
- public ExecuteResult build()
130
+ public Schema getInputSchema()
105
131
  {
106
- if (inputNextConfig == null) {
107
- inputNextConfig = Exec.newNextConfig();
132
+ return inputSchema;
133
+ }
134
+
135
+ public Schema getOutputSchema()
136
+ {
137
+ return outputSchema;
138
+ }
139
+
140
+ public boolean isAnyStarted()
141
+ {
142
+ if (started == null) {
143
+ return false;
108
144
  }
145
+ for (boolean b : started) {
146
+ if (b) { return true; }
147
+ }
148
+ return false;
149
+ }
150
+
151
+ public void start(int i)
152
+ {
153
+ started[i] = true;
154
+ }
155
+
156
+ public void finish(int i)
157
+ {
158
+ finished[i] = true;
159
+ }
160
+
161
+ public int getProcessorCount()
162
+ {
163
+ return processorCount;
164
+ }
165
+
166
+ public int getStartedCount()
167
+ {
168
+ int count = 0;
169
+ for (int i=0; i < started.length; i++) {
170
+ if (started[i]) { count++; }
171
+ }
172
+ return count;
173
+ }
174
+
175
+ public int getFinishedCount()
176
+ {
177
+ int count = 0;
178
+ for (int i=0; i < finished.length; i++) {
179
+ if (finished[i]) { count++; }
180
+ }
181
+ return count;
182
+ }
183
+
184
+ public void setInputCommitReport(int i, CommitReport inputCommitReport)
185
+ {
186
+ if (inputCommitReport == null) {
187
+ inputCommitReport = Exec.newCommitReport();
188
+ }
189
+ this.inputCommitReports[i] = inputCommitReport;
190
+ }
191
+
192
+ public void setOutputCommitReport(int i, CommitReport outputCommitReport)
193
+ {
194
+ if (outputCommitReport == null) {
195
+ outputCommitReport = Exec.newCommitReport();
196
+ }
197
+ this.outputCommitReports[i] = outputCommitReport;
198
+ }
199
+
200
+ public boolean isOutputCommitted(int i)
201
+ {
202
+ return outputCommitReports[i] != null;
203
+ }
204
+
205
+ public void setException(int i, Throwable exception)
206
+ {
207
+ this.exceptions[i] = exception;
208
+ }
209
+
210
+ public boolean isAllCommitted()
211
+ {
212
+ if (processorCount <= 0) {
213
+ // not initialized
214
+ return false;
215
+ }
216
+ for (int i=0; i < processorCount; i++) {
217
+ if (!isOutputCommitted(i)) {
218
+ return false;
219
+ }
220
+ }
221
+ return true;
222
+ }
223
+
224
+ public boolean isAnyCommitted()
225
+ {
226
+ for (int i=0; i < processorCount; i++) {
227
+ if (isOutputCommitted(i)) {
228
+ return true;
229
+ }
230
+ }
231
+ return false;
232
+ }
233
+
234
+ public void setOutputNextConfig(NextConfig outputNextConfig)
235
+ {
109
236
  if (outputNextConfig == null) {
110
237
  outputNextConfig = Exec.newNextConfig();
111
238
  }
112
- NextConfig nextConfig = Exec.newNextConfig();
113
- nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
114
- nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
115
- return new ExecuteResult(nextConfig);
239
+ this.outputNextConfig = outputNextConfig;
116
240
  }
117
- }
118
241
 
119
- private static class ProcessResult
120
- {
121
- private final CommitReport inputCommitReport;
122
- private final CommitReport outputCommitReport;
242
+ public void setInputNextConfig(NextConfig inputNextConfig)
243
+ {
244
+ if (inputNextConfig == null) {
245
+ inputNextConfig = Exec.newNextConfig();
246
+ }
247
+ this.inputNextConfig = inputNextConfig;
248
+ }
123
249
 
124
- public ProcessResult(CommitReport inputCommitReport, CommitReport outputCommitReport)
250
+ public List<CommitReport> getInputCommitReports()
125
251
  {
126
- this.inputCommitReport = inputCommitReport;
127
- this.outputCommitReport = outputCommitReport;
252
+ return ImmutableList.copyOf(inputCommitReports);
128
253
  }
129
254
 
130
- public CommitReport getInputCommitReport()
255
+ public List<CommitReport> getOutputCommitReports()
131
256
  {
132
- return inputCommitReport;
257
+ return ImmutableList.copyOf(outputCommitReports);
133
258
  }
134
259
 
135
- public CommitReport getOutputCommitReport()
260
+ public RuntimeException getRepresentativeException()
136
261
  {
137
- return outputCommitReport;
262
+ RuntimeException top = null;
263
+ for (Throwable ex : exceptions) {
264
+ if (ex != null) {
265
+ if (top != null) {
266
+ top.addSuppressed(ex);
267
+ } else {
268
+ if (ex instanceof RuntimeException) {
269
+ top = (RuntimeException) ex;
270
+ } else {
271
+ top = new RuntimeException(ex);
272
+ }
273
+ }
274
+ }
275
+ }
276
+ if (top == null) {
277
+ top = new RuntimeException("Some transactions are not committed");
278
+ }
279
+ return top;
280
+ }
281
+
282
+ public int getCommittedUnclosedCount()
283
+ {
284
+ int count = 0;
285
+ for (int i=0; i < exceptions.length; i++) {
286
+ if (exceptions[i] != null && isOutputCommitted(i)) {
287
+ count++;
288
+ }
289
+ }
290
+ return count;
291
+ }
292
+
293
+ public ExecutionResult buildExecuteResult()
294
+ {
295
+ return buildExecuteResultWithWarningException(null);
296
+ }
297
+
298
+ public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
299
+ {
300
+ NextConfig nextConfig = Exec.newNextConfig();
301
+ if (inputNextConfig != null) {
302
+ nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
303
+ }
304
+ if (outputNextConfig != null) {
305
+ nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
306
+ }
307
+
308
+ ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
309
+ for (Throwable e : exceptions) {
310
+ if (e != null) {
311
+ ignoredExceptions.add(e);
312
+ }
313
+ }
314
+ if (ex != null) {
315
+ ignoredExceptions.add(ex);
316
+ }
317
+
318
+ return new ExecutionResult(nextConfig, ignoredExceptions.build());
319
+ }
320
+
321
+ public PartialExecutionException buildPartialExecuteException(Throwable cause,
322
+ ExecutorTask task, ExecSession exec)
323
+ {
324
+ return new PartialExecutionException(cause, new ResumeState(
325
+ exec.getSessionConfigSource(),
326
+ task.getInputTask(), task.getOutputTask(),
327
+ inputSchema, outputSchema,
328
+ Arrays.asList(inputCommitReports), Arrays.asList(outputCommitReports)));
138
329
  }
139
330
  }
140
331
 
@@ -143,17 +334,21 @@ public class LocalExecutor
143
334
  return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
144
335
  }
145
336
 
337
+ protected List<FilterPlugin> newFilterPlugins(ExecutorTask task)
338
+ {
339
+ return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
340
+ }
341
+
146
342
  protected OutputPlugin newOutputPlugin(ExecutorTask task)
147
343
  {
148
344
  return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
149
345
  }
150
346
 
151
- public ExecuteResult run(ExecSession exec, final ConfigSource config)
347
+ public ExecutionResult run(ExecSession exec, final ConfigSource config)
152
348
  {
153
- log = exec.getLogger(LocalExecutor.class);
154
349
  try {
155
- return Exec.doWith(exec, new ExecAction<ExecuteResult>() {
156
- public ExecuteResult run()
350
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
351
+ public ExecutionResult run()
157
352
  {
158
353
  return doRun(config);
159
354
  }
@@ -163,114 +358,270 @@ public class LocalExecutor
163
358
  }
164
359
  }
165
360
 
166
- private ExecuteResult doRun(ConfigSource config)
361
+ public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
362
+ {
363
+ try {
364
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
365
+ return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
366
+ public ExecutionResult run()
367
+ {
368
+ return doResume(config, resume);
369
+ }
370
+ });
371
+ } catch (Exception ex) {
372
+ throw Throwables.propagate(ex);
373
+ }
374
+ }
375
+
376
+ public void cleanup(final ConfigSource config, final ResumeState resume)
377
+ {
378
+ try {
379
+ ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
380
+ Exec.doWith(exec, new ExecAction<Void>() {
381
+ public Void run()
382
+ {
383
+ doCleanup(config, resume);
384
+ return null;
385
+ }
386
+ });
387
+ } catch (Exception ex) {
388
+ throw Throwables.propagate(ex);
389
+ }
390
+ }
391
+
392
+ public void doCleanup(ConfigSource config, ResumeState resume)
393
+ {
394
+ ExecutorTask task = config.loadConfig(ExecutorTask.class);
395
+ InputPlugin in = newInputPlugin(task);
396
+ OutputPlugin out = newOutputPlugin(task);
397
+
398
+ List<CommitReport> successInputCommitReports = ImmutableList.copyOf(
399
+ Iterables.filter(resume.getInputCommitReports(), Predicates.notNull()));
400
+ List<CommitReport> successOutputCommitReports = ImmutableList.copyOf(
401
+ Iterables.filter(resume.getOutputCommitReports(), Predicates.notNull()));
402
+
403
+ in.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
404
+ resume.getInputCommitReports().size(), successInputCommitReports);
405
+
406
+ out.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
407
+ resume.getOutputCommitReports().size(), successOutputCommitReports);
408
+ }
409
+
410
+ private ExecutionResult doRun(ConfigSource config)
167
411
  {
168
412
  final ExecutorTask task = config.loadConfig(ExecutorTask.class);
169
413
 
170
414
  final InputPlugin in = newInputPlugin(task);
415
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
171
416
  final OutputPlugin out = newOutputPlugin(task);
172
417
 
173
- final ExecuteResultBuilder execResult = new ExecuteResultBuilder();
174
-
175
- NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
176
- public List<CommitReport> run(final TaskSource inputTask, final Schema schema, final int processorCount)
177
- {
178
- final ImmutableList.Builder<CommitReport> inputCommitReports = ImmutableList.builder();
179
- NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), schema, processorCount, new OutputPlugin.Control() {
180
- public List<CommitReport> run(final TaskSource outputTask)
181
- {
182
- final ImmutableList.Builder<CommitReport> outputCommitReports = ImmutableList.builder();
183
- task.setInputTask(inputTask);
184
- task.setOutputTask(outputTask);
185
-
186
- //log.debug("input: %s", task.getInputTask());
187
- //log.debug("output: %s", task.getOutputTask());
188
-
189
- List<ProcessResult> results = process(task.dump(), schema, processorCount);
190
- for (ProcessResult result : results) {
191
- inputCommitReports.add(result.getInputCommitReport());
192
- outputCommitReports.add(result.getOutputCommitReport());
418
+ final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
419
+ try {
420
+ NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
421
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
422
+ {
423
+ state.initialize(processorCount);
424
+ state.setInputSchema(inputSchema);
425
+ Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
426
+ public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
427
+ {
428
+ Schema outputSchema = last(filterSchemas);
429
+ state.setOutputSchema(outputSchema);
430
+ NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), outputSchema, processorCount, new OutputPlugin.Control() {
431
+ public List<CommitReport> run(final TaskSource outputTask)
432
+ {
433
+ task.setInputTask(inputTask);
434
+ task.setFilterTasks(filterTasks);
435
+ task.setOutputTask(outputTask);
436
+
437
+ process(task.dump(), filterSchemas, processorCount, state);
438
+ if (!state.isAllCommitted()) {
439
+ throw state.getRepresentativeException();
440
+ }
441
+ return state.getOutputCommitReports();
442
+ }
443
+ });
444
+ state.setOutputNextConfig(outputNextConfig);
193
445
  }
446
+ });
447
+ return state.getInputCommitReports();
448
+ }
449
+ });
450
+ state.setInputNextConfig(inputNextConfig);
194
451
 
195
- return outputCommitReports.build();
196
- }
197
- });
198
- execResult.setOutputNextConfig(outputNextConfig);
199
- return inputCommitReports.build();
452
+ return state.buildExecuteResult();
453
+
454
+ } catch (Throwable ex) {
455
+ if (state.isAllCommitted()) {
456
+ // ignore the exception
457
+ return state.buildExecuteResultWithWarningException(ex);
200
458
  }
201
- });
202
- execResult.setInputNextConfig(inputNextConfig);
459
+ if (!state.isAnyStarted()) {
460
+ throw ex;
461
+ }
462
+ throw state.buildPartialExecuteException(ex, task, Exec.session());
463
+ }
464
+ }
465
+
466
+ private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
467
+ {
468
+ final ExecutorTask task = config.loadConfig(ExecutorTask.class);
469
+
470
+ final InputPlugin in = newInputPlugin(task);
471
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
472
+ final OutputPlugin out = newOutputPlugin(task);
473
+
474
+ final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
475
+ try {
476
+ NextConfig inputNextConfig = in.resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
477
+ public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
478
+ {
479
+ // TODO validate inputTask?
480
+ // TODO validate inputSchema
481
+ // TODO validate processorCount
482
+ state.initialize(processorCount);
483
+ Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
484
+ public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
485
+ {
486
+ Schema outputSchema = last(filterSchemas);
487
+ state.setOutputSchema(outputSchema);
488
+ NextConfig outputNextConfig = out.resume(resume.getOutputTaskSource(), outputSchema, processorCount, new OutputPlugin.Control() {
489
+ public List<CommitReport> run(final TaskSource outputTask)
490
+ {
491
+ // TODO validate outputTask?
492
+ task.setInputTask(inputTask);
493
+ task.setFilterTasks(filterTasks);
494
+ task.setOutputTask(outputTask);
495
+
496
+ for (int i=0; i < resume.getOutputCommitReports().size(); i++) {
497
+ if (resume.getOutputCommitReports().get(i) != null) {
498
+ state.start(i);
499
+ state.setInputCommitReport(i, resume.getInputCommitReports().get(i));
500
+ state.setOutputCommitReport(i, resume.getOutputCommitReports().get(i));
501
+ state.finish(i);
502
+ }
503
+ }
504
+
505
+ process(task.dump(), filterSchemas, processorCount, state);
506
+ if (!state.isAllCommitted()) {
507
+ throw state.getRepresentativeException();
508
+ }
509
+ return state.getOutputCommitReports();
510
+ }
511
+ });
512
+ state.setOutputNextConfig(outputNextConfig);
513
+ }
514
+ });
515
+ return state.getInputCommitReports();
516
+ }
517
+ });
518
+ state.setInputNextConfig(inputNextConfig);
203
519
 
204
- return execResult.build();
520
+ return state.buildExecuteResult();
521
+
522
+ } catch (Throwable ex) {
523
+ if (state.isAllCommitted()) {
524
+ // ignore the exception
525
+ return state.buildExecuteResultWithWarningException(ex);
526
+ }
527
+ if (!state.isAnyStarted()) {
528
+ throw ex;
529
+ }
530
+ throw state.buildPartialExecuteException(ex, task, Exec.session());
531
+ }
205
532
  }
206
533
 
207
- private List<ProcessResult> process(TaskSource taskSource, Schema schema, int processorCount)
534
+ private void process(TaskSource taskSource, List<Schema> filterSchemas, int processorCount,
535
+ ProcessState state)
208
536
  {
209
- List<Future<ProcessResult>> futures = new ArrayList<>();
210
- List<ProcessResult> joined = new ArrayList<>();
537
+ List<Future<Throwable>> futures = new ArrayList<>(processorCount);
211
538
  try {
212
- log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
213
- showProgress(processorCount);
214
539
  for (int i=0; i < processorCount; i++) {
215
- futures.add(startProcessor(taskSource, schema, i));
540
+ if (state.isOutputCommitted(i)) {
541
+ state.getLogger().warn("Skipped resumed task {}", i);
542
+ futures.add(null); // resumed
543
+ } else {
544
+ futures.add(startProcessor(taskSource, filterSchemas, i, state));
545
+ }
216
546
  }
547
+ showProgress(state);
217
548
 
218
549
  for (int i=0; i < processorCount; i++) {
550
+ if (futures.get(i) == null) {
551
+ continue;
552
+ }
219
553
  try {
220
- joined.add(futures.get(i).get());
221
- showProgress(processorCount);
222
-
554
+ state.setException(i, futures.get(i).get());
223
555
  } catch (ExecutionException ex) {
224
- throw Throwables.propagate(ex.getCause());
556
+ state.setException(i, ex.getCause());
557
+ //Throwables.propagate(ex.getCause());
225
558
  } catch (InterruptedException ex) {
226
- throw new ExecuteInterruptedException(ex);
559
+ state.setException(i, new ExecutionInterruptedException(ex));
227
560
  }
561
+ showProgress(state);
228
562
  }
229
- return joined;
230
563
  } finally {
231
- for (int i=joined.size(); i < futures.size(); i++) {
232
- futures.get(i).cancel(true);
233
- // TODO join?
564
+ for (Future<Throwable> future : futures) {
565
+ if (future != null && !future.isDone()) {
566
+ future.cancel(true);
567
+ // TODO join?
568
+ }
234
569
  }
235
570
  }
236
571
  }
237
572
 
238
- private void showProgress(int total)
573
+ private void showProgress(ProcessState state)
239
574
  {
240
- int running = runningTaskCount.get();
241
- int done = completedTaskCount.get();
242
- log.info(String.format("{done:%3d / %d, running: %d}", done, total, running));
575
+ int total = state.getProcessorCount();
576
+ int finished = state.getFinishedCount();
577
+ int started = state.getStartedCount();
578
+ state.getLogger().info(String.format("{done:%3d / %d, running: %d}", finished, total, started - finished));
243
579
  }
244
580
 
245
- private Future<ProcessResult> startProcessor(final TaskSource taskSource, final Schema schema, final int index)
581
+ private Future<Throwable> startProcessor(final TaskSource taskSource,
582
+ final List<Schema> filterSchemas, final int index,
583
+ final ProcessState state)
246
584
  {
247
- return executor.submit(new Callable<ProcessResult>() {
248
- public ProcessResult call()
585
+ return executor.submit(new Callable<Throwable>() {
586
+ public Throwable call()
249
587
  {
588
+ final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
589
+ final InputPlugin in = newInputPlugin(task);
590
+ final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
591
+ final OutputPlugin out = newOutputPlugin(task);
592
+
593
+ TransactionalPageOutput tran = out.open(task.getOutputTask(), last(filterSchemas), index);
594
+ PageOutput closeThis = tran;
595
+ state.start(index);
250
596
  try {
251
- runningTaskCount.getAndIncrement();
252
- final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
253
- final InputPlugin in = newInputPlugin(task);
254
- final OutputPlugin out = newOutputPlugin(task);
255
-
256
- TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
257
- boolean committed = false;
597
+ PageOutput filtered = closeThis = Filters.open(filterPlugins, task.getFilterTasks(), filterSchemas, tran);
598
+ state.setInputCommitReport(index, in.run(task.getInputTask(), first(filterSchemas), index, filtered));
599
+ state.setOutputCommitReport(index, tran.commit()); // TODO check output.finish() is called. wrap or abstract
600
+ return null;
601
+ } finally {
258
602
  try {
259
- CommitReport inReport = in.run(task.getInputTask(), schema, index, tran);
260
- CommitReport outReport = tran.commit(); // TODO check output.finish() is called. wrap or abstract
261
- committed = true;
262
- return new ProcessResult(inReport, outReport);
263
- } finally {
264
- if (!committed) {
265
- tran.abort();
603
+ try {
604
+ if (!state.isOutputCommitted(index)) {
605
+ tran.abort();
606
+ }
607
+ } finally {
608
+ closeThis.close();
266
609
  }
267
- tran.close();
610
+ } finally {
611
+ state.finish(index);
268
612
  }
269
- } finally {
270
- runningTaskCount.getAndDecrement();
271
- completedTaskCount.getAndIncrement();
272
613
  }
273
614
  }
274
615
  });
275
616
  }
617
+
618
+ private static Schema first(List<Schema> filterSchemas)
619
+ {
620
+ return filterSchemas.get(0);
621
+ }
622
+
623
+ private static Schema last(List<Schema> filterSchemas)
624
+ {
625
+ return filterSchemas.get(filterSchemas.size() - 1);
626
+ }
276
627
  }