embulk 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/ChangeLog +12 -0
- data/README.md +38 -13
- data/build.gradle +6 -1
- data/embulk-cli/pom.xml +1 -1
- data/embulk-core/pom.xml +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
- data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
- data/embulk-standards/pom.xml +1 -1
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
- data/lib/embulk/command/embulk_run.rb +16 -1
- data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
- data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
- data/lib/embulk/filter_plugin.rb +86 -0
- data/lib/embulk/input_plugin.rb +37 -2
- data/lib/embulk/java/imports.rb +1 -0
- data/lib/embulk/output_plugin.rb +30 -0
- data/lib/embulk/plugin.rb +32 -19
- data/lib/embulk/schema.rb +16 -9
- data/lib/embulk/version.rb +1 -1
- data/pom.xml +1 -1
- metadata +13 -7
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.exec;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.Arrays;
|
4
5
|
import java.util.ArrayList;
|
5
6
|
import java.util.concurrent.Callable;
|
6
7
|
import java.util.concurrent.Future;
|
@@ -13,9 +14,12 @@ import com.google.common.collect.ImmutableList;
|
|
13
14
|
import com.google.inject.Inject;
|
14
15
|
import com.google.inject.Injector;
|
15
16
|
import com.google.common.base.Throwables;
|
17
|
+
import com.google.common.base.Predicates;
|
18
|
+
import com.google.common.collect.Iterables;
|
16
19
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
17
20
|
import org.embulk.config.Task;
|
18
21
|
import org.embulk.config.Config;
|
22
|
+
import org.embulk.config.ConfigDefault;
|
19
23
|
import org.embulk.config.ConfigSource;
|
20
24
|
import org.embulk.config.TaskSource;
|
21
25
|
import org.embulk.config.NextConfig;
|
@@ -26,8 +30,11 @@ import org.embulk.spi.Exec;
|
|
26
30
|
import org.embulk.spi.ExecSession;
|
27
31
|
import org.embulk.spi.ExecAction;
|
28
32
|
import org.embulk.spi.InputPlugin;
|
33
|
+
import org.embulk.spi.FilterPlugin;
|
29
34
|
import org.embulk.spi.OutputPlugin;
|
35
|
+
import org.embulk.spi.PageOutput;
|
30
36
|
import org.embulk.spi.TransactionalPageOutput;
|
37
|
+
import org.embulk.spi.util.Filters;
|
31
38
|
import org.slf4j.Logger;
|
32
39
|
|
33
40
|
public class LocalExecutor
|
@@ -37,22 +44,25 @@ public class LocalExecutor
|
|
37
44
|
private final int maxThreads;
|
38
45
|
private final ExecutorService executor;
|
39
46
|
|
40
|
-
private Logger log;
|
41
|
-
private final AtomicInteger runningTaskCount;
|
42
|
-
private final AtomicInteger completedTaskCount;
|
43
|
-
|
44
47
|
public interface ExecutorTask
|
45
48
|
extends Task
|
46
49
|
{
|
47
50
|
@Config("in")
|
48
51
|
public ConfigSource getInputConfig();
|
49
52
|
|
53
|
+
@Config("filters")
|
54
|
+
@ConfigDefault("[]")
|
55
|
+
public List<ConfigSource> getFilterConfigs();
|
56
|
+
|
50
57
|
@Config("out")
|
51
58
|
public ConfigSource getOutputConfig();
|
52
59
|
|
53
60
|
public TaskSource getInputTask();
|
54
61
|
public void setInputTask(TaskSource taskSource);
|
55
62
|
|
63
|
+
public List<TaskSource> getFilterTasks();
|
64
|
+
public void setFilterTasks(List<TaskSource> taskSources);
|
65
|
+
|
56
66
|
public TaskSource getOutputTask();
|
57
67
|
public void setOutputTask(TaskSource taskSource);
|
58
68
|
}
|
@@ -71,70 +81,251 @@ public class LocalExecutor
|
|
71
81
|
.setNameFormat("embulk-executor-%d")
|
72
82
|
.setDaemon(true)
|
73
83
|
.build());
|
74
|
-
|
75
|
-
this.runningTaskCount = new AtomicInteger(0);
|
76
|
-
this.completedTaskCount = new AtomicInteger(0);
|
77
84
|
}
|
78
85
|
|
79
|
-
private static class
|
86
|
+
private static class ProcessState
|
80
87
|
{
|
81
|
-
private
|
82
|
-
private
|
88
|
+
private final Logger logger;
|
89
|
+
private volatile boolean[] started;
|
90
|
+
private volatile boolean[] finished;
|
91
|
+
private volatile Schema inputSchema;
|
92
|
+
private volatile Schema outputSchema;
|
93
|
+
private volatile Throwable[] exceptions;
|
94
|
+
private volatile CommitReport[] inputCommitReports;
|
95
|
+
private volatile CommitReport[] outputCommitReports;
|
96
|
+
private volatile NextConfig inputNextConfig;
|
97
|
+
private volatile NextConfig outputNextConfig;
|
98
|
+
private int processorCount;
|
99
|
+
|
100
|
+
public ProcessState(Logger logger)
|
101
|
+
{
|
102
|
+
this.logger = logger;
|
103
|
+
}
|
83
104
|
|
84
|
-
public
|
105
|
+
public Logger getLogger()
|
85
106
|
{
|
86
|
-
|
107
|
+
return logger;
|
87
108
|
}
|
88
109
|
|
89
|
-
public void
|
110
|
+
public void initialize(int count)
|
90
111
|
{
|
91
|
-
this.
|
112
|
+
this.started = new boolean[count];
|
113
|
+
this.finished = new boolean[count];
|
114
|
+
this.exceptions = new Throwable[count];
|
115
|
+
this.inputCommitReports = new CommitReport[count];
|
116
|
+
this.outputCommitReports = new CommitReport[count];
|
117
|
+
this.processorCount = count;
|
92
118
|
}
|
93
119
|
|
94
|
-
public
|
120
|
+
public void setInputSchema(Schema inputSchema)
|
95
121
|
{
|
96
|
-
|
122
|
+
this.inputSchema = inputSchema;
|
97
123
|
}
|
98
124
|
|
99
|
-
public
|
125
|
+
public void setOutputSchema(Schema outputSchema)
|
100
126
|
{
|
101
|
-
|
127
|
+
this.outputSchema = outputSchema;
|
102
128
|
}
|
103
129
|
|
104
|
-
public
|
130
|
+
public Schema getInputSchema()
|
105
131
|
{
|
106
|
-
|
107
|
-
|
132
|
+
return inputSchema;
|
133
|
+
}
|
134
|
+
|
135
|
+
public Schema getOutputSchema()
|
136
|
+
{
|
137
|
+
return outputSchema;
|
138
|
+
}
|
139
|
+
|
140
|
+
public boolean isAnyStarted()
|
141
|
+
{
|
142
|
+
if (started == null) {
|
143
|
+
return false;
|
108
144
|
}
|
145
|
+
for (boolean b : started) {
|
146
|
+
if (b) { return true; }
|
147
|
+
}
|
148
|
+
return false;
|
149
|
+
}
|
150
|
+
|
151
|
+
public void start(int i)
|
152
|
+
{
|
153
|
+
started[i] = true;
|
154
|
+
}
|
155
|
+
|
156
|
+
public void finish(int i)
|
157
|
+
{
|
158
|
+
finished[i] = true;
|
159
|
+
}
|
160
|
+
|
161
|
+
public int getProcessorCount()
|
162
|
+
{
|
163
|
+
return processorCount;
|
164
|
+
}
|
165
|
+
|
166
|
+
public int getStartedCount()
|
167
|
+
{
|
168
|
+
int count = 0;
|
169
|
+
for (int i=0; i < started.length; i++) {
|
170
|
+
if (started[i]) { count++; }
|
171
|
+
}
|
172
|
+
return count;
|
173
|
+
}
|
174
|
+
|
175
|
+
public int getFinishedCount()
|
176
|
+
{
|
177
|
+
int count = 0;
|
178
|
+
for (int i=0; i < finished.length; i++) {
|
179
|
+
if (finished[i]) { count++; }
|
180
|
+
}
|
181
|
+
return count;
|
182
|
+
}
|
183
|
+
|
184
|
+
public void setInputCommitReport(int i, CommitReport inputCommitReport)
|
185
|
+
{
|
186
|
+
if (inputCommitReport == null) {
|
187
|
+
inputCommitReport = Exec.newCommitReport();
|
188
|
+
}
|
189
|
+
this.inputCommitReports[i] = inputCommitReport;
|
190
|
+
}
|
191
|
+
|
192
|
+
public void setOutputCommitReport(int i, CommitReport outputCommitReport)
|
193
|
+
{
|
194
|
+
if (outputCommitReport == null) {
|
195
|
+
outputCommitReport = Exec.newCommitReport();
|
196
|
+
}
|
197
|
+
this.outputCommitReports[i] = outputCommitReport;
|
198
|
+
}
|
199
|
+
|
200
|
+
public boolean isOutputCommitted(int i)
|
201
|
+
{
|
202
|
+
return outputCommitReports[i] != null;
|
203
|
+
}
|
204
|
+
|
205
|
+
public void setException(int i, Throwable exception)
|
206
|
+
{
|
207
|
+
this.exceptions[i] = exception;
|
208
|
+
}
|
209
|
+
|
210
|
+
public boolean isAllCommitted()
|
211
|
+
{
|
212
|
+
if (processorCount <= 0) {
|
213
|
+
// not initialized
|
214
|
+
return false;
|
215
|
+
}
|
216
|
+
for (int i=0; i < processorCount; i++) {
|
217
|
+
if (!isOutputCommitted(i)) {
|
218
|
+
return false;
|
219
|
+
}
|
220
|
+
}
|
221
|
+
return true;
|
222
|
+
}
|
223
|
+
|
224
|
+
public boolean isAnyCommitted()
|
225
|
+
{
|
226
|
+
for (int i=0; i < processorCount; i++) {
|
227
|
+
if (isOutputCommitted(i)) {
|
228
|
+
return true;
|
229
|
+
}
|
230
|
+
}
|
231
|
+
return false;
|
232
|
+
}
|
233
|
+
|
234
|
+
public void setOutputNextConfig(NextConfig outputNextConfig)
|
235
|
+
{
|
109
236
|
if (outputNextConfig == null) {
|
110
237
|
outputNextConfig = Exec.newNextConfig();
|
111
238
|
}
|
112
|
-
|
113
|
-
nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
|
114
|
-
nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
|
115
|
-
return new ExecuteResult(nextConfig);
|
239
|
+
this.outputNextConfig = outputNextConfig;
|
116
240
|
}
|
117
|
-
}
|
118
241
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
242
|
+
public void setInputNextConfig(NextConfig inputNextConfig)
|
243
|
+
{
|
244
|
+
if (inputNextConfig == null) {
|
245
|
+
inputNextConfig = Exec.newNextConfig();
|
246
|
+
}
|
247
|
+
this.inputNextConfig = inputNextConfig;
|
248
|
+
}
|
123
249
|
|
124
|
-
public
|
250
|
+
public List<CommitReport> getInputCommitReports()
|
125
251
|
{
|
126
|
-
|
127
|
-
this.outputCommitReport = outputCommitReport;
|
252
|
+
return ImmutableList.copyOf(inputCommitReports);
|
128
253
|
}
|
129
254
|
|
130
|
-
public CommitReport
|
255
|
+
public List<CommitReport> getOutputCommitReports()
|
131
256
|
{
|
132
|
-
return
|
257
|
+
return ImmutableList.copyOf(outputCommitReports);
|
133
258
|
}
|
134
259
|
|
135
|
-
public
|
260
|
+
public RuntimeException getRepresentativeException()
|
136
261
|
{
|
137
|
-
|
262
|
+
RuntimeException top = null;
|
263
|
+
for (Throwable ex : exceptions) {
|
264
|
+
if (ex != null) {
|
265
|
+
if (top != null) {
|
266
|
+
top.addSuppressed(ex);
|
267
|
+
} else {
|
268
|
+
if (ex instanceof RuntimeException) {
|
269
|
+
top = (RuntimeException) ex;
|
270
|
+
} else {
|
271
|
+
top = new RuntimeException(ex);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
}
|
275
|
+
}
|
276
|
+
if (top == null) {
|
277
|
+
top = new RuntimeException("Some transactions are not committed");
|
278
|
+
}
|
279
|
+
return top;
|
280
|
+
}
|
281
|
+
|
282
|
+
public int getCommittedUnclosedCount()
|
283
|
+
{
|
284
|
+
int count = 0;
|
285
|
+
for (int i=0; i < exceptions.length; i++) {
|
286
|
+
if (exceptions[i] != null && isOutputCommitted(i)) {
|
287
|
+
count++;
|
288
|
+
}
|
289
|
+
}
|
290
|
+
return count;
|
291
|
+
}
|
292
|
+
|
293
|
+
public ExecutionResult buildExecuteResult()
|
294
|
+
{
|
295
|
+
return buildExecuteResultWithWarningException(null);
|
296
|
+
}
|
297
|
+
|
298
|
+
public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
|
299
|
+
{
|
300
|
+
NextConfig nextConfig = Exec.newNextConfig();
|
301
|
+
if (inputNextConfig != null) {
|
302
|
+
nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
|
303
|
+
}
|
304
|
+
if (outputNextConfig != null) {
|
305
|
+
nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
|
306
|
+
}
|
307
|
+
|
308
|
+
ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
|
309
|
+
for (Throwable e : exceptions) {
|
310
|
+
if (e != null) {
|
311
|
+
ignoredExceptions.add(e);
|
312
|
+
}
|
313
|
+
}
|
314
|
+
if (ex != null) {
|
315
|
+
ignoredExceptions.add(ex);
|
316
|
+
}
|
317
|
+
|
318
|
+
return new ExecutionResult(nextConfig, ignoredExceptions.build());
|
319
|
+
}
|
320
|
+
|
321
|
+
public PartialExecutionException buildPartialExecuteException(Throwable cause,
|
322
|
+
ExecutorTask task, ExecSession exec)
|
323
|
+
{
|
324
|
+
return new PartialExecutionException(cause, new ResumeState(
|
325
|
+
exec.getSessionConfigSource(),
|
326
|
+
task.getInputTask(), task.getOutputTask(),
|
327
|
+
inputSchema, outputSchema,
|
328
|
+
Arrays.asList(inputCommitReports), Arrays.asList(outputCommitReports)));
|
138
329
|
}
|
139
330
|
}
|
140
331
|
|
@@ -143,17 +334,21 @@ public class LocalExecutor
|
|
143
334
|
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
144
335
|
}
|
145
336
|
|
337
|
+
protected List<FilterPlugin> newFilterPlugins(ExecutorTask task)
|
338
|
+
{
|
339
|
+
return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
|
340
|
+
}
|
341
|
+
|
146
342
|
protected OutputPlugin newOutputPlugin(ExecutorTask task)
|
147
343
|
{
|
148
344
|
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
149
345
|
}
|
150
346
|
|
151
|
-
public
|
347
|
+
public ExecutionResult run(ExecSession exec, final ConfigSource config)
|
152
348
|
{
|
153
|
-
log = exec.getLogger(LocalExecutor.class);
|
154
349
|
try {
|
155
|
-
return Exec.doWith(exec, new ExecAction<
|
156
|
-
public
|
350
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
351
|
+
public ExecutionResult run()
|
157
352
|
{
|
158
353
|
return doRun(config);
|
159
354
|
}
|
@@ -163,114 +358,270 @@ public class LocalExecutor
|
|
163
358
|
}
|
164
359
|
}
|
165
360
|
|
166
|
-
|
361
|
+
public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
|
362
|
+
{
|
363
|
+
try {
|
364
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
365
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
366
|
+
public ExecutionResult run()
|
367
|
+
{
|
368
|
+
return doResume(config, resume);
|
369
|
+
}
|
370
|
+
});
|
371
|
+
} catch (Exception ex) {
|
372
|
+
throw Throwables.propagate(ex);
|
373
|
+
}
|
374
|
+
}
|
375
|
+
|
376
|
+
public void cleanup(final ConfigSource config, final ResumeState resume)
|
377
|
+
{
|
378
|
+
try {
|
379
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
380
|
+
Exec.doWith(exec, new ExecAction<Void>() {
|
381
|
+
public Void run()
|
382
|
+
{
|
383
|
+
doCleanup(config, resume);
|
384
|
+
return null;
|
385
|
+
}
|
386
|
+
});
|
387
|
+
} catch (Exception ex) {
|
388
|
+
throw Throwables.propagate(ex);
|
389
|
+
}
|
390
|
+
}
|
391
|
+
|
392
|
+
public void doCleanup(ConfigSource config, ResumeState resume)
|
393
|
+
{
|
394
|
+
ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
395
|
+
InputPlugin in = newInputPlugin(task);
|
396
|
+
OutputPlugin out = newOutputPlugin(task);
|
397
|
+
|
398
|
+
List<CommitReport> successInputCommitReports = ImmutableList.copyOf(
|
399
|
+
Iterables.filter(resume.getInputCommitReports(), Predicates.notNull()));
|
400
|
+
List<CommitReport> successOutputCommitReports = ImmutableList.copyOf(
|
401
|
+
Iterables.filter(resume.getOutputCommitReports(), Predicates.notNull()));
|
402
|
+
|
403
|
+
in.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
404
|
+
resume.getInputCommitReports().size(), successInputCommitReports);
|
405
|
+
|
406
|
+
out.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
407
|
+
resume.getOutputCommitReports().size(), successOutputCommitReports);
|
408
|
+
}
|
409
|
+
|
410
|
+
private ExecutionResult doRun(ConfigSource config)
|
167
411
|
{
|
168
412
|
final ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
169
413
|
|
170
414
|
final InputPlugin in = newInputPlugin(task);
|
415
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
171
416
|
final OutputPlugin out = newOutputPlugin(task);
|
172
417
|
|
173
|
-
final
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
418
|
+
final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
|
419
|
+
try {
|
420
|
+
NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
421
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
|
422
|
+
{
|
423
|
+
state.initialize(processorCount);
|
424
|
+
state.setInputSchema(inputSchema);
|
425
|
+
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
426
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
427
|
+
{
|
428
|
+
Schema outputSchema = last(filterSchemas);
|
429
|
+
state.setOutputSchema(outputSchema);
|
430
|
+
NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), outputSchema, processorCount, new OutputPlugin.Control() {
|
431
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
432
|
+
{
|
433
|
+
task.setInputTask(inputTask);
|
434
|
+
task.setFilterTasks(filterTasks);
|
435
|
+
task.setOutputTask(outputTask);
|
436
|
+
|
437
|
+
process(task.dump(), filterSchemas, processorCount, state);
|
438
|
+
if (!state.isAllCommitted()) {
|
439
|
+
throw state.getRepresentativeException();
|
440
|
+
}
|
441
|
+
return state.getOutputCommitReports();
|
442
|
+
}
|
443
|
+
});
|
444
|
+
state.setOutputNextConfig(outputNextConfig);
|
193
445
|
}
|
446
|
+
});
|
447
|
+
return state.getInputCommitReports();
|
448
|
+
}
|
449
|
+
});
|
450
|
+
state.setInputNextConfig(inputNextConfig);
|
194
451
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
452
|
+
return state.buildExecuteResult();
|
453
|
+
|
454
|
+
} catch (Throwable ex) {
|
455
|
+
if (state.isAllCommitted()) {
|
456
|
+
// ignore the exception
|
457
|
+
return state.buildExecuteResultWithWarningException(ex);
|
200
458
|
}
|
201
|
-
|
202
|
-
|
459
|
+
if (!state.isAnyStarted()) {
|
460
|
+
throw ex;
|
461
|
+
}
|
462
|
+
throw state.buildPartialExecuteException(ex, task, Exec.session());
|
463
|
+
}
|
464
|
+
}
|
465
|
+
|
466
|
+
private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
|
467
|
+
{
|
468
|
+
final ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
469
|
+
|
470
|
+
final InputPlugin in = newInputPlugin(task);
|
471
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
472
|
+
final OutputPlugin out = newOutputPlugin(task);
|
473
|
+
|
474
|
+
final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
|
475
|
+
try {
|
476
|
+
NextConfig inputNextConfig = in.resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
|
477
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
|
478
|
+
{
|
479
|
+
// TODO validate inputTask?
|
480
|
+
// TODO validate inputSchema
|
481
|
+
// TODO validate processorCount
|
482
|
+
state.initialize(processorCount);
|
483
|
+
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
484
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
485
|
+
{
|
486
|
+
Schema outputSchema = last(filterSchemas);
|
487
|
+
state.setOutputSchema(outputSchema);
|
488
|
+
NextConfig outputNextConfig = out.resume(resume.getOutputTaskSource(), outputSchema, processorCount, new OutputPlugin.Control() {
|
489
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
490
|
+
{
|
491
|
+
// TODO validate outputTask?
|
492
|
+
task.setInputTask(inputTask);
|
493
|
+
task.setFilterTasks(filterTasks);
|
494
|
+
task.setOutputTask(outputTask);
|
495
|
+
|
496
|
+
for (int i=0; i < resume.getOutputCommitReports().size(); i++) {
|
497
|
+
if (resume.getOutputCommitReports().get(i) != null) {
|
498
|
+
state.start(i);
|
499
|
+
state.setInputCommitReport(i, resume.getInputCommitReports().get(i));
|
500
|
+
state.setOutputCommitReport(i, resume.getOutputCommitReports().get(i));
|
501
|
+
state.finish(i);
|
502
|
+
}
|
503
|
+
}
|
504
|
+
|
505
|
+
process(task.dump(), filterSchemas, processorCount, state);
|
506
|
+
if (!state.isAllCommitted()) {
|
507
|
+
throw state.getRepresentativeException();
|
508
|
+
}
|
509
|
+
return state.getOutputCommitReports();
|
510
|
+
}
|
511
|
+
});
|
512
|
+
state.setOutputNextConfig(outputNextConfig);
|
513
|
+
}
|
514
|
+
});
|
515
|
+
return state.getInputCommitReports();
|
516
|
+
}
|
517
|
+
});
|
518
|
+
state.setInputNextConfig(inputNextConfig);
|
203
519
|
|
204
|
-
|
520
|
+
return state.buildExecuteResult();
|
521
|
+
|
522
|
+
} catch (Throwable ex) {
|
523
|
+
if (state.isAllCommitted()) {
|
524
|
+
// ignore the exception
|
525
|
+
return state.buildExecuteResultWithWarningException(ex);
|
526
|
+
}
|
527
|
+
if (!state.isAnyStarted()) {
|
528
|
+
throw ex;
|
529
|
+
}
|
530
|
+
throw state.buildPartialExecuteException(ex, task, Exec.session());
|
531
|
+
}
|
205
532
|
}
|
206
533
|
|
207
|
-
private
|
534
|
+
private void process(TaskSource taskSource, List<Schema> filterSchemas, int processorCount,
|
535
|
+
ProcessState state)
|
208
536
|
{
|
209
|
-
List<Future<
|
210
|
-
List<ProcessResult> joined = new ArrayList<>();
|
537
|
+
List<Future<Throwable>> futures = new ArrayList<>(processorCount);
|
211
538
|
try {
|
212
|
-
log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
|
213
|
-
showProgress(processorCount);
|
214
539
|
for (int i=0; i < processorCount; i++) {
|
215
|
-
|
540
|
+
if (state.isOutputCommitted(i)) {
|
541
|
+
state.getLogger().warn("Skipped resumed task {}", i);
|
542
|
+
futures.add(null); // resumed
|
543
|
+
} else {
|
544
|
+
futures.add(startProcessor(taskSource, filterSchemas, i, state));
|
545
|
+
}
|
216
546
|
}
|
547
|
+
showProgress(state);
|
217
548
|
|
218
549
|
for (int i=0; i < processorCount; i++) {
|
550
|
+
if (futures.get(i) == null) {
|
551
|
+
continue;
|
552
|
+
}
|
219
553
|
try {
|
220
|
-
|
221
|
-
showProgress(processorCount);
|
222
|
-
|
554
|
+
state.setException(i, futures.get(i).get());
|
223
555
|
} catch (ExecutionException ex) {
|
224
|
-
|
556
|
+
state.setException(i, ex.getCause());
|
557
|
+
//Throwables.propagate(ex.getCause());
|
225
558
|
} catch (InterruptedException ex) {
|
226
|
-
|
559
|
+
state.setException(i, new ExecutionInterruptedException(ex));
|
227
560
|
}
|
561
|
+
showProgress(state);
|
228
562
|
}
|
229
|
-
return joined;
|
230
563
|
} finally {
|
231
|
-
for (
|
232
|
-
|
233
|
-
|
564
|
+
for (Future<Throwable> future : futures) {
|
565
|
+
if (future != null && !future.isDone()) {
|
566
|
+
future.cancel(true);
|
567
|
+
// TODO join?
|
568
|
+
}
|
234
569
|
}
|
235
570
|
}
|
236
571
|
}
|
237
572
|
|
238
|
-
private void showProgress(
|
573
|
+
private void showProgress(ProcessState state)
|
239
574
|
{
|
240
|
-
int
|
241
|
-
int
|
242
|
-
|
575
|
+
int total = state.getProcessorCount();
|
576
|
+
int finished = state.getFinishedCount();
|
577
|
+
int started = state.getStartedCount();
|
578
|
+
state.getLogger().info(String.format("{done:%3d / %d, running: %d}", finished, total, started - finished));
|
243
579
|
}
|
244
580
|
|
245
|
-
private Future<
|
581
|
+
private Future<Throwable> startProcessor(final TaskSource taskSource,
|
582
|
+
final List<Schema> filterSchemas, final int index,
|
583
|
+
final ProcessState state)
|
246
584
|
{
|
247
|
-
return executor.submit(new Callable<
|
248
|
-
public
|
585
|
+
return executor.submit(new Callable<Throwable>() {
|
586
|
+
public Throwable call()
|
249
587
|
{
|
588
|
+
final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
|
589
|
+
final InputPlugin in = newInputPlugin(task);
|
590
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
591
|
+
final OutputPlugin out = newOutputPlugin(task);
|
592
|
+
|
593
|
+
TransactionalPageOutput tran = out.open(task.getOutputTask(), last(filterSchemas), index);
|
594
|
+
PageOutput closeThis = tran;
|
595
|
+
state.start(index);
|
250
596
|
try {
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
|
257
|
-
boolean committed = false;
|
597
|
+
PageOutput filtered = closeThis = Filters.open(filterPlugins, task.getFilterTasks(), filterSchemas, tran);
|
598
|
+
state.setInputCommitReport(index, in.run(task.getInputTask(), first(filterSchemas), index, filtered));
|
599
|
+
state.setOutputCommitReport(index, tran.commit()); // TODO check output.finish() is called. wrap or abstract
|
600
|
+
return null;
|
601
|
+
} finally {
|
258
602
|
try {
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
tran.abort();
|
603
|
+
try {
|
604
|
+
if (!state.isOutputCommitted(index)) {
|
605
|
+
tran.abort();
|
606
|
+
}
|
607
|
+
} finally {
|
608
|
+
closeThis.close();
|
266
609
|
}
|
267
|
-
|
610
|
+
} finally {
|
611
|
+
state.finish(index);
|
268
612
|
}
|
269
|
-
} finally {
|
270
|
-
runningTaskCount.getAndDecrement();
|
271
|
-
completedTaskCount.getAndIncrement();
|
272
613
|
}
|
273
614
|
}
|
274
615
|
});
|
275
616
|
}
|
617
|
+
|
618
|
+
private static Schema first(List<Schema> filterSchemas)
|
619
|
+
{
|
620
|
+
return filterSchemas.get(0);
|
621
|
+
}
|
622
|
+
|
623
|
+
private static Schema last(List<Schema> filterSchemas)
|
624
|
+
{
|
625
|
+
return filterSchemas.get(filterSchemas.size() - 1);
|
626
|
+
}
|
276
627
|
}
|