embulk 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/ChangeLog +12 -0
- data/README.md +38 -13
- data/build.gradle +6 -1
- data/embulk-cli/pom.xml +1 -1
- data/embulk-core/pom.xml +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
- data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
- data/embulk-standards/pom.xml +1 -1
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
- data/lib/embulk/command/embulk_run.rb +16 -1
- data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
- data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
- data/lib/embulk/filter_plugin.rb +86 -0
- data/lib/embulk/input_plugin.rb +37 -2
- data/lib/embulk/java/imports.rb +1 -0
- data/lib/embulk/output_plugin.rb +30 -0
- data/lib/embulk/plugin.rb +32 -19
- data/lib/embulk/schema.rb +16 -9
- data/lib/embulk/version.rb +1 -1
- data/pom.xml +1 -1
- metadata +13 -7
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.exec;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.Arrays;
|
4
5
|
import java.util.ArrayList;
|
5
6
|
import java.util.concurrent.Callable;
|
6
7
|
import java.util.concurrent.Future;
|
@@ -13,9 +14,12 @@ import com.google.common.collect.ImmutableList;
|
|
13
14
|
import com.google.inject.Inject;
|
14
15
|
import com.google.inject.Injector;
|
15
16
|
import com.google.common.base.Throwables;
|
17
|
+
import com.google.common.base.Predicates;
|
18
|
+
import com.google.common.collect.Iterables;
|
16
19
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
17
20
|
import org.embulk.config.Task;
|
18
21
|
import org.embulk.config.Config;
|
22
|
+
import org.embulk.config.ConfigDefault;
|
19
23
|
import org.embulk.config.ConfigSource;
|
20
24
|
import org.embulk.config.TaskSource;
|
21
25
|
import org.embulk.config.NextConfig;
|
@@ -26,8 +30,11 @@ import org.embulk.spi.Exec;
|
|
26
30
|
import org.embulk.spi.ExecSession;
|
27
31
|
import org.embulk.spi.ExecAction;
|
28
32
|
import org.embulk.spi.InputPlugin;
|
33
|
+
import org.embulk.spi.FilterPlugin;
|
29
34
|
import org.embulk.spi.OutputPlugin;
|
35
|
+
import org.embulk.spi.PageOutput;
|
30
36
|
import org.embulk.spi.TransactionalPageOutput;
|
37
|
+
import org.embulk.spi.util.Filters;
|
31
38
|
import org.slf4j.Logger;
|
32
39
|
|
33
40
|
public class LocalExecutor
|
@@ -37,22 +44,25 @@ public class LocalExecutor
|
|
37
44
|
private final int maxThreads;
|
38
45
|
private final ExecutorService executor;
|
39
46
|
|
40
|
-
private Logger log;
|
41
|
-
private final AtomicInteger runningTaskCount;
|
42
|
-
private final AtomicInteger completedTaskCount;
|
43
|
-
|
44
47
|
public interface ExecutorTask
|
45
48
|
extends Task
|
46
49
|
{
|
47
50
|
@Config("in")
|
48
51
|
public ConfigSource getInputConfig();
|
49
52
|
|
53
|
+
@Config("filters")
|
54
|
+
@ConfigDefault("[]")
|
55
|
+
public List<ConfigSource> getFilterConfigs();
|
56
|
+
|
50
57
|
@Config("out")
|
51
58
|
public ConfigSource getOutputConfig();
|
52
59
|
|
53
60
|
public TaskSource getInputTask();
|
54
61
|
public void setInputTask(TaskSource taskSource);
|
55
62
|
|
63
|
+
public List<TaskSource> getFilterTasks();
|
64
|
+
public void setFilterTasks(List<TaskSource> taskSources);
|
65
|
+
|
56
66
|
public TaskSource getOutputTask();
|
57
67
|
public void setOutputTask(TaskSource taskSource);
|
58
68
|
}
|
@@ -71,70 +81,251 @@ public class LocalExecutor
|
|
71
81
|
.setNameFormat("embulk-executor-%d")
|
72
82
|
.setDaemon(true)
|
73
83
|
.build());
|
74
|
-
|
75
|
-
this.runningTaskCount = new AtomicInteger(0);
|
76
|
-
this.completedTaskCount = new AtomicInteger(0);
|
77
84
|
}
|
78
85
|
|
79
|
-
private static class
|
86
|
+
private static class ProcessState
|
80
87
|
{
|
81
|
-
private
|
82
|
-
private
|
88
|
+
private final Logger logger;
|
89
|
+
private volatile boolean[] started;
|
90
|
+
private volatile boolean[] finished;
|
91
|
+
private volatile Schema inputSchema;
|
92
|
+
private volatile Schema outputSchema;
|
93
|
+
private volatile Throwable[] exceptions;
|
94
|
+
private volatile CommitReport[] inputCommitReports;
|
95
|
+
private volatile CommitReport[] outputCommitReports;
|
96
|
+
private volatile NextConfig inputNextConfig;
|
97
|
+
private volatile NextConfig outputNextConfig;
|
98
|
+
private int processorCount;
|
99
|
+
|
100
|
+
public ProcessState(Logger logger)
|
101
|
+
{
|
102
|
+
this.logger = logger;
|
103
|
+
}
|
83
104
|
|
84
|
-
public
|
105
|
+
public Logger getLogger()
|
85
106
|
{
|
86
|
-
|
107
|
+
return logger;
|
87
108
|
}
|
88
109
|
|
89
|
-
public void
|
110
|
+
public void initialize(int count)
|
90
111
|
{
|
91
|
-
this.
|
112
|
+
this.started = new boolean[count];
|
113
|
+
this.finished = new boolean[count];
|
114
|
+
this.exceptions = new Throwable[count];
|
115
|
+
this.inputCommitReports = new CommitReport[count];
|
116
|
+
this.outputCommitReports = new CommitReport[count];
|
117
|
+
this.processorCount = count;
|
92
118
|
}
|
93
119
|
|
94
|
-
public
|
120
|
+
public void setInputSchema(Schema inputSchema)
|
95
121
|
{
|
96
|
-
|
122
|
+
this.inputSchema = inputSchema;
|
97
123
|
}
|
98
124
|
|
99
|
-
public
|
125
|
+
public void setOutputSchema(Schema outputSchema)
|
100
126
|
{
|
101
|
-
|
127
|
+
this.outputSchema = outputSchema;
|
102
128
|
}
|
103
129
|
|
104
|
-
public
|
130
|
+
public Schema getInputSchema()
|
105
131
|
{
|
106
|
-
|
107
|
-
|
132
|
+
return inputSchema;
|
133
|
+
}
|
134
|
+
|
135
|
+
public Schema getOutputSchema()
|
136
|
+
{
|
137
|
+
return outputSchema;
|
138
|
+
}
|
139
|
+
|
140
|
+
public boolean isAnyStarted()
|
141
|
+
{
|
142
|
+
if (started == null) {
|
143
|
+
return false;
|
108
144
|
}
|
145
|
+
for (boolean b : started) {
|
146
|
+
if (b) { return true; }
|
147
|
+
}
|
148
|
+
return false;
|
149
|
+
}
|
150
|
+
|
151
|
+
public void start(int i)
|
152
|
+
{
|
153
|
+
started[i] = true;
|
154
|
+
}
|
155
|
+
|
156
|
+
public void finish(int i)
|
157
|
+
{
|
158
|
+
finished[i] = true;
|
159
|
+
}
|
160
|
+
|
161
|
+
public int getProcessorCount()
|
162
|
+
{
|
163
|
+
return processorCount;
|
164
|
+
}
|
165
|
+
|
166
|
+
public int getStartedCount()
|
167
|
+
{
|
168
|
+
int count = 0;
|
169
|
+
for (int i=0; i < started.length; i++) {
|
170
|
+
if (started[i]) { count++; }
|
171
|
+
}
|
172
|
+
return count;
|
173
|
+
}
|
174
|
+
|
175
|
+
public int getFinishedCount()
|
176
|
+
{
|
177
|
+
int count = 0;
|
178
|
+
for (int i=0; i < finished.length; i++) {
|
179
|
+
if (finished[i]) { count++; }
|
180
|
+
}
|
181
|
+
return count;
|
182
|
+
}
|
183
|
+
|
184
|
+
public void setInputCommitReport(int i, CommitReport inputCommitReport)
|
185
|
+
{
|
186
|
+
if (inputCommitReport == null) {
|
187
|
+
inputCommitReport = Exec.newCommitReport();
|
188
|
+
}
|
189
|
+
this.inputCommitReports[i] = inputCommitReport;
|
190
|
+
}
|
191
|
+
|
192
|
+
public void setOutputCommitReport(int i, CommitReport outputCommitReport)
|
193
|
+
{
|
194
|
+
if (outputCommitReport == null) {
|
195
|
+
outputCommitReport = Exec.newCommitReport();
|
196
|
+
}
|
197
|
+
this.outputCommitReports[i] = outputCommitReport;
|
198
|
+
}
|
199
|
+
|
200
|
+
public boolean isOutputCommitted(int i)
|
201
|
+
{
|
202
|
+
return outputCommitReports[i] != null;
|
203
|
+
}
|
204
|
+
|
205
|
+
public void setException(int i, Throwable exception)
|
206
|
+
{
|
207
|
+
this.exceptions[i] = exception;
|
208
|
+
}
|
209
|
+
|
210
|
+
public boolean isAllCommitted()
|
211
|
+
{
|
212
|
+
if (processorCount <= 0) {
|
213
|
+
// not initialized
|
214
|
+
return false;
|
215
|
+
}
|
216
|
+
for (int i=0; i < processorCount; i++) {
|
217
|
+
if (!isOutputCommitted(i)) {
|
218
|
+
return false;
|
219
|
+
}
|
220
|
+
}
|
221
|
+
return true;
|
222
|
+
}
|
223
|
+
|
224
|
+
public boolean isAnyCommitted()
|
225
|
+
{
|
226
|
+
for (int i=0; i < processorCount; i++) {
|
227
|
+
if (isOutputCommitted(i)) {
|
228
|
+
return true;
|
229
|
+
}
|
230
|
+
}
|
231
|
+
return false;
|
232
|
+
}
|
233
|
+
|
234
|
+
public void setOutputNextConfig(NextConfig outputNextConfig)
|
235
|
+
{
|
109
236
|
if (outputNextConfig == null) {
|
110
237
|
outputNextConfig = Exec.newNextConfig();
|
111
238
|
}
|
112
|
-
|
113
|
-
nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
|
114
|
-
nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
|
115
|
-
return new ExecuteResult(nextConfig);
|
239
|
+
this.outputNextConfig = outputNextConfig;
|
116
240
|
}
|
117
|
-
}
|
118
241
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
242
|
+
public void setInputNextConfig(NextConfig inputNextConfig)
|
243
|
+
{
|
244
|
+
if (inputNextConfig == null) {
|
245
|
+
inputNextConfig = Exec.newNextConfig();
|
246
|
+
}
|
247
|
+
this.inputNextConfig = inputNextConfig;
|
248
|
+
}
|
123
249
|
|
124
|
-
public
|
250
|
+
public List<CommitReport> getInputCommitReports()
|
125
251
|
{
|
126
|
-
|
127
|
-
this.outputCommitReport = outputCommitReport;
|
252
|
+
return ImmutableList.copyOf(inputCommitReports);
|
128
253
|
}
|
129
254
|
|
130
|
-
public CommitReport
|
255
|
+
public List<CommitReport> getOutputCommitReports()
|
131
256
|
{
|
132
|
-
return
|
257
|
+
return ImmutableList.copyOf(outputCommitReports);
|
133
258
|
}
|
134
259
|
|
135
|
-
public
|
260
|
+
public RuntimeException getRepresentativeException()
|
136
261
|
{
|
137
|
-
|
262
|
+
RuntimeException top = null;
|
263
|
+
for (Throwable ex : exceptions) {
|
264
|
+
if (ex != null) {
|
265
|
+
if (top != null) {
|
266
|
+
top.addSuppressed(ex);
|
267
|
+
} else {
|
268
|
+
if (ex instanceof RuntimeException) {
|
269
|
+
top = (RuntimeException) ex;
|
270
|
+
} else {
|
271
|
+
top = new RuntimeException(ex);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
}
|
275
|
+
}
|
276
|
+
if (top == null) {
|
277
|
+
top = new RuntimeException("Some transactions are not committed");
|
278
|
+
}
|
279
|
+
return top;
|
280
|
+
}
|
281
|
+
|
282
|
+
public int getCommittedUnclosedCount()
|
283
|
+
{
|
284
|
+
int count = 0;
|
285
|
+
for (int i=0; i < exceptions.length; i++) {
|
286
|
+
if (exceptions[i] != null && isOutputCommitted(i)) {
|
287
|
+
count++;
|
288
|
+
}
|
289
|
+
}
|
290
|
+
return count;
|
291
|
+
}
|
292
|
+
|
293
|
+
public ExecutionResult buildExecuteResult()
|
294
|
+
{
|
295
|
+
return buildExecuteResultWithWarningException(null);
|
296
|
+
}
|
297
|
+
|
298
|
+
public ExecutionResult buildExecuteResultWithWarningException(Throwable ex)
|
299
|
+
{
|
300
|
+
NextConfig nextConfig = Exec.newNextConfig();
|
301
|
+
if (inputNextConfig != null) {
|
302
|
+
nextConfig.getNestedOrSetEmpty("in").merge(inputNextConfig);
|
303
|
+
}
|
304
|
+
if (outputNextConfig != null) {
|
305
|
+
nextConfig.getNestedOrSetEmpty("out").merge(outputNextConfig);
|
306
|
+
}
|
307
|
+
|
308
|
+
ImmutableList.Builder<Throwable> ignoredExceptions = ImmutableList.builder();
|
309
|
+
for (Throwable e : exceptions) {
|
310
|
+
if (e != null) {
|
311
|
+
ignoredExceptions.add(e);
|
312
|
+
}
|
313
|
+
}
|
314
|
+
if (ex != null) {
|
315
|
+
ignoredExceptions.add(ex);
|
316
|
+
}
|
317
|
+
|
318
|
+
return new ExecutionResult(nextConfig, ignoredExceptions.build());
|
319
|
+
}
|
320
|
+
|
321
|
+
public PartialExecutionException buildPartialExecuteException(Throwable cause,
|
322
|
+
ExecutorTask task, ExecSession exec)
|
323
|
+
{
|
324
|
+
return new PartialExecutionException(cause, new ResumeState(
|
325
|
+
exec.getSessionConfigSource(),
|
326
|
+
task.getInputTask(), task.getOutputTask(),
|
327
|
+
inputSchema, outputSchema,
|
328
|
+
Arrays.asList(inputCommitReports), Arrays.asList(outputCommitReports)));
|
138
329
|
}
|
139
330
|
}
|
140
331
|
|
@@ -143,17 +334,21 @@ public class LocalExecutor
|
|
143
334
|
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
144
335
|
}
|
145
336
|
|
337
|
+
protected List<FilterPlugin> newFilterPlugins(ExecutorTask task)
|
338
|
+
{
|
339
|
+
return Filters.newFilterPlugins(Exec.session(), task.getFilterConfigs());
|
340
|
+
}
|
341
|
+
|
146
342
|
protected OutputPlugin newOutputPlugin(ExecutorTask task)
|
147
343
|
{
|
148
344
|
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
149
345
|
}
|
150
346
|
|
151
|
-
public
|
347
|
+
public ExecutionResult run(ExecSession exec, final ConfigSource config)
|
152
348
|
{
|
153
|
-
log = exec.getLogger(LocalExecutor.class);
|
154
349
|
try {
|
155
|
-
return Exec.doWith(exec, new ExecAction<
|
156
|
-
public
|
350
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
351
|
+
public ExecutionResult run()
|
157
352
|
{
|
158
353
|
return doRun(config);
|
159
354
|
}
|
@@ -163,114 +358,270 @@ public class LocalExecutor
|
|
163
358
|
}
|
164
359
|
}
|
165
360
|
|
166
|
-
|
361
|
+
public ExecutionResult resume(final ConfigSource config, final ResumeState resume)
|
362
|
+
{
|
363
|
+
try {
|
364
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
365
|
+
return Exec.doWith(exec, new ExecAction<ExecutionResult>() {
|
366
|
+
public ExecutionResult run()
|
367
|
+
{
|
368
|
+
return doResume(config, resume);
|
369
|
+
}
|
370
|
+
});
|
371
|
+
} catch (Exception ex) {
|
372
|
+
throw Throwables.propagate(ex);
|
373
|
+
}
|
374
|
+
}
|
375
|
+
|
376
|
+
public void cleanup(final ConfigSource config, final ResumeState resume)
|
377
|
+
{
|
378
|
+
try {
|
379
|
+
ExecSession exec = new ExecSession(injector, resume.getExecSessionConfigSource());
|
380
|
+
Exec.doWith(exec, new ExecAction<Void>() {
|
381
|
+
public Void run()
|
382
|
+
{
|
383
|
+
doCleanup(config, resume);
|
384
|
+
return null;
|
385
|
+
}
|
386
|
+
});
|
387
|
+
} catch (Exception ex) {
|
388
|
+
throw Throwables.propagate(ex);
|
389
|
+
}
|
390
|
+
}
|
391
|
+
|
392
|
+
public void doCleanup(ConfigSource config, ResumeState resume)
|
393
|
+
{
|
394
|
+
ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
395
|
+
InputPlugin in = newInputPlugin(task);
|
396
|
+
OutputPlugin out = newOutputPlugin(task);
|
397
|
+
|
398
|
+
List<CommitReport> successInputCommitReports = ImmutableList.copyOf(
|
399
|
+
Iterables.filter(resume.getInputCommitReports(), Predicates.notNull()));
|
400
|
+
List<CommitReport> successOutputCommitReports = ImmutableList.copyOf(
|
401
|
+
Iterables.filter(resume.getOutputCommitReports(), Predicates.notNull()));
|
402
|
+
|
403
|
+
in.cleanup(resume.getInputTaskSource(), resume.getInputSchema(),
|
404
|
+
resume.getInputCommitReports().size(), successInputCommitReports);
|
405
|
+
|
406
|
+
out.cleanup(resume.getOutputTaskSource(), resume.getOutputSchema(),
|
407
|
+
resume.getOutputCommitReports().size(), successOutputCommitReports);
|
408
|
+
}
|
409
|
+
|
410
|
+
private ExecutionResult doRun(ConfigSource config)
|
167
411
|
{
|
168
412
|
final ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
169
413
|
|
170
414
|
final InputPlugin in = newInputPlugin(task);
|
415
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
171
416
|
final OutputPlugin out = newOutputPlugin(task);
|
172
417
|
|
173
|
-
final
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
418
|
+
final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
|
419
|
+
try {
|
420
|
+
NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
421
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
|
422
|
+
{
|
423
|
+
state.initialize(processorCount);
|
424
|
+
state.setInputSchema(inputSchema);
|
425
|
+
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
426
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
427
|
+
{
|
428
|
+
Schema outputSchema = last(filterSchemas);
|
429
|
+
state.setOutputSchema(outputSchema);
|
430
|
+
NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), outputSchema, processorCount, new OutputPlugin.Control() {
|
431
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
432
|
+
{
|
433
|
+
task.setInputTask(inputTask);
|
434
|
+
task.setFilterTasks(filterTasks);
|
435
|
+
task.setOutputTask(outputTask);
|
436
|
+
|
437
|
+
process(task.dump(), filterSchemas, processorCount, state);
|
438
|
+
if (!state.isAllCommitted()) {
|
439
|
+
throw state.getRepresentativeException();
|
440
|
+
}
|
441
|
+
return state.getOutputCommitReports();
|
442
|
+
}
|
443
|
+
});
|
444
|
+
state.setOutputNextConfig(outputNextConfig);
|
193
445
|
}
|
446
|
+
});
|
447
|
+
return state.getInputCommitReports();
|
448
|
+
}
|
449
|
+
});
|
450
|
+
state.setInputNextConfig(inputNextConfig);
|
194
451
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
452
|
+
return state.buildExecuteResult();
|
453
|
+
|
454
|
+
} catch (Throwable ex) {
|
455
|
+
if (state.isAllCommitted()) {
|
456
|
+
// ignore the exception
|
457
|
+
return state.buildExecuteResultWithWarningException(ex);
|
200
458
|
}
|
201
|
-
|
202
|
-
|
459
|
+
if (!state.isAnyStarted()) {
|
460
|
+
throw ex;
|
461
|
+
}
|
462
|
+
throw state.buildPartialExecuteException(ex, task, Exec.session());
|
463
|
+
}
|
464
|
+
}
|
465
|
+
|
466
|
+
private ExecutionResult doResume(ConfigSource config, final ResumeState resume)
|
467
|
+
{
|
468
|
+
final ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
469
|
+
|
470
|
+
final InputPlugin in = newInputPlugin(task);
|
471
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
472
|
+
final OutputPlugin out = newOutputPlugin(task);
|
473
|
+
|
474
|
+
final ProcessState state = new ProcessState(Exec.getLogger(LocalExecutor.class));
|
475
|
+
try {
|
476
|
+
NextConfig inputNextConfig = in.resume(resume.getInputTaskSource(), resume.getInputSchema(), resume.getInputCommitReports().size(), new InputPlugin.Control() {
|
477
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema inputSchema, final int processorCount)
|
478
|
+
{
|
479
|
+
// TODO validate inputTask?
|
480
|
+
// TODO validate inputSchema
|
481
|
+
// TODO validate processorCount
|
482
|
+
state.initialize(processorCount);
|
483
|
+
Filters.transaction(filterPlugins, task.getFilterConfigs(), inputSchema, new Filters.Control() {
|
484
|
+
public void run(final List<TaskSource> filterTasks, final List<Schema> filterSchemas)
|
485
|
+
{
|
486
|
+
Schema outputSchema = last(filterSchemas);
|
487
|
+
state.setOutputSchema(outputSchema);
|
488
|
+
NextConfig outputNextConfig = out.resume(resume.getOutputTaskSource(), outputSchema, processorCount, new OutputPlugin.Control() {
|
489
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
490
|
+
{
|
491
|
+
// TODO validate outputTask?
|
492
|
+
task.setInputTask(inputTask);
|
493
|
+
task.setFilterTasks(filterTasks);
|
494
|
+
task.setOutputTask(outputTask);
|
495
|
+
|
496
|
+
for (int i=0; i < resume.getOutputCommitReports().size(); i++) {
|
497
|
+
if (resume.getOutputCommitReports().get(i) != null) {
|
498
|
+
state.start(i);
|
499
|
+
state.setInputCommitReport(i, resume.getInputCommitReports().get(i));
|
500
|
+
state.setOutputCommitReport(i, resume.getOutputCommitReports().get(i));
|
501
|
+
state.finish(i);
|
502
|
+
}
|
503
|
+
}
|
504
|
+
|
505
|
+
process(task.dump(), filterSchemas, processorCount, state);
|
506
|
+
if (!state.isAllCommitted()) {
|
507
|
+
throw state.getRepresentativeException();
|
508
|
+
}
|
509
|
+
return state.getOutputCommitReports();
|
510
|
+
}
|
511
|
+
});
|
512
|
+
state.setOutputNextConfig(outputNextConfig);
|
513
|
+
}
|
514
|
+
});
|
515
|
+
return state.getInputCommitReports();
|
516
|
+
}
|
517
|
+
});
|
518
|
+
state.setInputNextConfig(inputNextConfig);
|
203
519
|
|
204
|
-
|
520
|
+
return state.buildExecuteResult();
|
521
|
+
|
522
|
+
} catch (Throwable ex) {
|
523
|
+
if (state.isAllCommitted()) {
|
524
|
+
// ignore the exception
|
525
|
+
return state.buildExecuteResultWithWarningException(ex);
|
526
|
+
}
|
527
|
+
if (!state.isAnyStarted()) {
|
528
|
+
throw ex;
|
529
|
+
}
|
530
|
+
throw state.buildPartialExecuteException(ex, task, Exec.session());
|
531
|
+
}
|
205
532
|
}
|
206
533
|
|
207
|
-
private
|
534
|
+
private void process(TaskSource taskSource, List<Schema> filterSchemas, int processorCount,
|
535
|
+
ProcessState state)
|
208
536
|
{
|
209
|
-
List<Future<
|
210
|
-
List<ProcessResult> joined = new ArrayList<>();
|
537
|
+
List<Future<Throwable>> futures = new ArrayList<>(processorCount);
|
211
538
|
try {
|
212
|
-
log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
|
213
|
-
showProgress(processorCount);
|
214
539
|
for (int i=0; i < processorCount; i++) {
|
215
|
-
|
540
|
+
if (state.isOutputCommitted(i)) {
|
541
|
+
state.getLogger().warn("Skipped resumed task {}", i);
|
542
|
+
futures.add(null); // resumed
|
543
|
+
} else {
|
544
|
+
futures.add(startProcessor(taskSource, filterSchemas, i, state));
|
545
|
+
}
|
216
546
|
}
|
547
|
+
showProgress(state);
|
217
548
|
|
218
549
|
for (int i=0; i < processorCount; i++) {
|
550
|
+
if (futures.get(i) == null) {
|
551
|
+
continue;
|
552
|
+
}
|
219
553
|
try {
|
220
|
-
|
221
|
-
showProgress(processorCount);
|
222
|
-
|
554
|
+
state.setException(i, futures.get(i).get());
|
223
555
|
} catch (ExecutionException ex) {
|
224
|
-
|
556
|
+
state.setException(i, ex.getCause());
|
557
|
+
//Throwables.propagate(ex.getCause());
|
225
558
|
} catch (InterruptedException ex) {
|
226
|
-
|
559
|
+
state.setException(i, new ExecutionInterruptedException(ex));
|
227
560
|
}
|
561
|
+
showProgress(state);
|
228
562
|
}
|
229
|
-
return joined;
|
230
563
|
} finally {
|
231
|
-
for (
|
232
|
-
|
233
|
-
|
564
|
+
for (Future<Throwable> future : futures) {
|
565
|
+
if (future != null && !future.isDone()) {
|
566
|
+
future.cancel(true);
|
567
|
+
// TODO join?
|
568
|
+
}
|
234
569
|
}
|
235
570
|
}
|
236
571
|
}
|
237
572
|
|
238
|
-
private void showProgress(
|
573
|
+
private void showProgress(ProcessState state)
|
239
574
|
{
|
240
|
-
int
|
241
|
-
int
|
242
|
-
|
575
|
+
int total = state.getProcessorCount();
|
576
|
+
int finished = state.getFinishedCount();
|
577
|
+
int started = state.getStartedCount();
|
578
|
+
state.getLogger().info(String.format("{done:%3d / %d, running: %d}", finished, total, started - finished));
|
243
579
|
}
|
244
580
|
|
245
|
-
private Future<
|
581
|
+
private Future<Throwable> startProcessor(final TaskSource taskSource,
|
582
|
+
final List<Schema> filterSchemas, final int index,
|
583
|
+
final ProcessState state)
|
246
584
|
{
|
247
|
-
return executor.submit(new Callable<
|
248
|
-
public
|
585
|
+
return executor.submit(new Callable<Throwable>() {
|
586
|
+
public Throwable call()
|
249
587
|
{
|
588
|
+
final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
|
589
|
+
final InputPlugin in = newInputPlugin(task);
|
590
|
+
final List<FilterPlugin> filterPlugins = newFilterPlugins(task);
|
591
|
+
final OutputPlugin out = newOutputPlugin(task);
|
592
|
+
|
593
|
+
TransactionalPageOutput tran = out.open(task.getOutputTask(), last(filterSchemas), index);
|
594
|
+
PageOutput closeThis = tran;
|
595
|
+
state.start(index);
|
250
596
|
try {
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
|
257
|
-
boolean committed = false;
|
597
|
+
PageOutput filtered = closeThis = Filters.open(filterPlugins, task.getFilterTasks(), filterSchemas, tran);
|
598
|
+
state.setInputCommitReport(index, in.run(task.getInputTask(), first(filterSchemas), index, filtered));
|
599
|
+
state.setOutputCommitReport(index, tran.commit()); // TODO check output.finish() is called. wrap or abstract
|
600
|
+
return null;
|
601
|
+
} finally {
|
258
602
|
try {
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
tran.abort();
|
603
|
+
try {
|
604
|
+
if (!state.isOutputCommitted(index)) {
|
605
|
+
tran.abort();
|
606
|
+
}
|
607
|
+
} finally {
|
608
|
+
closeThis.close();
|
266
609
|
}
|
267
|
-
|
610
|
+
} finally {
|
611
|
+
state.finish(index);
|
268
612
|
}
|
269
|
-
} finally {
|
270
|
-
runningTaskCount.getAndDecrement();
|
271
|
-
completedTaskCount.getAndIncrement();
|
272
613
|
}
|
273
614
|
}
|
274
615
|
});
|
275
616
|
}
|
617
|
+
|
618
|
+
private static Schema first(List<Schema> filterSchemas)
|
619
|
+
{
|
620
|
+
return filterSchemas.get(0);
|
621
|
+
}
|
622
|
+
|
623
|
+
private static Schema last(List<Schema> filterSchemas)
|
624
|
+
{
|
625
|
+
return filterSchemas.get(filterSchemas.size() - 1);
|
626
|
+
}
|
276
627
|
}
|