embulk 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/ChangeLog +12 -0
- data/README.md +38 -13
- data/build.gradle +6 -1
- data/embulk-cli/pom.xml +1 -1
- data/embulk-core/pom.xml +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
- data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
- data/embulk-standards/pom.xml +1 -1
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
- data/lib/embulk/command/embulk_run.rb +16 -1
- data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
- data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
- data/lib/embulk/filter_plugin.rb +86 -0
- data/lib/embulk/input_plugin.rb +37 -2
- data/lib/embulk/java/imports.rb +1 -0
- data/lib/embulk/output_plugin.rb +30 -0
- data/lib/embulk/plugin.rb +32 -19
- data/lib/embulk/schema.rb +16 -9
- data/lib/embulk/version.rb +1 -1
- data/pom.xml +1 -1
- metadata +13 -7
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -0,0 +1,18 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
public class PartialExecutionException
|
4
|
+
extends RuntimeException
|
5
|
+
{
|
6
|
+
private final ResumeState resumeState;
|
7
|
+
|
8
|
+
public PartialExecutionException(Throwable cause, ResumeState resumeState)
|
9
|
+
{
|
10
|
+
super(cause);
|
11
|
+
this.resumeState = resumeState;
|
12
|
+
}
|
13
|
+
|
14
|
+
public ResumeState getResumeState()
|
15
|
+
{
|
16
|
+
return resumeState;
|
17
|
+
}
|
18
|
+
}
|
@@ -0,0 +1,82 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
5
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.CommitReport;
|
9
|
+
import org.embulk.spi.Schema;
|
10
|
+
import org.embulk.spi.ExecSession;
|
11
|
+
|
12
|
+
public class ResumeState
|
13
|
+
{
|
14
|
+
private final ConfigSource execSessionConfigSource;
|
15
|
+
private final TaskSource inputTaskSource;
|
16
|
+
private final TaskSource outputTaskSource;
|
17
|
+
private final Schema inputSchema;
|
18
|
+
private final Schema outputSchema;
|
19
|
+
private final List<CommitReport> inputCommitReports;
|
20
|
+
private final List<CommitReport> outputCommitReports;
|
21
|
+
|
22
|
+
@JsonCreator
|
23
|
+
public ResumeState(
|
24
|
+
@JsonProperty("exec_task") ConfigSource execSessionConfigSource,
|
25
|
+
@JsonProperty("in_task") TaskSource inputTaskSource,
|
26
|
+
@JsonProperty("out_task") TaskSource outputTaskSource,
|
27
|
+
@JsonProperty("in_schema") Schema inputSchema,
|
28
|
+
@JsonProperty("out_schema") Schema outputSchema,
|
29
|
+
@JsonProperty("in_reports") List<CommitReport> inputCommitReports,
|
30
|
+
@JsonProperty("out_reports") List<CommitReport> outputCommitReports)
|
31
|
+
{
|
32
|
+
this.execSessionConfigSource = execSessionConfigSource;
|
33
|
+
this.inputTaskSource = inputTaskSource;
|
34
|
+
this.outputTaskSource = outputTaskSource;
|
35
|
+
this.inputSchema = inputSchema;
|
36
|
+
this.outputSchema = outputSchema;
|
37
|
+
this.inputCommitReports = inputCommitReports;
|
38
|
+
this.outputCommitReports = outputCommitReports;
|
39
|
+
}
|
40
|
+
|
41
|
+
@JsonProperty("exec_task")
|
42
|
+
public ConfigSource getExecSessionConfigSource()
|
43
|
+
{
|
44
|
+
return execSessionConfigSource;
|
45
|
+
}
|
46
|
+
|
47
|
+
@JsonProperty("in_task")
|
48
|
+
public TaskSource getInputTaskSource()
|
49
|
+
{
|
50
|
+
return inputTaskSource;
|
51
|
+
}
|
52
|
+
|
53
|
+
@JsonProperty("out_task")
|
54
|
+
public TaskSource getOutputTaskSource()
|
55
|
+
{
|
56
|
+
return outputTaskSource;
|
57
|
+
}
|
58
|
+
|
59
|
+
@JsonProperty("in_schema")
|
60
|
+
public Schema getInputSchema()
|
61
|
+
{
|
62
|
+
return inputSchema;
|
63
|
+
}
|
64
|
+
|
65
|
+
@JsonProperty("out_schema")
|
66
|
+
public Schema getOutputSchema()
|
67
|
+
{
|
68
|
+
return outputSchema;
|
69
|
+
}
|
70
|
+
|
71
|
+
@JsonProperty("in_reports")
|
72
|
+
public List<CommitReport> getInputCommitReports()
|
73
|
+
{
|
74
|
+
return inputCommitReports;
|
75
|
+
}
|
76
|
+
|
77
|
+
@JsonProperty("out_reports")
|
78
|
+
public List<CommitReport> getOutputCommitReports()
|
79
|
+
{
|
80
|
+
return outputCommitReports;
|
81
|
+
}
|
82
|
+
}
|
@@ -12,7 +12,7 @@ import org.embulk.spi.ParserPlugin;
|
|
12
12
|
import org.embulk.spi.FormatterPlugin;
|
13
13
|
import org.embulk.spi.DecoderPlugin;
|
14
14
|
import org.embulk.spi.EncoderPlugin;
|
15
|
-
|
15
|
+
import org.embulk.spi.FilterPlugin;
|
16
16
|
import org.embulk.spi.GuessPlugin;
|
17
17
|
|
18
18
|
public class JRubyPluginSource
|
@@ -50,8 +50,8 @@ public class JRubyPluginSource
|
|
50
50
|
category = "decoder";
|
51
51
|
} else if (EncoderPlugin.class.isAssignableFrom(iface)) {
|
52
52
|
category = "encoder";
|
53
|
-
|
54
|
-
|
53
|
+
} else if (FilterPlugin.class.isAssignableFrom(iface)) {
|
54
|
+
category = "filter";
|
55
55
|
} else if (GuessPlugin.class.isAssignableFrom(iface)) {
|
56
56
|
category = "guess";
|
57
57
|
} else {
|
@@ -3,7 +3,11 @@ package org.embulk.spi;
|
|
3
3
|
import org.joda.time.DateTimeZone;
|
4
4
|
import org.slf4j.Logger;
|
5
5
|
import org.slf4j.ILoggerFactory;
|
6
|
+
import com.google.common.base.Optional;
|
6
7
|
import com.google.inject.Injector;
|
8
|
+
import org.embulk.config.Task;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
7
11
|
import org.embulk.config.ModelManager;
|
8
12
|
import org.embulk.config.CommitReport;
|
9
13
|
import org.embulk.config.NextConfig;
|
@@ -26,18 +30,45 @@ public class ExecSession
|
|
26
30
|
private final Timestamp transactionTime;
|
27
31
|
private final DateTimeZone transactionTimeZone;
|
28
32
|
|
33
|
+
public interface SessionTask
|
34
|
+
extends Task
|
35
|
+
{
|
36
|
+
@Config("transaction_time")
|
37
|
+
@ConfigDefault("null")
|
38
|
+
Optional<Timestamp> getTransactionTime();
|
39
|
+
|
40
|
+
@Config("transaction_time_zone")
|
41
|
+
@ConfigDefault("\"UTC\"")
|
42
|
+
DateTimeZone getTransactionTimeZone();
|
43
|
+
}
|
44
|
+
|
29
45
|
/**
 * Builds a session from a config source by loading it as a
 * {@link SessionTask} and delegating to the task-based constructor.
 *
 * @param injector   injector used to look up the session's services
 * @param execConfig config source loaded via {@code loadConfig(SessionTask.class)}
 */
public ExecSession(Injector injector, ConfigSource execConfig)
{
    this(injector, execConfig.loadConfig(SessionTask.class));
}
|
49
|
+
|
50
|
+
/**
 * Builds a session from a task source by loading it as a
 * {@link SessionTask} and delegating to the task-based constructor.
 *
 * @param injector   injector used to look up the session's services
 * @param taskSource task source loaded via {@code loadTask(SessionTask.class)}
 */
public ExecSession(Injector injector, TaskSource taskSource)
{
    this(injector, taskSource.loadTask(SessionTask.class));
}
|
54
|
+
|
55
|
+
public ExecSession(Injector injector, SessionTask task)
|
56
|
+
{
|
32
57
|
this.injector = injector;
|
33
58
|
this.loggerFactory = injector.getInstance(ILoggerFactory.class);
|
34
59
|
this.modelManager = injector.getInstance(ModelManager.class);
|
35
60
|
this.pluginManager = injector.getInstance(PluginManager.class);
|
36
61
|
this.bufferAllocator = injector.getInstance(BufferAllocator.class);
|
37
62
|
|
38
|
-
this.transactionTime =
|
39
|
-
|
40
|
-
|
63
|
+
this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
|
64
|
+
this.transactionTimeZone = task.getTransactionTimeZone();
|
65
|
+
}
|
66
|
+
|
67
|
+
public ConfigSource getSessionConfigSource()
|
68
|
+
{
|
69
|
+
return newConfigSource()
|
70
|
+
.set("transaction_time", transactionTime)
|
71
|
+
.set("transaction_time_zone", transactionTimeZone);
|
41
72
|
}
|
42
73
|
|
43
74
|
public Injector getInjector()
|
@@ -10,10 +10,21 @@ public interface FileInputPlugin
|
|
10
10
|
{
|
11
11
|
public interface Control
|
12
12
|
{
|
13
|
-
public List<CommitReport> run(TaskSource taskSource,
|
13
|
+
public List<CommitReport> run(TaskSource taskSource,
|
14
|
+
int processorCount);
|
14
15
|
}
|
15
16
|
|
16
|
-
public NextConfig transaction(ConfigSource config,
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
18
|
+
FileInputPlugin.Control control);
|
17
19
|
|
18
|
-
public
|
20
|
+
public NextConfig resume(TaskSource taskSource,
|
21
|
+
int processorCount,
|
22
|
+
FileInputPlugin.Control control);
|
23
|
+
|
24
|
+
public void cleanup(TaskSource taskSource,
|
25
|
+
int processorCount,
|
26
|
+
List<CommitReport> successCommitReports);
|
27
|
+
|
28
|
+
public TransactionalFileInput open(TaskSource taskSource,
|
29
|
+
int processorIndex);
|
19
30
|
}
|
@@ -57,30 +57,61 @@ public class FileInputRunner
|
|
57
57
|
public NextConfig transaction(ConfigSource config, final InputPlugin.Control control)
|
58
58
|
{
|
59
59
|
final RunnerTask task = config.loadConfig(RunnerTask.class);
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
60
|
+
return fileInputPlugin.transaction(config, new RunnerControl(task, control));
|
61
|
+
}
|
62
|
+
|
63
|
+
public NextConfig resume(TaskSource taskSource,
|
64
|
+
Schema schema, int processorCount,
|
65
|
+
InputPlugin.Control control)
|
66
|
+
{
|
67
|
+
final RunnerTask task = taskSource.loadTask(RunnerTask.class);
|
68
|
+
return fileInputPlugin.resume(task.getFileInputTaskSource(), processorCount, new RunnerControl(task, control));
|
69
|
+
}
|
70
|
+
|
71
|
+
private class RunnerControl
|
72
|
+
implements FileInputPlugin.Control
|
73
|
+
{
|
74
|
+
private final RunnerTask task;
|
75
|
+
private final List<DecoderPlugin> decoderPlugins;
|
76
|
+
private final ParserPlugin parserPlugin;
|
77
|
+
private final InputPlugin.Control nextControl;
|
78
|
+
|
79
|
+
public RunnerControl(RunnerTask task, InputPlugin.Control nextControl)
|
80
|
+
{
|
81
|
+
this.task = task;
|
82
|
+
// create plugins earlier than run() to throw exceptions early
|
83
|
+
this.decoderPlugins = newDecoderPlugins(task);
|
84
|
+
this.parserPlugin = newParserPlugin(task);
|
85
|
+
this.nextControl = nextControl;
|
86
|
+
}
|
87
|
+
|
88
|
+
@Override
|
89
|
+
public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
|
90
|
+
{
|
91
|
+
final List<CommitReport> commitReports = new ArrayList<CommitReport>();
|
92
|
+
Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
|
93
|
+
public void run(final List<TaskSource> decoderTaskSources)
|
94
|
+
{
|
95
|
+
parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
|
96
|
+
public void run(final TaskSource parserTaskSource, final Schema schema)
|
97
|
+
{
|
98
|
+
task.setFileInputTaskSource(fileInputTaskSource);
|
99
|
+
task.setDecoderTaskSources(decoderTaskSources);
|
100
|
+
task.setParserTaskSource(parserTaskSource);
|
101
|
+
commitReports.addAll(nextControl.run(task.dump(), schema, processorCount));
|
102
|
+
}
|
103
|
+
});
|
104
|
+
}
|
105
|
+
});
|
106
|
+
return commitReports;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
public void cleanup(TaskSource taskSource,
|
111
|
+
Schema schema, int processorCount,
|
112
|
+
List<CommitReport> successCommitReports)
|
113
|
+
{
|
114
|
+
fileInputPlugin.cleanup(taskSource, processorCount, successCommitReports);
|
84
115
|
}
|
85
116
|
|
86
117
|
@Override
|
@@ -16,5 +16,13 @@ public interface FileOutputPlugin
|
|
16
16
|
public NextConfig transaction(ConfigSource config, int processorCount,
|
17
17
|
FileOutputPlugin.Control control);
|
18
18
|
|
19
|
+
public NextConfig resume(TaskSource taskSource,
|
20
|
+
int processorCount,
|
21
|
+
FileOutputPlugin.Control control);
|
22
|
+
|
23
|
+
public void cleanup(TaskSource taskSource,
|
24
|
+
int processorCount,
|
25
|
+
List<CommitReport> successCommitReports);
|
26
|
+
|
19
27
|
public TransactionalFileOutput open(TaskSource taskSource, int processorIndex);
|
20
28
|
}
|
@@ -60,30 +60,63 @@ public class FileOutputRunner
|
|
60
60
|
final OutputPlugin.Control control)
|
61
61
|
{
|
62
62
|
final RunnerTask task = config.loadConfig(RunnerTask.class);
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
63
|
+
return fileOutputPlugin.transaction(config, processorCount, new RunnerControl(schema, task, control));
|
64
|
+
}
|
65
|
+
|
66
|
+
public NextConfig resume(TaskSource taskSource,
|
67
|
+
Schema schema, int processorCount,
|
68
|
+
final OutputPlugin.Control control)
|
69
|
+
{
|
70
|
+
final RunnerTask task = taskSource.loadTask(RunnerTask.class);
|
71
|
+
return fileOutputPlugin.resume(task.getFileOutputTaskSource(), processorCount, new RunnerControl(schema, task, control));
|
72
|
+
}
|
73
|
+
|
74
|
+
private class RunnerControl
|
75
|
+
implements FileOutputPlugin.Control
|
76
|
+
{
|
77
|
+
private final Schema schema;
|
78
|
+
private final RunnerTask task;
|
79
|
+
private final List<EncoderPlugin> encoderPlugins;
|
80
|
+
private final FormatterPlugin formatterPlugin;
|
81
|
+
private final OutputPlugin.Control nextControl;
|
82
|
+
|
83
|
+
public RunnerControl(Schema schema, RunnerTask task, OutputPlugin.Control nextControl)
|
84
|
+
{
|
85
|
+
this.schema = schema;
|
86
|
+
this.task = task;
|
87
|
+
// create plugins earlier than run() to throw exceptions early
|
88
|
+
this.encoderPlugins = newEncoderPlugins(task);
|
89
|
+
this.formatterPlugin = newFormatterPlugin(task);
|
90
|
+
this.nextControl = nextControl;
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public List<CommitReport> run(final TaskSource fileOutputTaskSource)
|
95
|
+
{
|
96
|
+
final List<CommitReport> commitReports = new ArrayList<CommitReport>();
|
97
|
+
Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
|
98
|
+
public void run(final List<TaskSource> encoderTaskSources)
|
99
|
+
{
|
100
|
+
formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
|
101
|
+
public void run(final TaskSource formatterTaskSource)
|
102
|
+
{
|
103
|
+
task.setFileOutputTaskSource(fileOutputTaskSource);
|
104
|
+
task.setEncoderTaskSources(encoderTaskSources);
|
105
|
+
task.setFormatterTaskSource(formatterTaskSource);
|
106
|
+
commitReports.addAll(nextControl.run(task.dump()));
|
107
|
+
}
|
108
|
+
});
|
109
|
+
}
|
110
|
+
});
|
111
|
+
return commitReports;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
public void cleanup(TaskSource taskSource,
|
116
|
+
Schema schema, int processorCount,
|
117
|
+
List<CommitReport> successCommitReports)
|
118
|
+
{
|
119
|
+
fileOutputPlugin.cleanup(taskSource, processorCount, successCommitReports);
|
87
120
|
}
|
88
121
|
|
89
122
|
@Override
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.spi;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import org.embulk.config.TaskSource;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
import org.embulk.config.NextConfig;
|
7
|
+
import org.embulk.config.CommitReport;
|
8
|
+
|
9
|
+
public interface FilterPlugin
|
10
|
+
{
|
11
|
+
public interface Control
|
12
|
+
{
|
13
|
+
public void run(TaskSource taskSource, Schema outputSchema);
|
14
|
+
}
|
15
|
+
|
16
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
17
|
+
FilterPlugin.Control control);
|
18
|
+
|
19
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
20
|
+
Schema outputSchema, PageOutput output);
|
21
|
+
}
|
@@ -10,11 +10,22 @@ public interface InputPlugin
|
|
10
10
|
{
|
11
11
|
public interface Control
|
12
12
|
{
|
13
|
-
public List<CommitReport> run(TaskSource taskSource,
|
13
|
+
public List<CommitReport> run(TaskSource taskSource,
|
14
|
+
Schema schema, int processorCount);
|
14
15
|
}
|
15
16
|
|
16
|
-
public NextConfig transaction(ConfigSource config,
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
18
|
+
InputPlugin.Control control);
|
17
19
|
|
18
|
-
public
|
20
|
+
public NextConfig resume(TaskSource taskSource,
|
21
|
+
Schema schema, int processorCount,
|
22
|
+
InputPlugin.Control control);
|
23
|
+
|
24
|
+
public void cleanup(TaskSource taskSource,
|
25
|
+
Schema schema, int processorCount,
|
26
|
+
List<CommitReport> successCommitReports);
|
27
|
+
|
28
|
+
public CommitReport run(TaskSource taskSource,
|
29
|
+
Schema schema, int processorIndex,
|
19
30
|
PageOutput output);
|
20
31
|
}
|