embulk 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/ChangeLog +12 -0
- data/README.md +38 -13
- data/build.gradle +6 -1
- data/embulk-cli/pom.xml +1 -1
- data/embulk-core/pom.xml +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
- data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
- data/embulk-standards/pom.xml +1 -1
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
- data/lib/embulk/command/embulk_run.rb +16 -1
- data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
- data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
- data/lib/embulk/filter_plugin.rb +86 -0
- data/lib/embulk/input_plugin.rb +37 -2
- data/lib/embulk/java/imports.rb +1 -0
- data/lib/embulk/output_plugin.rb +30 -0
- data/lib/embulk/plugin.rb +32 -19
- data/lib/embulk/schema.rb +16 -9
- data/lib/embulk/version.rb +1 -1
- data/pom.xml +1 -1
- metadata +13 -7
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -0,0 +1,18 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
public class PartialExecutionException
|
4
|
+
extends RuntimeException
|
5
|
+
{
|
6
|
+
private final ResumeState resumeState;
|
7
|
+
|
8
|
+
public PartialExecutionException(Throwable cause, ResumeState resumeState)
|
9
|
+
{
|
10
|
+
super(cause);
|
11
|
+
this.resumeState = resumeState;
|
12
|
+
}
|
13
|
+
|
14
|
+
public ResumeState getResumeState()
|
15
|
+
{
|
16
|
+
return resumeState;
|
17
|
+
}
|
18
|
+
}
|
@@ -0,0 +1,82 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
5
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
+
import org.embulk.config.TaskSource;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.CommitReport;
|
9
|
+
import org.embulk.spi.Schema;
|
10
|
+
import org.embulk.spi.ExecSession;
|
11
|
+
|
12
|
+
public class ResumeState
|
13
|
+
{
|
14
|
+
private final ConfigSource execSessionConfigSource;
|
15
|
+
private final TaskSource inputTaskSource;
|
16
|
+
private final TaskSource outputTaskSource;
|
17
|
+
private final Schema inputSchema;
|
18
|
+
private final Schema outputSchema;
|
19
|
+
private final List<CommitReport> inputCommitReports;
|
20
|
+
private final List<CommitReport> outputCommitReports;
|
21
|
+
|
22
|
+
@JsonCreator
|
23
|
+
public ResumeState(
|
24
|
+
@JsonProperty("exec_task") ConfigSource execSessionConfigSource,
|
25
|
+
@JsonProperty("in_task") TaskSource inputTaskSource,
|
26
|
+
@JsonProperty("out_task") TaskSource outputTaskSource,
|
27
|
+
@JsonProperty("in_schema") Schema inputSchema,
|
28
|
+
@JsonProperty("out_schema") Schema outputSchema,
|
29
|
+
@JsonProperty("in_reports") List<CommitReport> inputCommitReports,
|
30
|
+
@JsonProperty("out_reports") List<CommitReport> outputCommitReports)
|
31
|
+
{
|
32
|
+
this.execSessionConfigSource = execSessionConfigSource;
|
33
|
+
this.inputTaskSource = inputTaskSource;
|
34
|
+
this.outputTaskSource = outputTaskSource;
|
35
|
+
this.inputSchema = inputSchema;
|
36
|
+
this.outputSchema = outputSchema;
|
37
|
+
this.inputCommitReports = inputCommitReports;
|
38
|
+
this.outputCommitReports = outputCommitReports;
|
39
|
+
}
|
40
|
+
|
41
|
+
@JsonProperty("exec_task")
|
42
|
+
public ConfigSource getExecSessionConfigSource()
|
43
|
+
{
|
44
|
+
return execSessionConfigSource;
|
45
|
+
}
|
46
|
+
|
47
|
+
@JsonProperty("in_task")
|
48
|
+
public TaskSource getInputTaskSource()
|
49
|
+
{
|
50
|
+
return inputTaskSource;
|
51
|
+
}
|
52
|
+
|
53
|
+
@JsonProperty("out_task")
|
54
|
+
public TaskSource getOutputTaskSource()
|
55
|
+
{
|
56
|
+
return outputTaskSource;
|
57
|
+
}
|
58
|
+
|
59
|
+
@JsonProperty("in_schema")
|
60
|
+
public Schema getInputSchema()
|
61
|
+
{
|
62
|
+
return inputSchema;
|
63
|
+
}
|
64
|
+
|
65
|
+
@JsonProperty("out_schema")
|
66
|
+
public Schema getOutputSchema()
|
67
|
+
{
|
68
|
+
return outputSchema;
|
69
|
+
}
|
70
|
+
|
71
|
+
@JsonProperty("in_reports")
|
72
|
+
public List<CommitReport> getInputCommitReports()
|
73
|
+
{
|
74
|
+
return inputCommitReports;
|
75
|
+
}
|
76
|
+
|
77
|
+
@JsonProperty("out_reports")
|
78
|
+
public List<CommitReport> getOutputCommitReports()
|
79
|
+
{
|
80
|
+
return outputCommitReports;
|
81
|
+
}
|
82
|
+
}
|
@@ -12,7 +12,7 @@ import org.embulk.spi.ParserPlugin;
|
|
12
12
|
import org.embulk.spi.FormatterPlugin;
|
13
13
|
import org.embulk.spi.DecoderPlugin;
|
14
14
|
import org.embulk.spi.EncoderPlugin;
|
15
|
-
|
15
|
+
import org.embulk.spi.FilterPlugin;
|
16
16
|
import org.embulk.spi.GuessPlugin;
|
17
17
|
|
18
18
|
public class JRubyPluginSource
|
@@ -50,8 +50,8 @@ public class JRubyPluginSource
|
|
50
50
|
category = "decoder";
|
51
51
|
} else if (EncoderPlugin.class.isAssignableFrom(iface)) {
|
52
52
|
category = "encoder";
|
53
|
-
|
54
|
-
|
53
|
+
} else if (FilterPlugin.class.isAssignableFrom(iface)) {
|
54
|
+
category = "filter";
|
55
55
|
} else if (GuessPlugin.class.isAssignableFrom(iface)) {
|
56
56
|
category = "guess";
|
57
57
|
} else {
|
@@ -3,7 +3,11 @@ package org.embulk.spi;
|
|
3
3
|
import org.joda.time.DateTimeZone;
|
4
4
|
import org.slf4j.Logger;
|
5
5
|
import org.slf4j.ILoggerFactory;
|
6
|
+
import com.google.common.base.Optional;
|
6
7
|
import com.google.inject.Injector;
|
8
|
+
import org.embulk.config.Task;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
7
11
|
import org.embulk.config.ModelManager;
|
8
12
|
import org.embulk.config.CommitReport;
|
9
13
|
import org.embulk.config.NextConfig;
|
@@ -26,18 +30,45 @@ public class ExecSession
|
|
26
30
|
private final Timestamp transactionTime;
|
27
31
|
private final DateTimeZone transactionTimeZone;
|
28
32
|
|
33
|
+
public interface SessionTask
|
34
|
+
extends Task
|
35
|
+
{
|
36
|
+
@Config("transaction_time")
|
37
|
+
@ConfigDefault("null")
|
38
|
+
Optional<Timestamp> getTransactionTime();
|
39
|
+
|
40
|
+
@Config("transaction_time_zone")
|
41
|
+
@ConfigDefault("\"UTC\"")
|
42
|
+
DateTimeZone getTransactionTimeZone();
|
43
|
+
}
|
44
|
+
|
29
45
|
public ExecSession(Injector injector, ConfigSource execConfig)
|
30
46
|
{
|
31
|
-
|
47
|
+
this(injector, execConfig.loadConfig(SessionTask.class));
|
48
|
+
}
|
49
|
+
|
50
|
+
public ExecSession(Injector injector, TaskSource taskSource)
|
51
|
+
{
|
52
|
+
this(injector, taskSource.loadTask(SessionTask.class));
|
53
|
+
}
|
54
|
+
|
55
|
+
public ExecSession(Injector injector, SessionTask task)
|
56
|
+
{
|
32
57
|
this.injector = injector;
|
33
58
|
this.loggerFactory = injector.getInstance(ILoggerFactory.class);
|
34
59
|
this.modelManager = injector.getInstance(ModelManager.class);
|
35
60
|
this.pluginManager = injector.getInstance(PluginManager.class);
|
36
61
|
this.bufferAllocator = injector.getInstance(BufferAllocator.class);
|
37
62
|
|
38
|
-
this.transactionTime =
|
39
|
-
|
40
|
-
|
63
|
+
this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
|
64
|
+
this.transactionTimeZone = task.getTransactionTimeZone();
|
65
|
+
}
|
66
|
+
|
67
|
+
public ConfigSource getSessionConfigSource()
|
68
|
+
{
|
69
|
+
return newConfigSource()
|
70
|
+
.set("transaction_time", transactionTime)
|
71
|
+
.set("transaction_time_zone", transactionTimeZone);
|
41
72
|
}
|
42
73
|
|
43
74
|
public Injector getInjector()
|
@@ -10,10 +10,21 @@ public interface FileInputPlugin
|
|
10
10
|
{
|
11
11
|
public interface Control
|
12
12
|
{
|
13
|
-
public List<CommitReport> run(TaskSource taskSource,
|
13
|
+
public List<CommitReport> run(TaskSource taskSource,
|
14
|
+
int processorCount);
|
14
15
|
}
|
15
16
|
|
16
|
-
public NextConfig transaction(ConfigSource config,
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
18
|
+
FileInputPlugin.Control control);
|
17
19
|
|
18
|
-
public
|
20
|
+
public NextConfig resume(TaskSource taskSource,
|
21
|
+
int processorCount,
|
22
|
+
FileInputPlugin.Control control);
|
23
|
+
|
24
|
+
public void cleanup(TaskSource taskSource,
|
25
|
+
int processorCount,
|
26
|
+
List<CommitReport> successCommitReports);
|
27
|
+
|
28
|
+
public TransactionalFileInput open(TaskSource taskSource,
|
29
|
+
int processorIndex);
|
19
30
|
}
|
@@ -57,30 +57,61 @@ public class FileInputRunner
|
|
57
57
|
public NextConfig transaction(ConfigSource config, final InputPlugin.Control control)
|
58
58
|
{
|
59
59
|
final RunnerTask task = config.loadConfig(RunnerTask.class);
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
60
|
+
return fileInputPlugin.transaction(config, new RunnerControl(task, control));
|
61
|
+
}
|
62
|
+
|
63
|
+
public NextConfig resume(TaskSource taskSource,
|
64
|
+
Schema schema, int processorCount,
|
65
|
+
InputPlugin.Control control)
|
66
|
+
{
|
67
|
+
final RunnerTask task = taskSource.loadTask(RunnerTask.class);
|
68
|
+
return fileInputPlugin.resume(task.getFileInputTaskSource(), processorCount, new RunnerControl(task, control));
|
69
|
+
}
|
70
|
+
|
71
|
+
private class RunnerControl
|
72
|
+
implements FileInputPlugin.Control
|
73
|
+
{
|
74
|
+
private final RunnerTask task;
|
75
|
+
private final List<DecoderPlugin> decoderPlugins;
|
76
|
+
private final ParserPlugin parserPlugin;
|
77
|
+
private final InputPlugin.Control nextControl;
|
78
|
+
|
79
|
+
public RunnerControl(RunnerTask task, InputPlugin.Control nextControl)
|
80
|
+
{
|
81
|
+
this.task = task;
|
82
|
+
// create plugins earlier than run() to throw exceptions early
|
83
|
+
this.decoderPlugins = newDecoderPlugins(task);
|
84
|
+
this.parserPlugin = newParserPlugin(task);
|
85
|
+
this.nextControl = nextControl;
|
86
|
+
}
|
87
|
+
|
88
|
+
@Override
|
89
|
+
public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
|
90
|
+
{
|
91
|
+
final List<CommitReport> commitReports = new ArrayList<CommitReport>();
|
92
|
+
Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
|
93
|
+
public void run(final List<TaskSource> decoderTaskSources)
|
94
|
+
{
|
95
|
+
parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
|
96
|
+
public void run(final TaskSource parserTaskSource, final Schema schema)
|
97
|
+
{
|
98
|
+
task.setFileInputTaskSource(fileInputTaskSource);
|
99
|
+
task.setDecoderTaskSources(decoderTaskSources);
|
100
|
+
task.setParserTaskSource(parserTaskSource);
|
101
|
+
commitReports.addAll(nextControl.run(task.dump(), schema, processorCount));
|
102
|
+
}
|
103
|
+
});
|
104
|
+
}
|
105
|
+
});
|
106
|
+
return commitReports;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
public void cleanup(TaskSource taskSource,
|
111
|
+
Schema schema, int processorCount,
|
112
|
+
List<CommitReport> successCommitReports)
|
113
|
+
{
|
114
|
+
fileInputPlugin.cleanup(taskSource, processorCount, successCommitReports);
|
84
115
|
}
|
85
116
|
|
86
117
|
@Override
|
@@ -16,5 +16,13 @@ public interface FileOutputPlugin
|
|
16
16
|
public NextConfig transaction(ConfigSource config, int processorCount,
|
17
17
|
FileOutputPlugin.Control control);
|
18
18
|
|
19
|
+
public NextConfig resume(TaskSource taskSource,
|
20
|
+
int processorCount,
|
21
|
+
FileOutputPlugin.Control control);
|
22
|
+
|
23
|
+
public void cleanup(TaskSource taskSource,
|
24
|
+
int processorCount,
|
25
|
+
List<CommitReport> successCommitReports);
|
26
|
+
|
19
27
|
public TransactionalFileOutput open(TaskSource taskSource, int processorIndex);
|
20
28
|
}
|
@@ -60,30 +60,63 @@ public class FileOutputRunner
|
|
60
60
|
final OutputPlugin.Control control)
|
61
61
|
{
|
62
62
|
final RunnerTask task = config.loadConfig(RunnerTask.class);
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
63
|
+
return fileOutputPlugin.transaction(config, processorCount, new RunnerControl(schema, task, control));
|
64
|
+
}
|
65
|
+
|
66
|
+
public NextConfig resume(TaskSource taskSource,
|
67
|
+
Schema schema, int processorCount,
|
68
|
+
final OutputPlugin.Control control)
|
69
|
+
{
|
70
|
+
final RunnerTask task = taskSource.loadTask(RunnerTask.class);
|
71
|
+
return fileOutputPlugin.resume(task.getFileOutputTaskSource(), processorCount, new RunnerControl(schema, task, control));
|
72
|
+
}
|
73
|
+
|
74
|
+
private class RunnerControl
|
75
|
+
implements FileOutputPlugin.Control
|
76
|
+
{
|
77
|
+
private final Schema schema;
|
78
|
+
private final RunnerTask task;
|
79
|
+
private final List<EncoderPlugin> encoderPlugins;
|
80
|
+
private final FormatterPlugin formatterPlugin;
|
81
|
+
private final OutputPlugin.Control nextControl;
|
82
|
+
|
83
|
+
public RunnerControl(Schema schema, RunnerTask task, OutputPlugin.Control nextControl)
|
84
|
+
{
|
85
|
+
this.schema = schema;
|
86
|
+
this.task = task;
|
87
|
+
// create plugins earlier than run() to throw exceptions early
|
88
|
+
this.encoderPlugins = newEncoderPlugins(task);
|
89
|
+
this.formatterPlugin = newFormatterPlugin(task);
|
90
|
+
this.nextControl = nextControl;
|
91
|
+
}
|
92
|
+
|
93
|
+
@Override
|
94
|
+
public List<CommitReport> run(final TaskSource fileOutputTaskSource)
|
95
|
+
{
|
96
|
+
final List<CommitReport> commitReports = new ArrayList<CommitReport>();
|
97
|
+
Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
|
98
|
+
public void run(final List<TaskSource> encoderTaskSources)
|
99
|
+
{
|
100
|
+
formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
|
101
|
+
public void run(final TaskSource formatterTaskSource)
|
102
|
+
{
|
103
|
+
task.setFileOutputTaskSource(fileOutputTaskSource);
|
104
|
+
task.setEncoderTaskSources(encoderTaskSources);
|
105
|
+
task.setFormatterTaskSource(formatterTaskSource);
|
106
|
+
commitReports.addAll(nextControl.run(task.dump()));
|
107
|
+
}
|
108
|
+
});
|
109
|
+
}
|
110
|
+
});
|
111
|
+
return commitReports;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
public void cleanup(TaskSource taskSource,
|
116
|
+
Schema schema, int processorCount,
|
117
|
+
List<CommitReport> successCommitReports)
|
118
|
+
{
|
119
|
+
fileOutputPlugin.cleanup(taskSource, processorCount, successCommitReports);
|
87
120
|
}
|
88
121
|
|
89
122
|
@Override
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.spi;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import org.embulk.config.TaskSource;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
import org.embulk.config.NextConfig;
|
7
|
+
import org.embulk.config.CommitReport;
|
8
|
+
|
9
|
+
public interface FilterPlugin
|
10
|
+
{
|
11
|
+
public interface Control
|
12
|
+
{
|
13
|
+
public void run(TaskSource taskSource, Schema outputSchema);
|
14
|
+
}
|
15
|
+
|
16
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
17
|
+
FilterPlugin.Control control);
|
18
|
+
|
19
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
20
|
+
Schema outputSchema, PageOutput output);
|
21
|
+
}
|
@@ -10,11 +10,22 @@ public interface InputPlugin
|
|
10
10
|
{
|
11
11
|
public interface Control
|
12
12
|
{
|
13
|
-
public List<CommitReport> run(TaskSource taskSource,
|
13
|
+
public List<CommitReport> run(TaskSource taskSource,
|
14
|
+
Schema schema, int processorCount);
|
14
15
|
}
|
15
16
|
|
16
|
-
public NextConfig transaction(ConfigSource config,
|
17
|
+
public NextConfig transaction(ConfigSource config,
|
18
|
+
InputPlugin.Control control);
|
17
19
|
|
18
|
-
public
|
20
|
+
public NextConfig resume(TaskSource taskSource,
|
21
|
+
Schema schema, int processorCount,
|
22
|
+
InputPlugin.Control control);
|
23
|
+
|
24
|
+
public void cleanup(TaskSource taskSource,
|
25
|
+
Schema schema, int processorCount,
|
26
|
+
List<CommitReport> successCommitReports);
|
27
|
+
|
28
|
+
public CommitReport run(TaskSource taskSource,
|
29
|
+
Schema schema, int processorIndex,
|
19
30
|
PageOutput output);
|
20
31
|
}
|