embulk 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50)
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -0,0 +1,18 @@
1
+ package org.embulk.exec;
2
+
3
+ public class PartialExecutionException
4
+ extends RuntimeException
5
+ {
6
+ private final ResumeState resumeState;
7
+
8
+ public PartialExecutionException(Throwable cause, ResumeState resumeState)
9
+ {
10
+ super(cause);
11
+ this.resumeState = resumeState;
12
+ }
13
+
14
+ public ResumeState getResumeState()
15
+ {
16
+ return resumeState;
17
+ }
18
+ }
@@ -0,0 +1,82 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import com.fasterxml.jackson.annotation.JsonCreator;
5
+ import com.fasterxml.jackson.annotation.JsonProperty;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.CommitReport;
9
+ import org.embulk.spi.Schema;
10
+ import org.embulk.spi.ExecSession;
11
+
12
+ public class ResumeState
13
+ {
14
+ private final ConfigSource execSessionConfigSource;
15
+ private final TaskSource inputTaskSource;
16
+ private final TaskSource outputTaskSource;
17
+ private final Schema inputSchema;
18
+ private final Schema outputSchema;
19
+ private final List<CommitReport> inputCommitReports;
20
+ private final List<CommitReport> outputCommitReports;
21
+
22
+ @JsonCreator
23
+ public ResumeState(
24
+ @JsonProperty("exec_task") ConfigSource execSessionConfigSource,
25
+ @JsonProperty("in_task") TaskSource inputTaskSource,
26
+ @JsonProperty("out_task") TaskSource outputTaskSource,
27
+ @JsonProperty("in_schema") Schema inputSchema,
28
+ @JsonProperty("out_schema") Schema outputSchema,
29
+ @JsonProperty("in_reports") List<CommitReport> inputCommitReports,
30
+ @JsonProperty("out_reports") List<CommitReport> outputCommitReports)
31
+ {
32
+ this.execSessionConfigSource = execSessionConfigSource;
33
+ this.inputTaskSource = inputTaskSource;
34
+ this.outputTaskSource = outputTaskSource;
35
+ this.inputSchema = inputSchema;
36
+ this.outputSchema = outputSchema;
37
+ this.inputCommitReports = inputCommitReports;
38
+ this.outputCommitReports = outputCommitReports;
39
+ }
40
+
41
+ @JsonProperty("exec_task")
42
+ public ConfigSource getExecSessionConfigSource()
43
+ {
44
+ return execSessionConfigSource;
45
+ }
46
+
47
+ @JsonProperty("in_task")
48
+ public TaskSource getInputTaskSource()
49
+ {
50
+ return inputTaskSource;
51
+ }
52
+
53
+ @JsonProperty("out_task")
54
+ public TaskSource getOutputTaskSource()
55
+ {
56
+ return outputTaskSource;
57
+ }
58
+
59
+ @JsonProperty("in_schema")
60
+ public Schema getInputSchema()
61
+ {
62
+ return inputSchema;
63
+ }
64
+
65
+ @JsonProperty("out_schema")
66
+ public Schema getOutputSchema()
67
+ {
68
+ return outputSchema;
69
+ }
70
+
71
+ @JsonProperty("in_reports")
72
+ public List<CommitReport> getInputCommitReports()
73
+ {
74
+ return inputCommitReports;
75
+ }
76
+
77
+ @JsonProperty("out_reports")
78
+ public List<CommitReport> getOutputCommitReports()
79
+ {
80
+ return outputCommitReports;
81
+ }
82
+ }
@@ -12,7 +12,7 @@ import org.embulk.spi.ParserPlugin;
12
12
  import org.embulk.spi.FormatterPlugin;
13
13
  import org.embulk.spi.DecoderPlugin;
14
14
  import org.embulk.spi.EncoderPlugin;
15
- //import org.embulk.spi.LineFilterPlugin;
15
+ import org.embulk.spi.FilterPlugin;
16
16
  import org.embulk.spi.GuessPlugin;
17
17
 
18
18
  public class JRubyPluginSource
@@ -50,8 +50,8 @@ public class JRubyPluginSource
50
50
  category = "decoder";
51
51
  } else if (EncoderPlugin.class.isAssignableFrom(iface)) {
52
52
  category = "encoder";
53
- //} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
54
- // category = "line_filter";
53
+ } else if (FilterPlugin.class.isAssignableFrom(iface)) {
54
+ category = "filter";
55
55
  } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
56
  category = "guess";
57
57
  } else {
@@ -3,7 +3,11 @@ package org.embulk.spi;
3
3
  import org.joda.time.DateTimeZone;
4
4
  import org.slf4j.Logger;
5
5
  import org.slf4j.ILoggerFactory;
6
+ import com.google.common.base.Optional;
6
7
  import com.google.inject.Injector;
8
+ import org.embulk.config.Task;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
7
11
  import org.embulk.config.ModelManager;
8
12
  import org.embulk.config.CommitReport;
9
13
  import org.embulk.config.NextConfig;
@@ -26,18 +30,45 @@ public class ExecSession
26
30
  private final Timestamp transactionTime;
27
31
  private final DateTimeZone transactionTimeZone;
28
32
 
33
+ public interface SessionTask
34
+ extends Task
35
+ {
36
+ @Config("transaction_time")
37
+ @ConfigDefault("null")
38
+ Optional<Timestamp> getTransactionTime();
39
+
40
+ @Config("transaction_time_zone")
41
+ @ConfigDefault("\"UTC\"")
42
+ DateTimeZone getTransactionTimeZone();
43
+ }
44
+
29
45
  public ExecSession(Injector injector, ConfigSource execConfig)
30
46
  {
31
- super();
47
+ this(injector, execConfig.loadConfig(SessionTask.class));
48
+ }
49
+
50
+ public ExecSession(Injector injector, TaskSource taskSource)
51
+ {
52
+ this(injector, taskSource.loadTask(SessionTask.class));
53
+ }
54
+
55
+ public ExecSession(Injector injector, SessionTask task)
56
+ {
32
57
  this.injector = injector;
33
58
  this.loggerFactory = injector.getInstance(ILoggerFactory.class);
34
59
  this.modelManager = injector.getInstance(ModelManager.class);
35
60
  this.pluginManager = injector.getInstance(PluginManager.class);
36
61
  this.bufferAllocator = injector.getInstance(BufferAllocator.class);
37
62
 
38
- this.transactionTime = execConfig.get(Timestamp.class, "transaction_time",
39
- Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
40
- this.transactionTimeZone = execConfig.get(DateTimeZone.class, "transaction_time_zone", DateTimeZone.UTC);
63
+ this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
64
+ this.transactionTimeZone = task.getTransactionTimeZone();
65
+ }
66
+
67
+ public ConfigSource getSessionConfigSource()
68
+ {
69
+ return newConfigSource()
70
+ .set("transaction_time", transactionTime)
71
+ .set("transaction_time_zone", transactionTimeZone);
41
72
  }
42
73
 
43
74
  public Injector getInjector()
@@ -10,10 +10,21 @@ public interface FileInputPlugin
10
10
  {
11
11
  public interface Control
12
12
  {
13
- public List<CommitReport> run(TaskSource taskSource, int processorCount);
13
+ public List<CommitReport> run(TaskSource taskSource,
14
+ int processorCount);
14
15
  }
15
16
 
16
- public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control);
17
+ public NextConfig transaction(ConfigSource config,
18
+ FileInputPlugin.Control control);
17
19
 
18
- public TransactionalFileInput open(TaskSource taskSource, int processorIndex);
20
+ public NextConfig resume(TaskSource taskSource,
21
+ int processorCount,
22
+ FileInputPlugin.Control control);
23
+
24
+ public void cleanup(TaskSource taskSource,
25
+ int processorCount,
26
+ List<CommitReport> successCommitReports);
27
+
28
+ public TransactionalFileInput open(TaskSource taskSource,
29
+ int processorIndex);
19
30
  }
@@ -57,30 +57,61 @@ public class FileInputRunner
57
57
  public NextConfig transaction(ConfigSource config, final InputPlugin.Control control)
58
58
  {
59
59
  final RunnerTask task = config.loadConfig(RunnerTask.class);
60
- final List<DecoderPlugin> decoderPlugins = newDecoderPlugins(task);
61
- final ParserPlugin parserPlugin = newParserPlugin(task);
62
-
63
- return fileInputPlugin.transaction(config, new FileInputPlugin.Control() {
64
- public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
65
- {
66
- final List<CommitReport> commitReports = new ArrayList<CommitReport>();
67
- Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
68
- public void run(final List<TaskSource> decoderTaskSources)
69
- {
70
- parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
71
- public void run(final TaskSource parserTaskSource, final Schema schema)
72
- {
73
- task.setFileInputTaskSource(fileInputTaskSource);
74
- task.setDecoderTaskSources(decoderTaskSources);
75
- task.setParserTaskSource(parserTaskSource);
76
- commitReports.addAll(control.run(task.dump(), schema, processorCount));
77
- }
78
- });
79
- }
80
- });
81
- return commitReports;
82
- }
83
- });
60
+ return fileInputPlugin.transaction(config, new RunnerControl(task, control));
61
+ }
62
+
63
+ public NextConfig resume(TaskSource taskSource,
64
+ Schema schema, int processorCount,
65
+ InputPlugin.Control control)
66
+ {
67
+ final RunnerTask task = taskSource.loadTask(RunnerTask.class);
68
+ return fileInputPlugin.resume(task.getFileInputTaskSource(), processorCount, new RunnerControl(task, control));
69
+ }
70
+
71
+ private class RunnerControl
72
+ implements FileInputPlugin.Control
73
+ {
74
+ private final RunnerTask task;
75
+ private final List<DecoderPlugin> decoderPlugins;
76
+ private final ParserPlugin parserPlugin;
77
+ private final InputPlugin.Control nextControl;
78
+
79
+ public RunnerControl(RunnerTask task, InputPlugin.Control nextControl)
80
+ {
81
+ this.task = task;
82
+ // create plugins earlier than run() to throw exceptions early
83
+ this.decoderPlugins = newDecoderPlugins(task);
84
+ this.parserPlugin = newParserPlugin(task);
85
+ this.nextControl = nextControl;
86
+ }
87
+
88
+ @Override
89
+ public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
90
+ {
91
+ final List<CommitReport> commitReports = new ArrayList<CommitReport>();
92
+ Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
93
+ public void run(final List<TaskSource> decoderTaskSources)
94
+ {
95
+ parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
96
+ public void run(final TaskSource parserTaskSource, final Schema schema)
97
+ {
98
+ task.setFileInputTaskSource(fileInputTaskSource);
99
+ task.setDecoderTaskSources(decoderTaskSources);
100
+ task.setParserTaskSource(parserTaskSource);
101
+ commitReports.addAll(nextControl.run(task.dump(), schema, processorCount));
102
+ }
103
+ });
104
+ }
105
+ });
106
+ return commitReports;
107
+ }
108
+ }
109
+
110
+ public void cleanup(TaskSource taskSource,
111
+ Schema schema, int processorCount,
112
+ List<CommitReport> successCommitReports)
113
+ {
114
+ fileInputPlugin.cleanup(taskSource, processorCount, successCommitReports);
84
115
  }
85
116
 
86
117
  @Override
@@ -16,5 +16,13 @@ public interface FileOutputPlugin
16
16
  public NextConfig transaction(ConfigSource config, int processorCount,
17
17
  FileOutputPlugin.Control control);
18
18
 
19
+ public NextConfig resume(TaskSource taskSource,
20
+ int processorCount,
21
+ FileOutputPlugin.Control control);
22
+
23
+ public void cleanup(TaskSource taskSource,
24
+ int processorCount,
25
+ List<CommitReport> successCommitReports);
26
+
19
27
  public TransactionalFileOutput open(TaskSource taskSource, int processorIndex);
20
28
  }
@@ -60,30 +60,63 @@ public class FileOutputRunner
60
60
  final OutputPlugin.Control control)
61
61
  {
62
62
  final RunnerTask task = config.loadConfig(RunnerTask.class);
63
- final List<EncoderPlugin> encoderPlugins = newEncoderPlugins(task);
64
- final FormatterPlugin formatterPlugin = newFormatterPlugin(task);
65
-
66
- return fileOutputPlugin.transaction(config, processorCount, new FileOutputPlugin.Control() {
67
- public List<CommitReport> run(final TaskSource fileOutputTaskSource)
68
- {
69
- final List<CommitReport> commitReports = new ArrayList<CommitReport>();
70
- Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
71
- public void run(final List<TaskSource> encoderTaskSources)
72
- {
73
- formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
74
- public void run(final TaskSource formatterTaskSource)
75
- {
76
- task.setFileOutputTaskSource(fileOutputTaskSource);
77
- task.setEncoderTaskSources(encoderTaskSources);
78
- task.setFormatterTaskSource(formatterTaskSource);
79
- commitReports.addAll(control.run(task.dump()));
80
- }
81
- });
82
- }
83
- });
84
- return commitReports;
85
- }
86
- });
63
+ return fileOutputPlugin.transaction(config, processorCount, new RunnerControl(schema, task, control));
64
+ }
65
+
66
+ public NextConfig resume(TaskSource taskSource,
67
+ Schema schema, int processorCount,
68
+ final OutputPlugin.Control control)
69
+ {
70
+ final RunnerTask task = taskSource.loadTask(RunnerTask.class);
71
+ return fileOutputPlugin.resume(task.getFileOutputTaskSource(), processorCount, new RunnerControl(schema, task, control));
72
+ }
73
+
74
+ private class RunnerControl
75
+ implements FileOutputPlugin.Control
76
+ {
77
+ private final Schema schema;
78
+ private final RunnerTask task;
79
+ private final List<EncoderPlugin> encoderPlugins;
80
+ private final FormatterPlugin formatterPlugin;
81
+ private final OutputPlugin.Control nextControl;
82
+
83
+ public RunnerControl(Schema schema, RunnerTask task, OutputPlugin.Control nextControl)
84
+ {
85
+ this.schema = schema;
86
+ this.task = task;
87
+ // create plugins earlier than run() to throw exceptions early
88
+ this.encoderPlugins = newEncoderPlugins(task);
89
+ this.formatterPlugin = newFormatterPlugin(task);
90
+ this.nextControl = nextControl;
91
+ }
92
+
93
+ @Override
94
+ public List<CommitReport> run(final TaskSource fileOutputTaskSource)
95
+ {
96
+ final List<CommitReport> commitReports = new ArrayList<CommitReport>();
97
+ Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
98
+ public void run(final List<TaskSource> encoderTaskSources)
99
+ {
100
+ formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
101
+ public void run(final TaskSource formatterTaskSource)
102
+ {
103
+ task.setFileOutputTaskSource(fileOutputTaskSource);
104
+ task.setEncoderTaskSources(encoderTaskSources);
105
+ task.setFormatterTaskSource(formatterTaskSource);
106
+ commitReports.addAll(nextControl.run(task.dump()));
107
+ }
108
+ });
109
+ }
110
+ });
111
+ return commitReports;
112
+ }
113
+ }
114
+
115
+ public void cleanup(TaskSource taskSource,
116
+ Schema schema, int processorCount,
117
+ List<CommitReport> successCommitReports)
118
+ {
119
+ fileOutputPlugin.cleanup(taskSource, processorCount, successCommitReports);
87
120
  }
88
121
 
89
122
  @Override
@@ -0,0 +1,21 @@
1
+ package org.embulk.spi;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.TaskSource;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.NextConfig;
7
+ import org.embulk.config.CommitReport;
8
+
9
+ public interface FilterPlugin
10
+ {
11
+ public interface Control
12
+ {
13
+ public void run(TaskSource taskSource, Schema outputSchema);
14
+ }
15
+
16
+ public void transaction(ConfigSource config, Schema inputSchema,
17
+ FilterPlugin.Control control);
18
+
19
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
20
+ Schema outputSchema, PageOutput output);
21
+ }
@@ -10,11 +10,22 @@ public interface InputPlugin
10
10
  {
11
11
  public interface Control
12
12
  {
13
- public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount);
13
+ public List<CommitReport> run(TaskSource taskSource,
14
+ Schema schema, int processorCount);
14
15
  }
15
16
 
16
- public NextConfig transaction(ConfigSource config, InputPlugin.Control control);
17
+ public NextConfig transaction(ConfigSource config,
18
+ InputPlugin.Control control);
17
19
 
18
- public CommitReport run(TaskSource taskSource, Schema schema, int processorIndex,
20
+ public NextConfig resume(TaskSource taskSource,
21
+ Schema schema, int processorCount,
22
+ InputPlugin.Control control);
23
+
24
+ public void cleanup(TaskSource taskSource,
25
+ Schema schema, int processorCount,
26
+ List<CommitReport> successCommitReports);
27
+
28
+ public CommitReport run(TaskSource taskSource,
29
+ Schema schema, int processorIndex,
19
30
  PageOutput output);
20
31
  }