embulk 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -0,0 +1,18 @@
1
+ package org.embulk.exec;
2
+
3
+ public class PartialExecutionException
4
+ extends RuntimeException
5
+ {
6
+ private final ResumeState resumeState;
7
+
8
+ public PartialExecutionException(Throwable cause, ResumeState resumeState)
9
+ {
10
+ super(cause);
11
+ this.resumeState = resumeState;
12
+ }
13
+
14
+ public ResumeState getResumeState()
15
+ {
16
+ return resumeState;
17
+ }
18
+ }
@@ -0,0 +1,82 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import com.fasterxml.jackson.annotation.JsonCreator;
5
+ import com.fasterxml.jackson.annotation.JsonProperty;
6
+ import org.embulk.config.TaskSource;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.config.CommitReport;
9
+ import org.embulk.spi.Schema;
10
+ import org.embulk.spi.ExecSession;
11
+
12
+ public class ResumeState
13
+ {
14
+ private final ConfigSource execSessionConfigSource;
15
+ private final TaskSource inputTaskSource;
16
+ private final TaskSource outputTaskSource;
17
+ private final Schema inputSchema;
18
+ private final Schema outputSchema;
19
+ private final List<CommitReport> inputCommitReports;
20
+ private final List<CommitReport> outputCommitReports;
21
+
22
+ @JsonCreator
23
+ public ResumeState(
24
+ @JsonProperty("exec_task") ConfigSource execSessionConfigSource,
25
+ @JsonProperty("in_task") TaskSource inputTaskSource,
26
+ @JsonProperty("out_task") TaskSource outputTaskSource,
27
+ @JsonProperty("in_schema") Schema inputSchema,
28
+ @JsonProperty("out_schema") Schema outputSchema,
29
+ @JsonProperty("in_reports") List<CommitReport> inputCommitReports,
30
+ @JsonProperty("out_reports") List<CommitReport> outputCommitReports)
31
+ {
32
+ this.execSessionConfigSource = execSessionConfigSource;
33
+ this.inputTaskSource = inputTaskSource;
34
+ this.outputTaskSource = outputTaskSource;
35
+ this.inputSchema = inputSchema;
36
+ this.outputSchema = outputSchema;
37
+ this.inputCommitReports = inputCommitReports;
38
+ this.outputCommitReports = outputCommitReports;
39
+ }
40
+
41
+ @JsonProperty("exec_task")
42
+ public ConfigSource getExecSessionConfigSource()
43
+ {
44
+ return execSessionConfigSource;
45
+ }
46
+
47
+ @JsonProperty("in_task")
48
+ public TaskSource getInputTaskSource()
49
+ {
50
+ return inputTaskSource;
51
+ }
52
+
53
+ @JsonProperty("out_task")
54
+ public TaskSource getOutputTaskSource()
55
+ {
56
+ return outputTaskSource;
57
+ }
58
+
59
+ @JsonProperty("in_schema")
60
+ public Schema getInputSchema()
61
+ {
62
+ return inputSchema;
63
+ }
64
+
65
+ @JsonProperty("out_schema")
66
+ public Schema getOutputSchema()
67
+ {
68
+ return outputSchema;
69
+ }
70
+
71
+ @JsonProperty("in_reports")
72
+ public List<CommitReport> getInputCommitReports()
73
+ {
74
+ return inputCommitReports;
75
+ }
76
+
77
+ @JsonProperty("out_reports")
78
+ public List<CommitReport> getOutputCommitReports()
79
+ {
80
+ return outputCommitReports;
81
+ }
82
+ }
@@ -12,7 +12,7 @@ import org.embulk.spi.ParserPlugin;
12
12
  import org.embulk.spi.FormatterPlugin;
13
13
  import org.embulk.spi.DecoderPlugin;
14
14
  import org.embulk.spi.EncoderPlugin;
15
- //import org.embulk.spi.LineFilterPlugin;
15
+ import org.embulk.spi.FilterPlugin;
16
16
  import org.embulk.spi.GuessPlugin;
17
17
 
18
18
  public class JRubyPluginSource
@@ -50,8 +50,8 @@ public class JRubyPluginSource
50
50
  category = "decoder";
51
51
  } else if (EncoderPlugin.class.isAssignableFrom(iface)) {
52
52
  category = "encoder";
53
- //} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
54
- // category = "line_filter";
53
+ } else if (FilterPlugin.class.isAssignableFrom(iface)) {
54
+ category = "filter";
55
55
  } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
56
  category = "guess";
57
57
  } else {
@@ -3,7 +3,11 @@ package org.embulk.spi;
3
3
  import org.joda.time.DateTimeZone;
4
4
  import org.slf4j.Logger;
5
5
  import org.slf4j.ILoggerFactory;
6
+ import com.google.common.base.Optional;
6
7
  import com.google.inject.Injector;
8
+ import org.embulk.config.Task;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
7
11
  import org.embulk.config.ModelManager;
8
12
  import org.embulk.config.CommitReport;
9
13
  import org.embulk.config.NextConfig;
@@ -26,18 +30,45 @@ public class ExecSession
26
30
  private final Timestamp transactionTime;
27
31
  private final DateTimeZone transactionTimeZone;
28
32
 
33
+ public interface SessionTask
34
+ extends Task
35
+ {
36
+ @Config("transaction_time")
37
+ @ConfigDefault("null")
38
+ Optional<Timestamp> getTransactionTime();
39
+
40
+ @Config("transaction_time_zone")
41
+ @ConfigDefault("\"UTC\"")
42
+ DateTimeZone getTransactionTimeZone();
43
+ }
44
+
29
45
  public ExecSession(Injector injector, ConfigSource execConfig)
30
46
  {
31
- super();
47
+ this(injector, execConfig.loadConfig(SessionTask.class));
48
+ }
49
+
50
+ public ExecSession(Injector injector, TaskSource taskSource)
51
+ {
52
+ this(injector, taskSource.loadTask(SessionTask.class));
53
+ }
54
+
55
+ public ExecSession(Injector injector, SessionTask task)
56
+ {
32
57
  this.injector = injector;
33
58
  this.loggerFactory = injector.getInstance(ILoggerFactory.class);
34
59
  this.modelManager = injector.getInstance(ModelManager.class);
35
60
  this.pluginManager = injector.getInstance(PluginManager.class);
36
61
  this.bufferAllocator = injector.getInstance(BufferAllocator.class);
37
62
 
38
- this.transactionTime = execConfig.get(Timestamp.class, "transaction_time",
39
- Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
40
- this.transactionTimeZone = execConfig.get(DateTimeZone.class, "transaction_time_zone", DateTimeZone.UTC);
63
+ this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
64
+ this.transactionTimeZone = task.getTransactionTimeZone();
65
+ }
66
+
67
+ public ConfigSource getSessionConfigSource()
68
+ {
69
+ return newConfigSource()
70
+ .set("transaction_time", transactionTime)
71
+ .set("transaction_time_zone", transactionTimeZone);
41
72
  }
42
73
 
43
74
  public Injector getInjector()
@@ -10,10 +10,21 @@ public interface FileInputPlugin
10
10
  {
11
11
  public interface Control
12
12
  {
13
- public List<CommitReport> run(TaskSource taskSource, int processorCount);
13
+ public List<CommitReport> run(TaskSource taskSource,
14
+ int processorCount);
14
15
  }
15
16
 
16
- public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control);
17
+ public NextConfig transaction(ConfigSource config,
18
+ FileInputPlugin.Control control);
17
19
 
18
- public TransactionalFileInput open(TaskSource taskSource, int processorIndex);
20
+ public NextConfig resume(TaskSource taskSource,
21
+ int processorCount,
22
+ FileInputPlugin.Control control);
23
+
24
+ public void cleanup(TaskSource taskSource,
25
+ int processorCount,
26
+ List<CommitReport> successCommitReports);
27
+
28
+ public TransactionalFileInput open(TaskSource taskSource,
29
+ int processorIndex);
19
30
  }
@@ -57,30 +57,61 @@ public class FileInputRunner
57
57
  public NextConfig transaction(ConfigSource config, final InputPlugin.Control control)
58
58
  {
59
59
  final RunnerTask task = config.loadConfig(RunnerTask.class);
60
- final List<DecoderPlugin> decoderPlugins = newDecoderPlugins(task);
61
- final ParserPlugin parserPlugin = newParserPlugin(task);
62
-
63
- return fileInputPlugin.transaction(config, new FileInputPlugin.Control() {
64
- public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
65
- {
66
- final List<CommitReport> commitReports = new ArrayList<CommitReport>();
67
- Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
68
- public void run(final List<TaskSource> decoderTaskSources)
69
- {
70
- parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
71
- public void run(final TaskSource parserTaskSource, final Schema schema)
72
- {
73
- task.setFileInputTaskSource(fileInputTaskSource);
74
- task.setDecoderTaskSources(decoderTaskSources);
75
- task.setParserTaskSource(parserTaskSource);
76
- commitReports.addAll(control.run(task.dump(), schema, processorCount));
77
- }
78
- });
79
- }
80
- });
81
- return commitReports;
82
- }
83
- });
60
+ return fileInputPlugin.transaction(config, new RunnerControl(task, control));
61
+ }
62
+
63
+ public NextConfig resume(TaskSource taskSource,
64
+ Schema schema, int processorCount,
65
+ InputPlugin.Control control)
66
+ {
67
+ final RunnerTask task = taskSource.loadTask(RunnerTask.class);
68
+ return fileInputPlugin.resume(task.getFileInputTaskSource(), processorCount, new RunnerControl(task, control));
69
+ }
70
+
71
+ private class RunnerControl
72
+ implements FileInputPlugin.Control
73
+ {
74
+ private final RunnerTask task;
75
+ private final List<DecoderPlugin> decoderPlugins;
76
+ private final ParserPlugin parserPlugin;
77
+ private final InputPlugin.Control nextControl;
78
+
79
+ public RunnerControl(RunnerTask task, InputPlugin.Control nextControl)
80
+ {
81
+ this.task = task;
82
+ // create plugins earlier than run() to throw exceptions early
83
+ this.decoderPlugins = newDecoderPlugins(task);
84
+ this.parserPlugin = newParserPlugin(task);
85
+ this.nextControl = nextControl;
86
+ }
87
+
88
+ @Override
89
+ public List<CommitReport> run(final TaskSource fileInputTaskSource, final int processorCount)
90
+ {
91
+ final List<CommitReport> commitReports = new ArrayList<CommitReport>();
92
+ Decoders.transaction(decoderPlugins, task.getDecoderConfigs(), new Decoders.Control() {
93
+ public void run(final List<TaskSource> decoderTaskSources)
94
+ {
95
+ parserPlugin.transaction(task.getParserConfig(), new ParserPlugin.Control() {
96
+ public void run(final TaskSource parserTaskSource, final Schema schema)
97
+ {
98
+ task.setFileInputTaskSource(fileInputTaskSource);
99
+ task.setDecoderTaskSources(decoderTaskSources);
100
+ task.setParserTaskSource(parserTaskSource);
101
+ commitReports.addAll(nextControl.run(task.dump(), schema, processorCount));
102
+ }
103
+ });
104
+ }
105
+ });
106
+ return commitReports;
107
+ }
108
+ }
109
+
110
+ public void cleanup(TaskSource taskSource,
111
+ Schema schema, int processorCount,
112
+ List<CommitReport> successCommitReports)
113
+ {
114
+ fileInputPlugin.cleanup(taskSource, processorCount, successCommitReports);
84
115
  }
85
116
 
86
117
  @Override
@@ -16,5 +16,13 @@ public interface FileOutputPlugin
16
16
  public NextConfig transaction(ConfigSource config, int processorCount,
17
17
  FileOutputPlugin.Control control);
18
18
 
19
+ public NextConfig resume(TaskSource taskSource,
20
+ int processorCount,
21
+ FileOutputPlugin.Control control);
22
+
23
+ public void cleanup(TaskSource taskSource,
24
+ int processorCount,
25
+ List<CommitReport> successCommitReports);
26
+
19
27
  public TransactionalFileOutput open(TaskSource taskSource, int processorIndex);
20
28
  }
@@ -60,30 +60,63 @@ public class FileOutputRunner
60
60
  final OutputPlugin.Control control)
61
61
  {
62
62
  final RunnerTask task = config.loadConfig(RunnerTask.class);
63
- final List<EncoderPlugin> encoderPlugins = newEncoderPlugins(task);
64
- final FormatterPlugin formatterPlugin = newFormatterPlugin(task);
65
-
66
- return fileOutputPlugin.transaction(config, processorCount, new FileOutputPlugin.Control() {
67
- public List<CommitReport> run(final TaskSource fileOutputTaskSource)
68
- {
69
- final List<CommitReport> commitReports = new ArrayList<CommitReport>();
70
- Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
71
- public void run(final List<TaskSource> encoderTaskSources)
72
- {
73
- formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
74
- public void run(final TaskSource formatterTaskSource)
75
- {
76
- task.setFileOutputTaskSource(fileOutputTaskSource);
77
- task.setEncoderTaskSources(encoderTaskSources);
78
- task.setFormatterTaskSource(formatterTaskSource);
79
- commitReports.addAll(control.run(task.dump()));
80
- }
81
- });
82
- }
83
- });
84
- return commitReports;
85
- }
86
- });
63
+ return fileOutputPlugin.transaction(config, processorCount, new RunnerControl(schema, task, control));
64
+ }
65
+
66
+ public NextConfig resume(TaskSource taskSource,
67
+ Schema schema, int processorCount,
68
+ final OutputPlugin.Control control)
69
+ {
70
+ final RunnerTask task = taskSource.loadTask(RunnerTask.class);
71
+ return fileOutputPlugin.resume(task.getFileOutputTaskSource(), processorCount, new RunnerControl(schema, task, control));
72
+ }
73
+
74
+ private class RunnerControl
75
+ implements FileOutputPlugin.Control
76
+ {
77
+ private final Schema schema;
78
+ private final RunnerTask task;
79
+ private final List<EncoderPlugin> encoderPlugins;
80
+ private final FormatterPlugin formatterPlugin;
81
+ private final OutputPlugin.Control nextControl;
82
+
83
+ public RunnerControl(Schema schema, RunnerTask task, OutputPlugin.Control nextControl)
84
+ {
85
+ this.schema = schema;
86
+ this.task = task;
87
+ // create plugins earlier than run() to throw exceptions early
88
+ this.encoderPlugins = newEncoderPlugins(task);
89
+ this.formatterPlugin = newFormatterPlugin(task);
90
+ this.nextControl = nextControl;
91
+ }
92
+
93
+ @Override
94
+ public List<CommitReport> run(final TaskSource fileOutputTaskSource)
95
+ {
96
+ final List<CommitReport> commitReports = new ArrayList<CommitReport>();
97
+ Encoders.transaction(encoderPlugins, task.getEncoderConfigs(), new Encoders.Control() {
98
+ public void run(final List<TaskSource> encoderTaskSources)
99
+ {
100
+ formatterPlugin.transaction(task.getFormatterConfig(), schema, new FormatterPlugin.Control() {
101
+ public void run(final TaskSource formatterTaskSource)
102
+ {
103
+ task.setFileOutputTaskSource(fileOutputTaskSource);
104
+ task.setEncoderTaskSources(encoderTaskSources);
105
+ task.setFormatterTaskSource(formatterTaskSource);
106
+ commitReports.addAll(nextControl.run(task.dump()));
107
+ }
108
+ });
109
+ }
110
+ });
111
+ return commitReports;
112
+ }
113
+ }
114
+
115
+ public void cleanup(TaskSource taskSource,
116
+ Schema schema, int processorCount,
117
+ List<CommitReport> successCommitReports)
118
+ {
119
+ fileOutputPlugin.cleanup(taskSource, processorCount, successCommitReports);
87
120
  }
88
121
 
89
122
  @Override
@@ -0,0 +1,21 @@
1
+ package org.embulk.spi;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.TaskSource;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.NextConfig;
7
+ import org.embulk.config.CommitReport;
8
+
9
+ public interface FilterPlugin
10
+ {
11
+ public interface Control
12
+ {
13
+ public void run(TaskSource taskSource, Schema outputSchema);
14
+ }
15
+
16
+ public void transaction(ConfigSource config, Schema inputSchema,
17
+ FilterPlugin.Control control);
18
+
19
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
20
+ Schema outputSchema, PageOutput output);
21
+ }
@@ -10,11 +10,22 @@ public interface InputPlugin
10
10
  {
11
11
  public interface Control
12
12
  {
13
- public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount);
13
+ public List<CommitReport> run(TaskSource taskSource,
14
+ Schema schema, int processorCount);
14
15
  }
15
16
 
16
- public NextConfig transaction(ConfigSource config, InputPlugin.Control control);
17
+ public NextConfig transaction(ConfigSource config,
18
+ InputPlugin.Control control);
17
19
 
18
- public CommitReport run(TaskSource taskSource, Schema schema, int processorIndex,
20
+ public NextConfig resume(TaskSource taskSource,
21
+ Schema schema, int processorCount,
22
+ InputPlugin.Control control);
23
+
24
+ public void cleanup(TaskSource taskSource,
25
+ Schema schema, int processorCount,
26
+ List<CommitReport> successCommitReports);
27
+
28
+ public CommitReport run(TaskSource taskSource,
29
+ Schema schema, int processorIndex,
19
30
  PageOutput output);
20
31
  }