embulk 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +8 -8
  2. data/ChangeLog +12 -0
  3. data/README.md +38 -13
  4. data/build.gradle +6 -1
  5. data/embulk-cli/pom.xml +1 -1
  6. data/embulk-core/pom.xml +1 -1
  7. data/embulk-core/src/main/java/org/embulk/command/Runner.java +87 -8
  8. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +1 -1
  9. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +16 -3
  10. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +1 -1
  11. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  12. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  13. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +37 -1
  14. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +461 -110
  15. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  16. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +82 -0
  17. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +3 -3
  18. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +35 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +14 -3
  20. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +55 -24
  21. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +8 -0
  22. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +57 -24
  23. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +21 -0
  24. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +14 -3
  25. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +8 -0
  26. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +87 -0
  27. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +4 -2
  28. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +16 -0
  29. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +15 -0
  30. data/embulk-standards/pom.xml +1 -1
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +16 -2
  32. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +14 -1
  33. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +14 -1
  34. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +15 -3
  35. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +15 -1
  36. data/lib/embulk/command/embulk_run.rb +16 -1
  37. data/lib/embulk/data/bundle/embulk/filter_example.rb +42 -0
  38. data/lib/embulk/data/bundle/embulk/input_example.rb +43 -33
  39. data/lib/embulk/data/bundle/embulk/output_example.rb +43 -36
  40. data/lib/embulk/filter_plugin.rb +86 -0
  41. data/lib/embulk/input_plugin.rb +37 -2
  42. data/lib/embulk/java/imports.rb +1 -0
  43. data/lib/embulk/output_plugin.rb +30 -0
  44. data/lib/embulk/plugin.rb +32 -19
  45. data/lib/embulk/schema.rb +16 -9
  46. data/lib/embulk/version.rb +1 -1
  47. data/pom.xml +1 -1
  48. metadata +13 -7
  49. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +0 -10
  50. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +0 -19
@@ -17,5 +17,13 @@ public interface OutputPlugin
17
17
  Schema schema, int processorCount,
18
18
  OutputPlugin.Control control);
19
19
 
20
+ public NextConfig resume(TaskSource taskSource,
21
+ Schema schema, int processorCount,
22
+ OutputPlugin.Control control);
23
+
24
+ public void cleanup(TaskSource taskSource,
25
+ Schema schema, int processorCount,
26
+ List<CommitReport> successCommitReports);
27
+
20
28
  public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex);
21
29
  }
@@ -0,0 +1,87 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.List;
4
+ import com.google.common.collect.ImmutableList;
5
+ import org.embulk.config.TaskSource;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.CommitReport;
8
+ import org.embulk.config.NextConfig;
9
+ import org.embulk.plugin.PluginType;
10
+ import org.embulk.spi.ExecSession;
11
+ import org.embulk.spi.Schema;
12
+ import org.embulk.spi.PageOutput;
13
+ import org.embulk.spi.FilterPlugin;
14
+
15
+ public abstract class Filters
16
+ {
17
+ private Filters() { }
18
+
19
+ public static List<FilterPlugin> newFilterPlugins(ExecSession exec, List<ConfigSource> configs)
20
+ {
21
+ ImmutableList.Builder<FilterPlugin> builder = ImmutableList.builder();
22
+ for (ConfigSource config : configs) {
23
+ builder.add(exec.newPlugin(FilterPlugin.class, config.get(PluginType.class, "type")));
24
+ }
25
+ return builder.build();
26
+ }
27
+
28
+ public interface Control
29
+ {
30
+ public void run(List<TaskSource> taskSources, List<Schema> filterSchemas);
31
+ }
32
+
33
+ public static void transaction(List<FilterPlugin> plugins, List<ConfigSource> configs,
34
+ Schema inputSchema, Filters.Control control)
35
+ {
36
+ new RecursiveControl(plugins, configs, control).transaction(inputSchema);
37
+ }
38
+
39
+ public static PageOutput open(List<FilterPlugin> plugins, List<TaskSource> taskSources,
40
+ List<Schema> filterSchemas, PageOutput output)
41
+ {
42
+ PageOutput out = output;
43
+ int pos = 0;
44
+ while (pos < plugins.size()) {
45
+ out = plugins.get(pos).open(taskSources.get(pos), filterSchemas.get(pos), filterSchemas.get(pos + 1), out);
46
+ pos++;
47
+ }
48
+ return out;
49
+ }
50
+
51
+ private static class RecursiveControl
52
+ {
53
+ private final List<FilterPlugin> plugins;
54
+ private final List<ConfigSource> configs;
55
+ private final Filters.Control finalControl;
56
+ private final ImmutableList.Builder<TaskSource> taskSources;
57
+ private final ImmutableList.Builder<Schema> filterSchemas;
58
+ private int pos;
59
+
60
+ RecursiveControl(List<FilterPlugin> plugins, List<ConfigSource> configs,
61
+ Filters.Control finalControl)
62
+ {
63
+ this.plugins = plugins;
64
+ this.configs = configs;
65
+ this.finalControl = finalControl;
66
+ this.taskSources = ImmutableList.builder();
67
+ this.filterSchemas = ImmutableList.builder();
68
+ }
69
+
70
+ public void transaction(Schema inputSchema)
71
+ {
72
+ filterSchemas.add(inputSchema);
73
+ if (pos < plugins.size()) {
74
+ plugins.get(pos).transaction(configs.get(pos), inputSchema, new FilterPlugin.Control() {
75
+ public void run(TaskSource taskSource, Schema outputSchema)
76
+ {
77
+ taskSources.add(taskSource);
78
+ pos++;
79
+ transaction(outputSchema);
80
+ }
81
+ });
82
+ } else {
83
+ finalControl.run(taskSources.build(), filterSchemas.build());
84
+ }
85
+ }
86
+ }
87
+ }
@@ -3,6 +3,7 @@ package org.embulk;
3
3
  import java.util.Random;
4
4
  import org.junit.runner.Description;
5
5
  import org.junit.runners.model.Statement;
6
+ import com.google.inject.Injector;
6
7
  import com.google.inject.Binder;
7
8
  import com.google.inject.Module;
8
9
  import org.embulk.config.ConfigSource;
@@ -49,8 +50,9 @@ public class EmbulkTestRuntime
49
50
  public EmbulkTestRuntime()
50
51
  {
51
52
  super(new TestRuntimeModule());
52
- ConfigSource execConfig = new DataSourceImpl(null);
53
- this.exec = new ExecSession(getInjector(), execConfig);
53
+ Injector injector = getInjector();
54
+ ConfigSource execConfig = new DataSourceImpl(injector.getInstance(ModelManager.class));
55
+ this.exec = new ExecSession(injector, execConfig);
54
56
  }
55
57
 
56
58
  public ExecSession getExec()
@@ -43,6 +43,7 @@ public class TestFileInputRunner
43
43
  this.buffers = buffers;
44
44
  }
45
45
 
46
+ @Override
46
47
  public NextConfig transaction(ConfigSource config,
47
48
  FileInputPlugin.Control control)
48
49
  {
@@ -50,6 +51,21 @@ public class TestFileInputRunner
50
51
  return null;
51
52
  }
52
53
 
54
+ @Override
55
+ public NextConfig resume(TaskSource taskSource,
56
+ int processorCount,
57
+ FileInputPlugin.Control control)
58
+ {
59
+ throw new UnsupportedOperationException();
60
+ }
61
+
62
+ @Override
63
+ public void cleanup(TaskSource taskSource,
64
+ int processorCount,
65
+ List<CommitReport> successCommitReports)
66
+ {
67
+ }
68
+
53
69
  public TransactionalFileInput open(TaskSource taskSource,
54
70
  int processorIndex)
55
71
  {
@@ -41,6 +41,21 @@ public class TestFileOutputRunner
41
41
  return Exec.newNextConfig();
42
42
  }
43
43
 
44
+ @Override
45
+ public NextConfig resume(TaskSource taskSource,
46
+ int processorCount,
47
+ FileOutputPlugin.Control control)
48
+ {
49
+ throw new UnsupportedOperationException();
50
+ }
51
+
52
+ @Override
53
+ public void cleanup(TaskSource taskSource,
54
+ int processorCount,
55
+ List<CommitReport> successCommitReports)
56
+ {
57
+ }
58
+
44
59
  @Override
45
60
  public TransactionalFileOutput open(TaskSource taskSource,
46
61
  final int processorIndex)
@@ -5,7 +5,7 @@
5
5
  <parent>
6
6
  <groupId>org.embulk</groupId>
7
7
  <artifactId>embulk-parent</artifactId>
8
- <version>0.2.1-SNAPSHOT</version>
8
+ <version>0.3.0-SNAPSHOT</version>
9
9
  </parent>
10
10
 
11
11
  <artifactId>embulk-standards</artifactId>
@@ -56,12 +56,26 @@ public class LocalFileInputPlugin
56
56
  // list files recursively
57
57
  task.setFiles(listFiles(task));
58
58
 
59
- // run with threads. number of processors is same with number of files
60
- control.run(task.dump(), task.getFiles().size());
59
+ // number of processors is same with number of files
60
+ int processorCount = task.getFiles().size();
61
+ return resume(task.dump(), processorCount, control);
62
+ }
61
63
 
64
+ @Override
65
+ public NextConfig resume(TaskSource taskSource,
66
+ int processorCount,
67
+ FileInputPlugin.Control control)
68
+ {
69
+ control.run(taskSource, processorCount);
62
70
  return Exec.newNextConfig();
63
71
  }
64
72
 
73
+ @Override
74
+ public void cleanup(TaskSource taskSource,
75
+ int processorCount,
76
+ List<CommitReport> successCommitReports)
77
+ { }
78
+
65
79
  public List<String> listFiles(PluginTask task)
66
80
  {
67
81
  final ImmutableList.Builder<String> builder = ImmutableList.builder();
@@ -47,11 +47,24 @@ public class LocalFileOutputPlugin
47
47
  {
48
48
  PluginTask task = config.loadConfig(PluginTask.class);
49
49
 
50
- control.run(task.dump());
50
+ return resume(task.dump(), processorCount, control);
51
+ }
51
52
 
53
+ @Override
54
+ public NextConfig resume(TaskSource taskSource,
55
+ int processorCount,
56
+ FileOutputPlugin.Control control)
57
+ {
58
+ control.run(taskSource);
52
59
  return Exec.newNextConfig();
53
60
  }
54
61
 
62
+ @Override
63
+ public void cleanup(TaskSource taskSource,
64
+ int processorCount,
65
+ List<CommitReport> successCommitReports)
66
+ { }
67
+
55
68
  @Override
56
69
  public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
57
70
  {
@@ -1,5 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
+ import java.util.List;
3
4
  import org.embulk.config.ConfigSource;
4
5
  import org.embulk.config.TaskSource;
5
6
  import org.embulk.config.NextConfig;
@@ -18,10 +19,22 @@ public class NullOutputPlugin
18
19
  Schema schema, int processorCount,
19
20
  OutputPlugin.Control control)
20
21
  {
21
- control.run(Exec.newTaskSource());
22
+ return resume(Exec.newTaskSource(), schema, processorCount, control);
23
+ }
24
+
25
+ public NextConfig resume(TaskSource taskSource,
26
+ Schema schema, int processorCount,
27
+ OutputPlugin.Control control)
28
+ {
29
+ control.run(taskSource);
22
30
  return Exec.newNextConfig();
23
31
  }
24
32
 
33
+ public void cleanup(TaskSource taskSource,
34
+ Schema schema, int processorCount,
35
+ List<CommitReport> successCommitReports)
36
+ { }
37
+
25
38
  @Override
26
39
  public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
27
40
  {
@@ -77,13 +77,25 @@ public class S3FileInputPlugin
77
77
  task.setFiles(listFiles(task));
78
78
 
79
79
  // number of processors is same with number of files
80
+ int processorCount = task.getFiles().size();
81
+ return resume(task.dump(), processorCount, control);
82
+ }
80
83
 
81
- // run
82
- control.run(task.dump(), task.getFiles().size());
83
-
84
+ @Override
85
+ public NextConfig resume(TaskSource taskSource,
86
+ int processorCount,
87
+ FileInputPlugin.Control control)
88
+ {
89
+ control.run(taskSource, processorCount);
84
90
  return Exec.newNextConfig();
85
91
  }
86
92
 
93
+ @Override
94
+ public void cleanup(TaskSource taskSource,
95
+ int processorCount,
96
+ List<CommitReport> successCommitReports)
97
+ { }
98
+
87
99
  public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
88
100
  {
89
101
  final AWSCredentials cred = new BasicAWSCredentials(
@@ -1,5 +1,6 @@
1
1
  package org.embulk.standards;
2
2
 
3
+ import java.util.List;
3
4
  import org.embulk.config.ConfigSource;
4
5
  import org.embulk.config.TaskSource;
5
6
  import org.embulk.config.NextConfig;
@@ -30,10 +31,23 @@ public class StdoutOutputPlugin
30
31
  OutputPlugin.Control control)
31
32
  {
32
33
  final PluginTask task = config.loadConfig(PluginTask.class);
33
- control.run(task.dump());
34
+ return resume(task.dump(), schema, processorCount, control);
35
+ }
36
+
37
+ @Override
38
+ public NextConfig resume(TaskSource taskSource,
39
+ Schema schema, int processorCount,
40
+ OutputPlugin.Control control)
41
+ {
42
+ control.run(taskSource);
34
43
  return Exec.newNextConfig();
35
44
  }
36
45
 
46
+ public void cleanup(TaskSource taskSource,
47
+ Schema schema, int processorCount,
48
+ List<CommitReport> successCommitReports)
49
+ { }
50
+
37
51
  @Override
38
52
  public TransactionalPageOutput open(TaskSource taskSource, final Schema schema,
39
53
  int processorIndex)
@@ -51,6 +51,21 @@ module Embulk
51
51
  op.on('-o', '--output PATH', 'Path to a file to write the next configuration') do |path|
52
52
  options[:nextConfigOutputPath] = path
53
53
  end
54
+ op.on('-r', '--resume-state PATH', 'Path to a file to write or read resume state') do |path|
55
+ options[:resumeStatePath] = path
56
+ end
57
+ args = 1..1
58
+
59
+ when :cleanup
60
+ op.banner = "Usage: run <config.yml>"
61
+ op.on('-b', '--bundle BUNDLE_DIR', 'Path to a Gemfile directory') do |path|
62
+ end
63
+ op.on('-I', '--load-path PATH', 'Add ruby script directory path or jar file path') do |load_path|
64
+ load_paths << load_path
65
+ end
66
+ op.on('-r', '--resume-state PATH', 'Path to a file to write or read resume state') do |path|
67
+ options[:resumeStatePath] = path
68
+ end
54
69
  args = 1..1
55
70
 
56
71
  when :preview
@@ -118,7 +133,7 @@ module Embulk
118
133
  if __FILE__ =~ /^classpath:/ || __FILE__.include?('!/')
119
134
  # data is in embulk-core jar
120
135
  resource_class = org.embulk.command.Runner.java_class
121
- %w[.bundle/config embulk/input_example.rb embulk/output_example.rb Gemfile Gemfile.lock].each do |file| # TODO get file list from the jar
136
+ %w[.bundle/config embulk/input_example.rb embulk/filter_example.rb embulk/output_example.rb Gemfile Gemfile.lock].each do |file| # TODO get file list from the jar
122
137
  url = resource_class.resource("/embulk/data/bundle/#{file}").to_s
123
138
  dst = File.join(path, file)
124
139
  FileUtils.mkdir_p File.dirname(dst)
@@ -0,0 +1,42 @@
1
module Embulk
  module Plugin

    # Example filter plugin. It appends one string column to the incoming
    # schema (named by the 'key' option) and fills that column in every
    # record with the 'value' option.
    class FilterExample < FilterPlugin
      # filter plugin file name must be: embulk/filter_<name>.rb
      Plugin.register_filter('example', self)

      # Builds the task hash and the output column list, then yields both
      # to the given block.
      def self.transaction(config, in_schema, &control)
        key = config.param('key', :string, default: "filter_key")
        value = config.param('value', :string, default: "filter_value")
        task = { 'key' => key, 'value' => value }

        # the new column is placed right after the existing ones
        appended_column = Column.new(in_schema.size, key, :string)
        out_columns = in_schema + [appended_column]

        puts "Example filter started."
        yield(task, out_columns)
        puts "Example filter finished."
      end

      # Keeps the configured value around for use in #add.
      def initialize(task, in_schema, out_schema, page_builder)
        super
        @value = task['value']
      end

      # Nothing to release.
      def close
      end

      # Forwards every record of the page with the configured value appended.
      def add(page)
        page.each { |record| @page_builder.add(record + [@value]) }
      end

      # Finishes the page builder.
      def finish
        @page_builder.finish
      end
    end

  end
end
@@ -1,44 +1,54 @@
1
1
  module Embulk
2
+ module Plugin
3
+
4
+ class InputExample < InputPlugin
5
+ # input plugin file name must be: embulk/input_<name>.rb
6
+ Plugin.register_input('example', self)
7
+
8
+ def self.transaction(config, &control)
9
+ files = ['file1', 'file2']
10
+ task = {
11
+ 'files' => files,
12
+ 'hostname' => config.param('hostname', :string, default: nil)
13
+ }
14
+
15
+ columns = [
16
+ Column.new(0, 'file', :string),
17
+ Column.new(1, 'hostname', :string),
18
+ Column.new(2, 'col0', :long),
19
+ Column.new(3, 'col1', :double),
20
+ ]
21
+
22
+ resume(task, columns, files.length, &control)
23
+ end
2
24
 
3
- class InputExample < InputPlugin
4
- # input plugin file name must be: embulk/input_<name>.rb
5
- Plugin.register_input('example', self)
6
-
7
- def self.transaction(config, &control)
8
- task = {
9
- 'message' => config.param('message', :string, default: nil)
10
- }
11
- threads = config.param('threads', :integer, default: 2)
12
-
13
- columns = [
14
- Column.new(0, 'col0', :long),
15
- Column.new(1, 'col1', :double),
16
- Column.new(2, 'col2', :string),
17
- ]
25
+ def self.resume(task, columns, count, &control)
26
+ puts "Example input started."
27
+ commit_reports = yield(task, columns, count)
28
+ puts "Example input finished. Commit reports = #{commit_reports.to_json}"
18
29
 
19
- puts "Example input started."
20
- commit_reports = yield(task, columns, threads)
21
- puts "Example input finished. Commit reports = #{commit_reports.to_json}"
30
+ next_config_diff = {}
31
+ return next_config_diff
32
+ end
22
33
 
23
- return {}
24
- end
34
+ def initialize(task, schema, index, page_builder)
35
+ super
36
+ @file = task['files'][index]
37
+ @hostname = task['hostname']
38
+ end
25
39
 
26
- def initialize(task, schema, index, page_builder)
27
- super
28
- end
40
+ def run
41
+ puts "Example input thread #{@index}..."
29
42
 
30
- def run
31
- puts "Example input thread #{@index}..."
43
+ 10.times do |i|
44
+ @page_builder.add([@file, @hostname, i, 10.0])
45
+ end
46
+ @page_builder.finish # don't forget to call finish :-)
32
47
 
33
- 10.times do |i|
34
- @page_builder.add([i, 10.0, "example"])
48
+ commit_report = {}
49
+ return commit_report
35
50
  end
36
- @page_builder.finish # don't forget to call finish :-)
37
-
38
- commit_report = {
39
- }
40
- return commit_report
41
51
  end
42
- end
43
52
 
53
+ end
44
54
  end