embulk 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +8 -11
  3. data/build.gradle +22 -2
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -0
  5. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +8 -6
  6. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +54 -46
  7. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +20 -3
  8. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +41 -10
  9. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +1 -1
  10. data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +5 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +24 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +4 -4
  14. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +8 -8
  15. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +4 -4
  16. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +8 -8
  17. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +4 -4
  18. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +4 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +1 -1
  20. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +32 -8
  21. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +1 -1
  22. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +3 -4
  23. data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
  24. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +1 -1
  25. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +5 -5
  26. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
  27. data/embulk-docs/src/release.rst +1 -0
  28. data/embulk-docs/src/release/release-0.4.4.rst +39 -0
  29. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +32 -7
  30. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +9 -9
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +6 -6
  32. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +5 -5
  33. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
  34. data/lib/embulk/command/embulk_run.rb +14 -2
  35. data/lib/embulk/data/new/java/file_input.java.erb +7 -7
  36. data/lib/embulk/data/new/java/file_output.java.erb +5 -5
  37. data/lib/embulk/data/new/java/input.java.erb +6 -6
  38. data/lib/embulk/data/new/java/output.java.erb +5 -5
  39. data/lib/embulk/data_source.rb +3 -3
  40. data/lib/embulk/guess_plugin.rb +5 -5
  41. data/lib/embulk/input_plugin.rb +7 -7
  42. data/lib/embulk/output_plugin.rb +7 -7
  43. data/lib/embulk/version.rb +1 -1
  44. metadata +7 -4
@@ -27,13 +27,13 @@ public class <%= java_class_name %>
27
27
  }
28
28
 
29
29
  @Override
30
- public ConfigDiff transaction(ConfigSource config, int processorCount,
30
+ public ConfigDiff transaction(ConfigSource config, int taskCount,
31
31
  FileOutputPlugin.Control control)
32
32
  {
33
33
  PluginTask task = config.loadConfig(PluginTask.class);
34
34
 
35
35
  // retryable (idempotent) output:
36
- // return resume(task.dump(), processorCount, control);
36
+ // return resume(task.dump(), taskCount, control);
37
37
 
38
38
  // non-retryable (non-idempotent) output:
39
39
  control.run(task.dump());
@@ -42,7 +42,7 @@ public class <%= java_class_name %>
42
42
 
43
43
  @Override
44
44
  public ConfigDiff resume(TaskSource taskSource,
45
- int processorCount,
45
+ int taskCount,
46
46
  FileOutputPlugin.Control control)
47
47
  {
48
48
  throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
@@ -50,13 +50,13 @@ public class <%= java_class_name %>
50
50
 
51
51
  @Override
52
52
  public void cleanup(TaskSource taskSource,
53
- int processorCount,
53
+ int taskCount,
54
54
  List<CommitReport> successCommitReports)
55
55
  {
56
56
  }
57
57
 
58
58
  @Override
59
- public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
59
+ public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
60
60
  {
61
61
  PluginTask task = taskSource.loadTask(PluginTask.class);
62
62
 
@@ -38,27 +38,27 @@ public class <%= java_class_name %>
38
38
  PluginTask task = config.loadConfig(PluginTask.class);
39
39
 
40
40
  Schema schema = task.getColumns().toSchema();
41
- int processorCount = 1; // number of run() method calls
41
+ int taskCount = 1; // number of run() method calls
42
42
 
43
- return resume(task.dump(), schema, processorCount, control);
43
+ return resume(task.dump(), schema, taskCount, control);
44
44
  }
45
45
 
46
46
  public ConfigDiff resume(TaskSource taskSource,
47
- Schema schema, int processorCount,
47
+ Schema schema, int taskCount,
48
48
  InputPlugin.Control control)
49
49
  {
50
- control.run(taskSource, schema, processorCount);
50
+ control.run(taskSource, schema, taskCount);
51
51
  return Exec.newConfigDiff();
52
52
  }
53
53
 
54
54
  public void cleanup(TaskSource taskSource,
55
- Schema schema, int processorCount,
55
+ Schema schema, int taskCount,
56
56
  List<CommitReport> successCommitReports)
57
57
  {
58
58
  }
59
59
 
60
60
  public CommitReport run(TaskSource taskSource,
61
- Schema schema, int processorIndex,
61
+ Schema schema, int taskIndex,
62
62
  PageOutput output)
63
63
  {
64
64
  PluginTask task = taskSource.loadTask(PluginTask.class);
@@ -29,13 +29,13 @@ public class <%= java_class_name %>
29
29
  }
30
30
 
31
31
  public ConfigDiff transaction(ConfigSource config,
32
- Schema schema, int processorCount,
32
+ Schema schema, int taskCount,
33
33
  OutputPlugin.Control control)
34
34
  {
35
35
  PluginTask task = config.loadConfig(PluginTask.class);
36
36
 
37
37
  // retryable (idempotent) output:
38
- // return resume(task.dump(), schema, processorCount, control);
38
+ // return resume(task.dump(), schema, taskCount, control);
39
39
 
40
40
  // non-retryable (non-idempotent) output:
41
41
  control.run(task.dump());
@@ -43,19 +43,19 @@ public class <%= java_class_name %>
43
43
  }
44
44
 
45
45
  public ConfigDiff resume(TaskSource taskSource,
46
- Schema schema, int processorCount,
46
+ Schema schema, int taskCount,
47
47
  OutputPlugin.Control control)
48
48
  {
49
49
  throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
50
50
  }
51
51
 
52
52
  public void cleanup(TaskSource taskSource,
53
- Schema schema, int processorCount,
53
+ Schema schema, int taskCount,
54
54
  List<CommitReport> successCommitReports)
55
55
  {
56
56
  }
57
57
 
58
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
58
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
59
59
  {
60
60
  PluginTask task = taskSource.loadTask(PluginTask.class);
61
61
 
@@ -12,15 +12,15 @@ module Embulk
12
12
  when :float
13
13
  Float(v)
14
14
  when :string
15
- String(v)
15
+ String(v).dup
16
16
  when :bool
17
17
  !!v # TODO validation
18
18
  when :hash
19
19
  raise ArgumentError, "Invalid value for :hash" unless v.is_a?(Hash)
20
- v
20
+ DataSource.new.merge!(v)
21
21
  when :array
22
22
  raise ArgumentError, "Invalid value for :array" unless v.is_a?(Array)
23
- v
23
+ v.dup
24
24
  else
25
25
  unless type.respond_to?(:load)
26
26
  raise ArgumentError, "Unknown type #{type.to_s.dump}"
@@ -47,7 +47,7 @@ module Embulk
47
47
  def guess(config, sample)
48
48
  # TODO pure-ruby LineDecoder implementation?
49
49
  begin
50
- task = config.load_config(Java::LineDecoder::DecoderTask)
50
+ parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
51
51
  rescue
52
52
  # TODO log?
53
53
  p $!
@@ -55,7 +55,7 @@ module Embulk
55
55
  return DataSource.new
56
56
  end
57
57
 
58
- decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
58
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
59
59
  sample_text = ''
60
60
  while decoder.nextFile
61
61
  first = true
@@ -63,7 +63,7 @@ module Embulk
63
63
  if first
64
64
  first = false
65
65
  else
66
- sample_text << task.getNewline().getString()
66
+ sample_text << parser_task.getNewline().getString()
67
67
  end
68
68
  sample_text << line
69
69
  end
@@ -81,7 +81,7 @@ module Embulk
81
81
  def guess(config, sample)
82
82
  # TODO pure-ruby LineDecoder implementation?
83
83
  begin
84
- task = config.load_config(Java::LineDecoder::DecoderTask)
84
+ parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
85
85
  rescue
86
86
  # TODO log?
87
87
  p $!
@@ -89,7 +89,7 @@ module Embulk
89
89
  return DataSource.new
90
90
  end
91
91
 
92
- decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
92
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
93
93
  sample_lines = []
94
94
  while decoder.nextFile
95
95
  while line = decoder.poll
@@ -46,10 +46,10 @@ module Embulk
46
46
 
47
47
  def transaction(java_config, java_control)
48
48
  config = DataSource.from_java(java_config)
49
- config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,processor_count|
49
+ config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,task_count|
50
50
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
51
51
  java_schema = Schema.new(columns).to_java
52
- java_commit_reports = java_control.run(java_task_source, java_schema, processor_count)
52
+ java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
53
53
  java_commit_reports.map {|java_commit_report|
54
54
  DataSource.from_java(java_commit_report)
55
55
  }
@@ -58,13 +58,13 @@ module Embulk
58
58
  return DataSource.from_ruby_hash(config_diff_hash).to_java
59
59
  end
60
60
 
61
- def resume(java_task_source, java_schema, processor_count, java_control)
61
+ def resume(java_task_source, java_schema, task_count, java_control)
62
62
  task_source = DataSource.from_java(java_task_source)
63
63
  schema = Schema.from_java(java_schema)
64
- config_diff_hash = @ruby_class.resume(task_source, schema, processor_count) do |task_source_hash,columns,processor_count|
64
+ config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
65
65
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
66
66
  java_schema = Schema.new(columns).to_java
67
- java_commit_reports = java_control.run(java_task_source, java_schema, processor_count)
67
+ java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
68
68
  java_commit_reports.map {|java_commit_report|
69
69
  DataSource.from_java(java_commit_report)
70
70
  }
@@ -73,11 +73,11 @@ module Embulk
73
73
  return DataSource.from_ruby_hash(config_diff_hash).to_java
74
74
  end
75
75
 
76
- def cleanup(java_task_source, java_schema, processor_count, java_commit_reports)
76
+ def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
77
77
  task_source = DataSource.from_java(java_task_source)
78
78
  schema = Schema.from_java(java_schema)
79
79
  commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
80
- @ruby_class.cleanup(task_source, schema, processor_count, commit_reports)
80
+ @ruby_class.cleanup(task_source, schema, task_count, commit_reports)
81
81
  return nil
82
82
  end
83
83
 
@@ -5,7 +5,7 @@ module Embulk
5
5
  require 'embulk/page'
6
6
 
7
7
  class OutputPlugin
8
- def self.transaction(config, schema, processor_count, &control)
8
+ def self.transaction(config, schema, task_count, &control)
9
9
  yield(config)
10
10
  return {}
11
11
  end
@@ -59,10 +59,10 @@ module Embulk
59
59
  @ruby_class = ruby_class
60
60
  end
61
61
 
62
- def transaction(java_config, java_schema, processor_count, java_control)
62
+ def transaction(java_config, java_schema, task_count, java_control)
63
63
  config = DataSource.from_java(java_config)
64
64
  schema = Schema.from_java(java_schema)
65
- config_diff_hash = @ruby_class.transaction(config, schema, processor_count) do |task_source_hash|
65
+ config_diff_hash = @ruby_class.transaction(config, schema, task_count) do |task_source_hash|
66
66
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
67
67
  java_commit_reports = java_control.run(java_task_source)
68
68
  java_commit_reports.map {|java_commit_report|
@@ -73,10 +73,10 @@ module Embulk
73
73
  return DataSource.from_ruby_hash(config_diff_hash).to_java
74
74
  end
75
75
 
76
- def resume(java_task_source, java_schema, processor_count, java_control)
76
+ def resume(java_task_source, java_schema, task_count, java_control)
77
77
  task_source = DataSource.from_java(java_task_source)
78
78
  schema = Schema.from_java(java_schema)
79
- config_diff_hash = @ruby_class.resume(task_source, schema, processor_count) do |task_source_hash,columns,processor_count|
79
+ config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
80
80
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
81
81
  java_commit_reports = java_control.run(java_task_source)
82
82
  java_commit_reports.map {|java_commit_report|
@@ -87,11 +87,11 @@ module Embulk
87
87
  return DataSource.from_ruby_hash(config_diff_hash).to_java
88
88
  end
89
89
 
90
- def cleanup(java_task_source, java_schema, processor_count, java_commit_reports)
90
+ def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
91
91
  task_source = DataSource.from_java(java_task_source)
92
92
  schema = Schema.from_java(java_schema)
93
93
  commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
94
- @ruby_class.cleanup(task_source, schema, processor_count, commit_reports)
94
+ @ruby_class.cleanup(task_source, schema, task_count, commit_reports)
95
95
  return nil
96
96
  end
97
97
 
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = "0.4.3"
2
+ VERSION = '0.4.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -152,6 +152,7 @@ files:
152
152
  - embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java
153
153
  - embulk-core/src/main/java/org/embulk/exec/ResumeState.java
154
154
  - embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java
155
+ - embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java
155
156
  - embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java
156
157
  - embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java
157
158
  - embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java
@@ -226,6 +227,7 @@ files:
226
227
  - embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java
227
228
  - embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java
228
229
  - embulk-core/src/main/java/org/embulk/spi/util/Newline.java
230
+ - embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java
229
231
  - embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java
230
232
  - embulk-core/src/main/java/org/embulk/spi/util/Pages.java
231
233
  - embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java
@@ -268,6 +270,7 @@ files:
268
270
  - embulk-docs/src/release/release-0.4.1.rst
269
271
  - embulk-docs/src/release/release-0.4.2.rst
270
272
  - embulk-docs/src/release/release-0.4.3.rst
273
+ - embulk-docs/src/release/release-0.4.4.rst
271
274
  - embulk-standards/build.gradle
272
275
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
273
276
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -362,8 +365,8 @@ files:
362
365
  - classpath/bval-jsr303-0.5.jar
363
366
  - classpath/commons-beanutils-core-1.8.3.jar
364
367
  - classpath/commons-lang3-3.1.jar
365
- - classpath/embulk-core-0.4.3.jar
366
- - classpath/embulk-standards-0.4.3.jar
368
+ - classpath/embulk-core-0.4.4.jar
369
+ - classpath/embulk-standards-0.4.4.jar
367
370
  - classpath/guava-18.0.jar
368
371
  - classpath/guice-3.0.jar
369
372
  - classpath/guice-multibindings-3.0.jar