embulk 0.4.3 → 0.4.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (44):
  1. checksums.yaml +4 -4
  2. data/README.md +8 -11
  3. data/build.gradle +22 -2
  4. data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -0
  5. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +8 -6
  6. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +54 -46
  7. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +20 -3
  8. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +41 -10
  9. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +1 -1
  10. data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
  11. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +5 -0
  12. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +24 -0
  13. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +4 -4
  14. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +8 -8
  15. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +4 -4
  16. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +8 -8
  17. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +4 -4
  18. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +4 -4
  19. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +1 -1
  20. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +32 -8
  21. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +1 -1
  22. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +3 -4
  23. data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
  24. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +1 -1
  25. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +5 -5
  26. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
  27. data/embulk-docs/src/release.rst +1 -0
  28. data/embulk-docs/src/release/release-0.4.4.rst +39 -0
  29. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +32 -7
  30. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +9 -9
  31. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +6 -6
  32. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +5 -5
  33. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
  34. data/lib/embulk/command/embulk_run.rb +14 -2
  35. data/lib/embulk/data/new/java/file_input.java.erb +7 -7
  36. data/lib/embulk/data/new/java/file_output.java.erb +5 -5
  37. data/lib/embulk/data/new/java/input.java.erb +6 -6
  38. data/lib/embulk/data/new/java/output.java.erb +5 -5
  39. data/lib/embulk/data_source.rb +3 -3
  40. data/lib/embulk/guess_plugin.rb +5 -5
  41. data/lib/embulk/input_plugin.rb +7 -7
  42. data/lib/embulk/output_plugin.rb +7 -7
  43. data/lib/embulk/version.rb +1 -1
  44. metadata +7 -4
@@ -27,13 +27,13 @@ public class <%= java_class_name %>
27
27
  }
28
28
 
29
29
  @Override
30
- public ConfigDiff transaction(ConfigSource config, int processorCount,
30
+ public ConfigDiff transaction(ConfigSource config, int taskCount,
31
31
  FileOutputPlugin.Control control)
32
32
  {
33
33
  PluginTask task = config.loadConfig(PluginTask.class);
34
34
 
35
35
  // retryable (idempotent) output:
36
- // return resume(task.dump(), processorCount, control);
36
+ // return resume(task.dump(), taskCount, control);
37
37
 
38
38
  // non-retryable (non-idempotent) output:
39
39
  control.run(task.dump());
@@ -42,7 +42,7 @@ public class <%= java_class_name %>
42
42
 
43
43
  @Override
44
44
  public ConfigDiff resume(TaskSource taskSource,
45
- int processorCount,
45
+ int taskCount,
46
46
  FileOutputPlugin.Control control)
47
47
  {
48
48
  throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
@@ -50,13 +50,13 @@ public class <%= java_class_name %>
50
50
 
51
51
  @Override
52
52
  public void cleanup(TaskSource taskSource,
53
- int processorCount,
53
+ int taskCount,
54
54
  List<CommitReport> successCommitReports)
55
55
  {
56
56
  }
57
57
 
58
58
  @Override
59
- public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
59
+ public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
60
60
  {
61
61
  PluginTask task = taskSource.loadTask(PluginTask.class);
62
62
 
@@ -38,27 +38,27 @@ public class <%= java_class_name %>
38
38
  PluginTask task = config.loadConfig(PluginTask.class);
39
39
 
40
40
  Schema schema = task.getColumns().toSchema();
41
- int processorCount = 1; // number of run() method calls
41
+ int taskCount = 1; // number of run() method calls
42
42
 
43
- return resume(task.dump(), schema, processorCount, control);
43
+ return resume(task.dump(), schema, taskCount, control);
44
44
  }
45
45
 
46
46
  public ConfigDiff resume(TaskSource taskSource,
47
- Schema schema, int processorCount,
47
+ Schema schema, int taskCount,
48
48
  InputPlugin.Control control)
49
49
  {
50
- control.run(taskSource, schema, processorCount);
50
+ control.run(taskSource, schema, taskCount);
51
51
  return Exec.newConfigDiff();
52
52
  }
53
53
 
54
54
  public void cleanup(TaskSource taskSource,
55
- Schema schema, int processorCount,
55
+ Schema schema, int taskCount,
56
56
  List<CommitReport> successCommitReports)
57
57
  {
58
58
  }
59
59
 
60
60
  public CommitReport run(TaskSource taskSource,
61
- Schema schema, int processorIndex,
61
+ Schema schema, int taskIndex,
62
62
  PageOutput output)
63
63
  {
64
64
  PluginTask task = taskSource.loadTask(PluginTask.class);
@@ -29,13 +29,13 @@ public class <%= java_class_name %>
29
29
  }
30
30
 
31
31
  public ConfigDiff transaction(ConfigSource config,
32
- Schema schema, int processorCount,
32
+ Schema schema, int taskCount,
33
33
  OutputPlugin.Control control)
34
34
  {
35
35
  PluginTask task = config.loadConfig(PluginTask.class);
36
36
 
37
37
  // retryable (idempotent) output:
38
- // return resume(task.dump(), schema, processorCount, control);
38
+ // return resume(task.dump(), schema, taskCount, control);
39
39
 
40
40
  // non-retryable (non-idempotent) output:
41
41
  control.run(task.dump());
@@ -43,19 +43,19 @@ public class <%= java_class_name %>
43
43
  }
44
44
 
45
45
  public ConfigDiff resume(TaskSource taskSource,
46
- Schema schema, int processorCount,
46
+ Schema schema, int taskCount,
47
47
  OutputPlugin.Control control)
48
48
  {
49
49
  throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
50
50
  }
51
51
 
52
52
  public void cleanup(TaskSource taskSource,
53
- Schema schema, int processorCount,
53
+ Schema schema, int taskCount,
54
54
  List<CommitReport> successCommitReports)
55
55
  {
56
56
  }
57
57
 
58
- public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
58
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
59
59
  {
60
60
  PluginTask task = taskSource.loadTask(PluginTask.class);
61
61
 
@@ -12,15 +12,15 @@ module Embulk
12
12
  when :float
13
13
  Float(v)
14
14
  when :string
15
- String(v)
15
+ String(v).dup
16
16
  when :bool
17
17
  !!v # TODO validation
18
18
  when :hash
19
19
  raise ArgumentError, "Invalid value for :hash" unless v.is_a?(Hash)
20
- v
20
+ DataSource.new.merge!(v)
21
21
  when :array
22
22
  raise ArgumentError, "Invalid value for :array" unless v.is_a?(Array)
23
- v
23
+ v.dup
24
24
  else
25
25
  unless type.respond_to?(:load)
26
26
  raise ArgumentError, "Unknown type #{type.to_s.dump}"
@@ -47,7 +47,7 @@ module Embulk
47
47
  def guess(config, sample)
48
48
  # TODO pure-ruby LineDecoder implementation?
49
49
  begin
50
- task = config.load_config(Java::LineDecoder::DecoderTask)
50
+ parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
51
51
  rescue
52
52
  # TODO log?
53
53
  p $!
@@ -55,7 +55,7 @@ module Embulk
55
55
  return DataSource.new
56
56
  end
57
57
 
58
- decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
58
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
59
59
  sample_text = ''
60
60
  while decoder.nextFile
61
61
  first = true
@@ -63,7 +63,7 @@ module Embulk
63
63
  if first
64
64
  first = false
65
65
  else
66
- sample_text << task.getNewline().getString()
66
+ sample_text << parser_task.getNewline().getString()
67
67
  end
68
68
  sample_text << line
69
69
  end
@@ -81,7 +81,7 @@ module Embulk
81
81
  def guess(config, sample)
82
82
  # TODO pure-ruby LineDecoder implementation?
83
83
  begin
84
- task = config.load_config(Java::LineDecoder::DecoderTask)
84
+ parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
85
85
  rescue
86
86
  # TODO log?
87
87
  p $!
@@ -89,7 +89,7 @@ module Embulk
89
89
  return DataSource.new
90
90
  end
91
91
 
92
- decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
92
+ decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
93
93
  sample_lines = []
94
94
  while decoder.nextFile
95
95
  while line = decoder.poll
@@ -46,10 +46,10 @@ module Embulk
46
46
 
47
47
  def transaction(java_config, java_control)
48
48
  config = DataSource.from_java(java_config)
49
- config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,processor_count|
49
+ config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,task_count|
50
50
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
51
51
  java_schema = Schema.new(columns).to_java
52
- java_commit_reports = java_control.run(java_task_source, java_schema, processor_count)
52
+ java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
53
53
  java_commit_reports.map {|java_commit_report|
54
54
  DataSource.from_java(java_commit_report)
55
55
  }
@@ -58,13 +58,13 @@ module Embulk
58
58
  return DataSource.from_ruby_hash(config_diff_hash).to_java
59
59
  end
60
60
 
61
- def resume(java_task_source, java_schema, processor_count, java_control)
61
+ def resume(java_task_source, java_schema, task_count, java_control)
62
62
  task_source = DataSource.from_java(java_task_source)
63
63
  schema = Schema.from_java(java_schema)
64
- config_diff_hash = @ruby_class.resume(task_source, schema, processor_count) do |task_source_hash,columns,processor_count|
64
+ config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
65
65
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
66
66
  java_schema = Schema.new(columns).to_java
67
- java_commit_reports = java_control.run(java_task_source, java_schema, processor_count)
67
+ java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
68
68
  java_commit_reports.map {|java_commit_report|
69
69
  DataSource.from_java(java_commit_report)
70
70
  }
@@ -73,11 +73,11 @@ module Embulk
73
73
  return DataSource.from_ruby_hash(config_diff_hash).to_java
74
74
  end
75
75
 
76
- def cleanup(java_task_source, java_schema, processor_count, java_commit_reports)
76
+ def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
77
77
  task_source = DataSource.from_java(java_task_source)
78
78
  schema = Schema.from_java(java_schema)
79
79
  commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
80
- @ruby_class.cleanup(task_source, schema, processor_count, commit_reports)
80
+ @ruby_class.cleanup(task_source, schema, task_count, commit_reports)
81
81
  return nil
82
82
  end
83
83
 
@@ -5,7 +5,7 @@ module Embulk
5
5
  require 'embulk/page'
6
6
 
7
7
  class OutputPlugin
8
- def self.transaction(config, schema, processor_count, &control)
8
+ def self.transaction(config, schema, task_count, &control)
9
9
  yield(config)
10
10
  return {}
11
11
  end
@@ -59,10 +59,10 @@ module Embulk
59
59
  @ruby_class = ruby_class
60
60
  end
61
61
 
62
- def transaction(java_config, java_schema, processor_count, java_control)
62
+ def transaction(java_config, java_schema, task_count, java_control)
63
63
  config = DataSource.from_java(java_config)
64
64
  schema = Schema.from_java(java_schema)
65
- config_diff_hash = @ruby_class.transaction(config, schema, processor_count) do |task_source_hash|
65
+ config_diff_hash = @ruby_class.transaction(config, schema, task_count) do |task_source_hash|
66
66
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
67
67
  java_commit_reports = java_control.run(java_task_source)
68
68
  java_commit_reports.map {|java_commit_report|
@@ -73,10 +73,10 @@ module Embulk
73
73
  return DataSource.from_ruby_hash(config_diff_hash).to_java
74
74
  end
75
75
 
76
- def resume(java_task_source, java_schema, processor_count, java_control)
76
+ def resume(java_task_source, java_schema, task_count, java_control)
77
77
  task_source = DataSource.from_java(java_task_source)
78
78
  schema = Schema.from_java(java_schema)
79
- config_diff_hash = @ruby_class.resume(task_source, schema, processor_count) do |task_source_hash,columns,processor_count|
79
+ config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
80
80
  java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
81
81
  java_commit_reports = java_control.run(java_task_source)
82
82
  java_commit_reports.map {|java_commit_report|
@@ -87,11 +87,11 @@ module Embulk
87
87
  return DataSource.from_ruby_hash(config_diff_hash).to_java
88
88
  end
89
89
 
90
- def cleanup(java_task_source, java_schema, processor_count, java_commit_reports)
90
+ def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
91
91
  task_source = DataSource.from_java(java_task_source)
92
92
  schema = Schema.from_java(java_schema)
93
93
  commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
94
- @ruby_class.cleanup(task_source, schema, processor_count, commit_reports)
94
+ @ruby_class.cleanup(task_source, schema, task_count, commit_reports)
95
95
  return nil
96
96
  end
97
97
 
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = "0.4.3"
2
+ VERSION = '0.4.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -152,6 +152,7 @@ files:
152
152
  - embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java
153
153
  - embulk-core/src/main/java/org/embulk/exec/ResumeState.java
154
154
  - embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java
155
+ - embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java
155
156
  - embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java
156
157
  - embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java
157
158
  - embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java
@@ -226,6 +227,7 @@ files:
226
227
  - embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java
227
228
  - embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java
228
229
  - embulk-core/src/main/java/org/embulk/spi/util/Newline.java
230
+ - embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java
229
231
  - embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java
230
232
  - embulk-core/src/main/java/org/embulk/spi/util/Pages.java
231
233
  - embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java
@@ -268,6 +270,7 @@ files:
268
270
  - embulk-docs/src/release/release-0.4.1.rst
269
271
  - embulk-docs/src/release/release-0.4.2.rst
270
272
  - embulk-docs/src/release/release-0.4.3.rst
273
+ - embulk-docs/src/release/release-0.4.4.rst
271
274
  - embulk-standards/build.gradle
272
275
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
273
276
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -362,8 +365,8 @@ files:
362
365
  - classpath/bval-jsr303-0.5.jar
363
366
  - classpath/commons-beanutils-core-1.8.3.jar
364
367
  - classpath/commons-lang3-3.1.jar
365
- - classpath/embulk-core-0.4.3.jar
366
- - classpath/embulk-standards-0.4.3.jar
368
+ - classpath/embulk-core-0.4.4.jar
369
+ - classpath/embulk-standards-0.4.4.jar
367
370
  - classpath/guava-18.0.jar
368
371
  - classpath/guice-3.0.jar
369
372
  - classpath/guice-multibindings-3.0.jar