embulk 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -11
- data/build.gradle +22 -2
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +8 -6
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +54 -46
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +20 -3
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +41 -10
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +24 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +8 -8
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +8 -8
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +32 -8
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +3 -4
- data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +5 -5
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.4.rst +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +32 -7
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +9 -9
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +6 -6
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +5 -5
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/lib/embulk/command/embulk_run.rb +14 -2
- data/lib/embulk/data/new/java/file_input.java.erb +7 -7
- data/lib/embulk/data/new/java/file_output.java.erb +5 -5
- data/lib/embulk/data/new/java/input.java.erb +6 -6
- data/lib/embulk/data/new/java/output.java.erb +5 -5
- data/lib/embulk/data_source.rb +3 -3
- data/lib/embulk/guess_plugin.rb +5 -5
- data/lib/embulk/input_plugin.rb +7 -7
- data/lib/embulk/output_plugin.rb +7 -7
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
@@ -27,13 +27,13 @@ public class <%= java_class_name %>
|
|
27
27
|
}
|
28
28
|
|
29
29
|
@Override
|
30
|
-
public ConfigDiff transaction(ConfigSource config, int
|
30
|
+
public ConfigDiff transaction(ConfigSource config, int taskCount,
|
31
31
|
FileOutputPlugin.Control control)
|
32
32
|
{
|
33
33
|
PluginTask task = config.loadConfig(PluginTask.class);
|
34
34
|
|
35
35
|
// retryable (idempotent) output:
|
36
|
-
// return resume(task.dump(),
|
36
|
+
// return resume(task.dump(), taskCount, control);
|
37
37
|
|
38
38
|
// non-retryable (non-idempotent) output:
|
39
39
|
control.run(task.dump());
|
@@ -42,7 +42,7 @@ public class <%= java_class_name %>
|
|
42
42
|
|
43
43
|
@Override
|
44
44
|
public ConfigDiff resume(TaskSource taskSource,
|
45
|
-
int
|
45
|
+
int taskCount,
|
46
46
|
FileOutputPlugin.Control control)
|
47
47
|
{
|
48
48
|
throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
|
@@ -50,13 +50,13 @@ public class <%= java_class_name %>
|
|
50
50
|
|
51
51
|
@Override
|
52
52
|
public void cleanup(TaskSource taskSource,
|
53
|
-
int
|
53
|
+
int taskCount,
|
54
54
|
List<CommitReport> successCommitReports)
|
55
55
|
{
|
56
56
|
}
|
57
57
|
|
58
58
|
@Override
|
59
|
-
public TransactionalFileOutput open(TaskSource taskSource, final int
|
59
|
+
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
60
60
|
{
|
61
61
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
62
62
|
|
@@ -38,27 +38,27 @@ public class <%= java_class_name %>
|
|
38
38
|
PluginTask task = config.loadConfig(PluginTask.class);
|
39
39
|
|
40
40
|
Schema schema = task.getColumns().toSchema();
|
41
|
-
int
|
41
|
+
int taskCount = 1; // number of run() method calls
|
42
42
|
|
43
|
-
return resume(task.dump(), schema,
|
43
|
+
return resume(task.dump(), schema, taskCount, control);
|
44
44
|
}
|
45
45
|
|
46
46
|
public ConfigDiff resume(TaskSource taskSource,
|
47
|
-
Schema schema, int
|
47
|
+
Schema schema, int taskCount,
|
48
48
|
InputPlugin.Control control)
|
49
49
|
{
|
50
|
-
control.run(taskSource, schema,
|
50
|
+
control.run(taskSource, schema, taskCount);
|
51
51
|
return Exec.newConfigDiff();
|
52
52
|
}
|
53
53
|
|
54
54
|
public void cleanup(TaskSource taskSource,
|
55
|
-
Schema schema, int
|
55
|
+
Schema schema, int taskCount,
|
56
56
|
List<CommitReport> successCommitReports)
|
57
57
|
{
|
58
58
|
}
|
59
59
|
|
60
60
|
public CommitReport run(TaskSource taskSource,
|
61
|
-
Schema schema, int
|
61
|
+
Schema schema, int taskIndex,
|
62
62
|
PageOutput output)
|
63
63
|
{
|
64
64
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
@@ -29,13 +29,13 @@ public class <%= java_class_name %>
|
|
29
29
|
}
|
30
30
|
|
31
31
|
public ConfigDiff transaction(ConfigSource config,
|
32
|
-
Schema schema, int
|
32
|
+
Schema schema, int taskCount,
|
33
33
|
OutputPlugin.Control control)
|
34
34
|
{
|
35
35
|
PluginTask task = config.loadConfig(PluginTask.class);
|
36
36
|
|
37
37
|
// retryable (idempotent) output:
|
38
|
-
// return resume(task.dump(), schema,
|
38
|
+
// return resume(task.dump(), schema, taskCount, control);
|
39
39
|
|
40
40
|
// non-retryable (non-idempotent) output:
|
41
41
|
control.run(task.dump());
|
@@ -43,19 +43,19 @@ public class <%= java_class_name %>
|
|
43
43
|
}
|
44
44
|
|
45
45
|
public ConfigDiff resume(TaskSource taskSource,
|
46
|
-
Schema schema, int
|
46
|
+
Schema schema, int taskCount,
|
47
47
|
OutputPlugin.Control control)
|
48
48
|
{
|
49
49
|
throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
|
50
50
|
}
|
51
51
|
|
52
52
|
public void cleanup(TaskSource taskSource,
|
53
|
-
Schema schema, int
|
53
|
+
Schema schema, int taskCount,
|
54
54
|
List<CommitReport> successCommitReports)
|
55
55
|
{
|
56
56
|
}
|
57
57
|
|
58
|
-
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int
|
58
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
59
59
|
{
|
60
60
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
61
61
|
|
data/lib/embulk/data_source.rb
CHANGED
@@ -12,15 +12,15 @@ module Embulk
|
|
12
12
|
when :float
|
13
13
|
Float(v)
|
14
14
|
when :string
|
15
|
-
String(v)
|
15
|
+
String(v).dup
|
16
16
|
when :bool
|
17
17
|
!!v # TODO validation
|
18
18
|
when :hash
|
19
19
|
raise ArgumentError, "Invalid value for :hash" unless v.is_a?(Hash)
|
20
|
-
v
|
20
|
+
DataSource.new.merge!(v)
|
21
21
|
when :array
|
22
22
|
raise ArgumentError, "Invalid value for :array" unless v.is_a?(Array)
|
23
|
-
v
|
23
|
+
v.dup
|
24
24
|
else
|
25
25
|
unless type.respond_to?(:load)
|
26
26
|
raise ArgumentError, "Unknown type #{type.to_s.dump}"
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -47,7 +47,7 @@ module Embulk
|
|
47
47
|
def guess(config, sample)
|
48
48
|
# TODO pure-ruby LineDecoder implementation?
|
49
49
|
begin
|
50
|
-
|
50
|
+
parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
|
51
51
|
rescue
|
52
52
|
# TODO log?
|
53
53
|
p $!
|
@@ -55,7 +55,7 @@ module Embulk
|
|
55
55
|
return DataSource.new
|
56
56
|
end
|
57
57
|
|
58
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]),
|
58
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
|
59
59
|
sample_text = ''
|
60
60
|
while decoder.nextFile
|
61
61
|
first = true
|
@@ -63,7 +63,7 @@ module Embulk
|
|
63
63
|
if first
|
64
64
|
first = false
|
65
65
|
else
|
66
|
-
sample_text <<
|
66
|
+
sample_text << parser_task.getNewline().getString()
|
67
67
|
end
|
68
68
|
sample_text << line
|
69
69
|
end
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
def guess(config, sample)
|
82
82
|
# TODO pure-ruby LineDecoder implementation?
|
83
83
|
begin
|
84
|
-
|
84
|
+
parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
|
85
85
|
rescue
|
86
86
|
# TODO log?
|
87
87
|
p $!
|
@@ -89,7 +89,7 @@ module Embulk
|
|
89
89
|
return DataSource.new
|
90
90
|
end
|
91
91
|
|
92
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]),
|
92
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
|
93
93
|
sample_lines = []
|
94
94
|
while decoder.nextFile
|
95
95
|
while line = decoder.poll
|
data/lib/embulk/input_plugin.rb
CHANGED
@@ -46,10 +46,10 @@ module Embulk
|
|
46
46
|
|
47
47
|
def transaction(java_config, java_control)
|
48
48
|
config = DataSource.from_java(java_config)
|
49
|
-
config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,
|
49
|
+
config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,task_count|
|
50
50
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
51
51
|
java_schema = Schema.new(columns).to_java
|
52
|
-
java_commit_reports = java_control.run(java_task_source, java_schema,
|
52
|
+
java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
|
53
53
|
java_commit_reports.map {|java_commit_report|
|
54
54
|
DataSource.from_java(java_commit_report)
|
55
55
|
}
|
@@ -58,13 +58,13 @@ module Embulk
|
|
58
58
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
59
59
|
end
|
60
60
|
|
61
|
-
def resume(java_task_source, java_schema,
|
61
|
+
def resume(java_task_source, java_schema, task_count, java_control)
|
62
62
|
task_source = DataSource.from_java(java_task_source)
|
63
63
|
schema = Schema.from_java(java_schema)
|
64
|
-
config_diff_hash = @ruby_class.resume(task_source, schema,
|
64
|
+
config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
|
65
65
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
66
66
|
java_schema = Schema.new(columns).to_java
|
67
|
-
java_commit_reports = java_control.run(java_task_source, java_schema,
|
67
|
+
java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
|
68
68
|
java_commit_reports.map {|java_commit_report|
|
69
69
|
DataSource.from_java(java_commit_report)
|
70
70
|
}
|
@@ -73,11 +73,11 @@ module Embulk
|
|
73
73
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
74
74
|
end
|
75
75
|
|
76
|
-
def cleanup(java_task_source, java_schema,
|
76
|
+
def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
|
77
77
|
task_source = DataSource.from_java(java_task_source)
|
78
78
|
schema = Schema.from_java(java_schema)
|
79
79
|
commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
|
80
|
-
@ruby_class.cleanup(task_source, schema,
|
80
|
+
@ruby_class.cleanup(task_source, schema, task_count, commit_reports)
|
81
81
|
return nil
|
82
82
|
end
|
83
83
|
|
data/lib/embulk/output_plugin.rb
CHANGED
@@ -5,7 +5,7 @@ module Embulk
|
|
5
5
|
require 'embulk/page'
|
6
6
|
|
7
7
|
class OutputPlugin
|
8
|
-
def self.transaction(config, schema,
|
8
|
+
def self.transaction(config, schema, task_count, &control)
|
9
9
|
yield(config)
|
10
10
|
return {}
|
11
11
|
end
|
@@ -59,10 +59,10 @@ module Embulk
|
|
59
59
|
@ruby_class = ruby_class
|
60
60
|
end
|
61
61
|
|
62
|
-
def transaction(java_config, java_schema,
|
62
|
+
def transaction(java_config, java_schema, task_count, java_control)
|
63
63
|
config = DataSource.from_java(java_config)
|
64
64
|
schema = Schema.from_java(java_schema)
|
65
|
-
config_diff_hash = @ruby_class.transaction(config, schema,
|
65
|
+
config_diff_hash = @ruby_class.transaction(config, schema, task_count) do |task_source_hash|
|
66
66
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
67
67
|
java_commit_reports = java_control.run(java_task_source)
|
68
68
|
java_commit_reports.map {|java_commit_report|
|
@@ -73,10 +73,10 @@ module Embulk
|
|
73
73
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
74
74
|
end
|
75
75
|
|
76
|
-
def resume(java_task_source, java_schema,
|
76
|
+
def resume(java_task_source, java_schema, task_count, java_control)
|
77
77
|
task_source = DataSource.from_java(java_task_source)
|
78
78
|
schema = Schema.from_java(java_schema)
|
79
|
-
config_diff_hash = @ruby_class.resume(task_source, schema,
|
79
|
+
config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
|
80
80
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
81
81
|
java_commit_reports = java_control.run(java_task_source)
|
82
82
|
java_commit_reports.map {|java_commit_report|
|
@@ -87,11 +87,11 @@ module Embulk
|
|
87
87
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
88
88
|
end
|
89
89
|
|
90
|
-
def cleanup(java_task_source, java_schema,
|
90
|
+
def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
|
91
91
|
task_source = DataSource.from_java(java_task_source)
|
92
92
|
schema = Schema.from_java(java_schema)
|
93
93
|
commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
|
94
|
-
@ruby_class.cleanup(task_source, schema,
|
94
|
+
@ruby_class.cleanup(task_source, schema, task_count, commit_reports)
|
95
95
|
return nil
|
96
96
|
end
|
97
97
|
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -152,6 +152,7 @@ files:
|
|
152
152
|
- embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java
|
153
153
|
- embulk-core/src/main/java/org/embulk/exec/ResumeState.java
|
154
154
|
- embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java
|
155
|
+
- embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java
|
155
156
|
- embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java
|
156
157
|
- embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java
|
157
158
|
- embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java
|
@@ -226,6 +227,7 @@ files:
|
|
226
227
|
- embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java
|
227
228
|
- embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java
|
228
229
|
- embulk-core/src/main/java/org/embulk/spi/util/Newline.java
|
230
|
+
- embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java
|
229
231
|
- embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java
|
230
232
|
- embulk-core/src/main/java/org/embulk/spi/util/Pages.java
|
231
233
|
- embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java
|
@@ -268,6 +270,7 @@ files:
|
|
268
270
|
- embulk-docs/src/release/release-0.4.1.rst
|
269
271
|
- embulk-docs/src/release/release-0.4.2.rst
|
270
272
|
- embulk-docs/src/release/release-0.4.3.rst
|
273
|
+
- embulk-docs/src/release/release-0.4.4.rst
|
271
274
|
- embulk-standards/build.gradle
|
272
275
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
273
276
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -362,8 +365,8 @@ files:
|
|
362
365
|
- classpath/bval-jsr303-0.5.jar
|
363
366
|
- classpath/commons-beanutils-core-1.8.3.jar
|
364
367
|
- classpath/commons-lang3-3.1.jar
|
365
|
-
- classpath/embulk-core-0.4.3.jar
|
366
|
-
- classpath/embulk-standards-0.4.3.jar
|
368
|
+
- classpath/embulk-core-0.4.4.jar
|
369
|
+
- classpath/embulk-standards-0.4.4.jar
|
367
370
|
- classpath/guava-18.0.jar
|
368
371
|
- classpath/guice-3.0.jar
|
369
372
|
- classpath/guice-multibindings-3.0.jar
|