embulk 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -11
- data/build.gradle +22 -2
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +7 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +8 -6
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +54 -46
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +20 -3
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +41 -10
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +5 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +24 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +8 -8
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +8 -8
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +4 -4
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +32 -8
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +3 -4
- data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +5 -5
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.4.rst +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +32 -7
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +9 -9
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +6 -6
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +5 -5
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/lib/embulk/command/embulk_run.rb +14 -2
- data/lib/embulk/data/new/java/file_input.java.erb +7 -7
- data/lib/embulk/data/new/java/file_output.java.erb +5 -5
- data/lib/embulk/data/new/java/input.java.erb +6 -6
- data/lib/embulk/data/new/java/output.java.erb +5 -5
- data/lib/embulk/data_source.rb +3 -3
- data/lib/embulk/guess_plugin.rb +5 -5
- data/lib/embulk/input_plugin.rb +7 -7
- data/lib/embulk/output_plugin.rb +7 -7
- data/lib/embulk/version.rb +1 -1
- metadata +7 -4
@@ -27,13 +27,13 @@ public class <%= java_class_name %>
|
|
27
27
|
}
|
28
28
|
|
29
29
|
@Override
|
30
|
-
public ConfigDiff transaction(ConfigSource config, int
|
30
|
+
public ConfigDiff transaction(ConfigSource config, int taskCount,
|
31
31
|
FileOutputPlugin.Control control)
|
32
32
|
{
|
33
33
|
PluginTask task = config.loadConfig(PluginTask.class);
|
34
34
|
|
35
35
|
// retryable (idempotent) output:
|
36
|
-
// return resume(task.dump(),
|
36
|
+
// return resume(task.dump(), taskCount, control);
|
37
37
|
|
38
38
|
// non-retryable (non-idempotent) output:
|
39
39
|
control.run(task.dump());
|
@@ -42,7 +42,7 @@ public class <%= java_class_name %>
|
|
42
42
|
|
43
43
|
@Override
|
44
44
|
public ConfigDiff resume(TaskSource taskSource,
|
45
|
-
int
|
45
|
+
int taskCount,
|
46
46
|
FileOutputPlugin.Control control)
|
47
47
|
{
|
48
48
|
throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
|
@@ -50,13 +50,13 @@ public class <%= java_class_name %>
|
|
50
50
|
|
51
51
|
@Override
|
52
52
|
public void cleanup(TaskSource taskSource,
|
53
|
-
int
|
53
|
+
int taskCount,
|
54
54
|
List<CommitReport> successCommitReports)
|
55
55
|
{
|
56
56
|
}
|
57
57
|
|
58
58
|
@Override
|
59
|
-
public TransactionalFileOutput open(TaskSource taskSource, final int
|
59
|
+
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
60
60
|
{
|
61
61
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
62
62
|
|
@@ -38,27 +38,27 @@ public class <%= java_class_name %>
|
|
38
38
|
PluginTask task = config.loadConfig(PluginTask.class);
|
39
39
|
|
40
40
|
Schema schema = task.getColumns().toSchema();
|
41
|
-
int
|
41
|
+
int taskCount = 1; // number of run() method calls
|
42
42
|
|
43
|
-
return resume(task.dump(), schema,
|
43
|
+
return resume(task.dump(), schema, taskCount, control);
|
44
44
|
}
|
45
45
|
|
46
46
|
public ConfigDiff resume(TaskSource taskSource,
|
47
|
-
Schema schema, int
|
47
|
+
Schema schema, int taskCount,
|
48
48
|
InputPlugin.Control control)
|
49
49
|
{
|
50
|
-
control.run(taskSource, schema,
|
50
|
+
control.run(taskSource, schema, taskCount);
|
51
51
|
return Exec.newConfigDiff();
|
52
52
|
}
|
53
53
|
|
54
54
|
public void cleanup(TaskSource taskSource,
|
55
|
-
Schema schema, int
|
55
|
+
Schema schema, int taskCount,
|
56
56
|
List<CommitReport> successCommitReports)
|
57
57
|
{
|
58
58
|
}
|
59
59
|
|
60
60
|
public CommitReport run(TaskSource taskSource,
|
61
|
-
Schema schema, int
|
61
|
+
Schema schema, int taskIndex,
|
62
62
|
PageOutput output)
|
63
63
|
{
|
64
64
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
@@ -29,13 +29,13 @@ public class <%= java_class_name %>
|
|
29
29
|
}
|
30
30
|
|
31
31
|
public ConfigDiff transaction(ConfigSource config,
|
32
|
-
Schema schema, int
|
32
|
+
Schema schema, int taskCount,
|
33
33
|
OutputPlugin.Control control)
|
34
34
|
{
|
35
35
|
PluginTask task = config.loadConfig(PluginTask.class);
|
36
36
|
|
37
37
|
// retryable (idempotent) output:
|
38
|
-
// return resume(task.dump(), schema,
|
38
|
+
// return resume(task.dump(), schema, taskCount, control);
|
39
39
|
|
40
40
|
// non-retryable (non-idempotent) output:
|
41
41
|
control.run(task.dump());
|
@@ -43,19 +43,19 @@ public class <%= java_class_name %>
|
|
43
43
|
}
|
44
44
|
|
45
45
|
public ConfigDiff resume(TaskSource taskSource,
|
46
|
-
Schema schema, int
|
46
|
+
Schema schema, int taskCount,
|
47
47
|
OutputPlugin.Control control)
|
48
48
|
{
|
49
49
|
throw new UnsupportedOperationException("<%= name %> output plugin does not support resuming");
|
50
50
|
}
|
51
51
|
|
52
52
|
public void cleanup(TaskSource taskSource,
|
53
|
-
Schema schema, int
|
53
|
+
Schema schema, int taskCount,
|
54
54
|
List<CommitReport> successCommitReports)
|
55
55
|
{
|
56
56
|
}
|
57
57
|
|
58
|
-
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int
|
58
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
59
59
|
{
|
60
60
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
61
61
|
|
data/lib/embulk/data_source.rb
CHANGED
@@ -12,15 +12,15 @@ module Embulk
|
|
12
12
|
when :float
|
13
13
|
Float(v)
|
14
14
|
when :string
|
15
|
-
String(v)
|
15
|
+
String(v).dup
|
16
16
|
when :bool
|
17
17
|
!!v # TODO validation
|
18
18
|
when :hash
|
19
19
|
raise ArgumentError, "Invalid value for :hash" unless v.is_a?(Hash)
|
20
|
-
v
|
20
|
+
DataSource.new.merge!(v)
|
21
21
|
when :array
|
22
22
|
raise ArgumentError, "Invalid value for :array" unless v.is_a?(Array)
|
23
|
-
v
|
23
|
+
v.dup
|
24
24
|
else
|
25
25
|
unless type.respond_to?(:load)
|
26
26
|
raise ArgumentError, "Unknown type #{type.to_s.dump}"
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -47,7 +47,7 @@ module Embulk
|
|
47
47
|
def guess(config, sample)
|
48
48
|
# TODO pure-ruby LineDecoder implementation?
|
49
49
|
begin
|
50
|
-
|
50
|
+
parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
|
51
51
|
rescue
|
52
52
|
# TODO log?
|
53
53
|
p $!
|
@@ -55,7 +55,7 @@ module Embulk
|
|
55
55
|
return DataSource.new
|
56
56
|
end
|
57
57
|
|
58
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]),
|
58
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
|
59
59
|
sample_text = ''
|
60
60
|
while decoder.nextFile
|
61
61
|
first = true
|
@@ -63,7 +63,7 @@ module Embulk
|
|
63
63
|
if first
|
64
64
|
first = false
|
65
65
|
else
|
66
|
-
sample_text <<
|
66
|
+
sample_text << parser_task.getNewline().getString()
|
67
67
|
end
|
68
68
|
sample_text << line
|
69
69
|
end
|
@@ -81,7 +81,7 @@ module Embulk
|
|
81
81
|
def guess(config, sample)
|
82
82
|
# TODO pure-ruby LineDecoder implementation?
|
83
83
|
begin
|
84
|
-
|
84
|
+
parser_task = config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
|
85
85
|
rescue
|
86
86
|
# TODO log?
|
87
87
|
p $!
|
@@ -89,7 +89,7 @@ module Embulk
|
|
89
89
|
return DataSource.new
|
90
90
|
end
|
91
91
|
|
92
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]),
|
92
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
|
93
93
|
sample_lines = []
|
94
94
|
while decoder.nextFile
|
95
95
|
while line = decoder.poll
|
data/lib/embulk/input_plugin.rb
CHANGED
@@ -46,10 +46,10 @@ module Embulk
|
|
46
46
|
|
47
47
|
def transaction(java_config, java_control)
|
48
48
|
config = DataSource.from_java(java_config)
|
49
|
-
config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,
|
49
|
+
config_diff_hash = @ruby_class.transaction(config) do |task_source_hash,columns,task_count|
|
50
50
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
51
51
|
java_schema = Schema.new(columns).to_java
|
52
|
-
java_commit_reports = java_control.run(java_task_source, java_schema,
|
52
|
+
java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
|
53
53
|
java_commit_reports.map {|java_commit_report|
|
54
54
|
DataSource.from_java(java_commit_report)
|
55
55
|
}
|
@@ -58,13 +58,13 @@ module Embulk
|
|
58
58
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
59
59
|
end
|
60
60
|
|
61
|
-
def resume(java_task_source, java_schema,
|
61
|
+
def resume(java_task_source, java_schema, task_count, java_control)
|
62
62
|
task_source = DataSource.from_java(java_task_source)
|
63
63
|
schema = Schema.from_java(java_schema)
|
64
|
-
config_diff_hash = @ruby_class.resume(task_source, schema,
|
64
|
+
config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
|
65
65
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
66
66
|
java_schema = Schema.new(columns).to_java
|
67
|
-
java_commit_reports = java_control.run(java_task_source, java_schema,
|
67
|
+
java_commit_reports = java_control.run(java_task_source, java_schema, task_count)
|
68
68
|
java_commit_reports.map {|java_commit_report|
|
69
69
|
DataSource.from_java(java_commit_report)
|
70
70
|
}
|
@@ -73,11 +73,11 @@ module Embulk
|
|
73
73
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
74
74
|
end
|
75
75
|
|
76
|
-
def cleanup(java_task_source, java_schema,
|
76
|
+
def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
|
77
77
|
task_source = DataSource.from_java(java_task_source)
|
78
78
|
schema = Schema.from_java(java_schema)
|
79
79
|
commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
|
80
|
-
@ruby_class.cleanup(task_source, schema,
|
80
|
+
@ruby_class.cleanup(task_source, schema, task_count, commit_reports)
|
81
81
|
return nil
|
82
82
|
end
|
83
83
|
|
data/lib/embulk/output_plugin.rb
CHANGED
@@ -5,7 +5,7 @@ module Embulk
|
|
5
5
|
require 'embulk/page'
|
6
6
|
|
7
7
|
class OutputPlugin
|
8
|
-
def self.transaction(config, schema,
|
8
|
+
def self.transaction(config, schema, task_count, &control)
|
9
9
|
yield(config)
|
10
10
|
return {}
|
11
11
|
end
|
@@ -59,10 +59,10 @@ module Embulk
|
|
59
59
|
@ruby_class = ruby_class
|
60
60
|
end
|
61
61
|
|
62
|
-
def transaction(java_config, java_schema,
|
62
|
+
def transaction(java_config, java_schema, task_count, java_control)
|
63
63
|
config = DataSource.from_java(java_config)
|
64
64
|
schema = Schema.from_java(java_schema)
|
65
|
-
config_diff_hash = @ruby_class.transaction(config, schema,
|
65
|
+
config_diff_hash = @ruby_class.transaction(config, schema, task_count) do |task_source_hash|
|
66
66
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
67
67
|
java_commit_reports = java_control.run(java_task_source)
|
68
68
|
java_commit_reports.map {|java_commit_report|
|
@@ -73,10 +73,10 @@ module Embulk
|
|
73
73
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
74
74
|
end
|
75
75
|
|
76
|
-
def resume(java_task_source, java_schema,
|
76
|
+
def resume(java_task_source, java_schema, task_count, java_control)
|
77
77
|
task_source = DataSource.from_java(java_task_source)
|
78
78
|
schema = Schema.from_java(java_schema)
|
79
|
-
config_diff_hash = @ruby_class.resume(task_source, schema,
|
79
|
+
config_diff_hash = @ruby_class.resume(task_source, schema, task_count) do |task_source_hash,columns,task_count|
|
80
80
|
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
81
81
|
java_commit_reports = java_control.run(java_task_source)
|
82
82
|
java_commit_reports.map {|java_commit_report|
|
@@ -87,11 +87,11 @@ module Embulk
|
|
87
87
|
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
88
88
|
end
|
89
89
|
|
90
|
-
def cleanup(java_task_source, java_schema,
|
90
|
+
def cleanup(java_task_source, java_schema, task_count, java_commit_reports)
|
91
91
|
task_source = DataSource.from_java(java_task_source)
|
92
92
|
schema = Schema.from_java(java_schema)
|
93
93
|
commit_reports = java_commit_reports.map {|c| DataSource.from_java(c) }
|
94
|
-
@ruby_class.cleanup(task_source, schema,
|
94
|
+
@ruby_class.cleanup(task_source, schema, task_count, commit_reports)
|
95
95
|
return nil
|
96
96
|
end
|
97
97
|
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -152,6 +152,7 @@ files:
|
|
152
152
|
- embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java
|
153
153
|
- embulk-core/src/main/java/org/embulk/exec/ResumeState.java
|
154
154
|
- embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java
|
155
|
+
- embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java
|
155
156
|
- embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java
|
156
157
|
- embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java
|
157
158
|
- embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java
|
@@ -226,6 +227,7 @@ files:
|
|
226
227
|
- embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java
|
227
228
|
- embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java
|
228
229
|
- embulk-core/src/main/java/org/embulk/spi/util/Newline.java
|
230
|
+
- embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java
|
229
231
|
- embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java
|
230
232
|
- embulk-core/src/main/java/org/embulk/spi/util/Pages.java
|
231
233
|
- embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java
|
@@ -268,6 +270,7 @@ files:
|
|
268
270
|
- embulk-docs/src/release/release-0.4.1.rst
|
269
271
|
- embulk-docs/src/release/release-0.4.2.rst
|
270
272
|
- embulk-docs/src/release/release-0.4.3.rst
|
273
|
+
- embulk-docs/src/release/release-0.4.4.rst
|
271
274
|
- embulk-standards/build.gradle
|
272
275
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
273
276
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -362,8 +365,8 @@ files:
|
|
362
365
|
- classpath/bval-jsr303-0.5.jar
|
363
366
|
- classpath/commons-beanutils-core-1.8.3.jar
|
364
367
|
- classpath/commons-lang3-3.1.jar
|
365
|
-
- classpath/embulk-core-0.4.
|
366
|
-
- classpath/embulk-standards-0.4.
|
368
|
+
- classpath/embulk-core-0.4.4.jar
|
369
|
+
- classpath/embulk-standards-0.4.4.jar
|
367
370
|
- classpath/guava-18.0.jar
|
368
371
|
- classpath/guice-3.0.jar
|
369
372
|
- classpath/guice-multibindings-3.0.jar
|