embulk 0.8.35 → 0.8.36
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/build.gradle +1 -1
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +5 -1
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkRun.java +12 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkRunner.java +2 -2
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +802 -17
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +8 -1
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +33 -2
- data/embulk-core/src/main/java/org/embulk/plugin/jar/JarPluginLoader.java +32 -5
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +1 -6
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +39 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +43 -9
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +46 -8
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +19 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +12 -1
- data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +1 -0
- data/embulk-docs/build.gradle +8 -0
- data/embulk-docs/src/built-in.rst +47 -35
- data/embulk-docs/src/index.rst +9 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.36.rst +32 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +22 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +34 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +8 -2
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +76 -0
- data/lib/embulk/guess/schema_guess.rb +1 -1
- data/lib/embulk/input_plugin.rb +8 -1
- data/lib/embulk/page_builder.rb +38 -5
- data/lib/embulk/schema.rb +5 -6
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_schema_guess.rb +18 -0
- metadata +7 -6
data/lib/embulk/input_plugin.rb
CHANGED
@@ -34,6 +34,10 @@ module Embulk
|
|
34
34
|
def init
|
35
35
|
end
|
36
36
|
|
37
|
+
def self.create_page_builder(task_source: nil, schema: nil, processor_index: nil, java_page_output: nil)
|
38
|
+
return PageBuilder.new(schema, java_page_output)
|
39
|
+
end
|
40
|
+
|
37
41
|
def run
|
38
42
|
raise NotImplementedError, "InputPlugin#run must be implemented"
|
39
43
|
end
|
@@ -95,7 +99,10 @@ module Embulk
|
|
95
99
|
def run(java_task_source, java_schema, processor_index, java_output)
|
96
100
|
task_source = DataSource.from_java(java_task_source)
|
97
101
|
schema = Schema.from_java(java_schema)
|
98
|
-
page_builder = PageBuilder.new(schema, java_output)
|
102
|
+
page_builder = @ruby_class.create_page_builder(task_source: task_source,
|
103
|
+
schema: schema,
|
104
|
+
processor_index: processor_index,
|
105
|
+
java_page_output: java_output)
|
99
106
|
begin
|
100
107
|
task_report_hash = @ruby_class.new(task_source, schema, processor_index, page_builder).run
|
101
108
|
return DataSource.from_ruby_hash(task_report_hash).to_java
|
data/lib/embulk/page_builder.rb
CHANGED
@@ -1,15 +1,48 @@
|
|
1
1
|
module Embulk
|
2
2
|
|
3
3
|
org.embulk.spi.util.dynamic.AbstractDynamicColumnSetter.module_eval do
|
4
|
-
|
4
|
+
def set(ruby_object)
|
5
|
+
# Using |java_send| so that it calls Java's correct overloaded |set| method, not Ruby's itself.
|
6
|
+
if ruby_object.nil?
|
7
|
+
self.java_send(:setNull, [])
|
8
|
+
elsif ruby_object == true || ruby_object == false
|
9
|
+
self.java_send(:set, [::Java::boolean], ruby_object.to_java(:boolean))
|
10
|
+
elsif ruby_object.kind_of?(Integer)
|
11
|
+
self.java_send(:set, [::Java::long], ruby_object.to_java(:long))
|
12
|
+
elsif ruby_object.kind_of?(Float)
|
13
|
+
self.java_send(:set, [::Java::double], ruby_object.to_java(:double))
|
14
|
+
elsif ruby_object.kind_of?(String)
|
15
|
+
self.java_send(:set, [::Java::java.lang.String], ruby_object.to_java(::Java::java.lang.String))
|
16
|
+
elsif ruby_object.kind_of?(Time)
|
17
|
+
self.java_send(:set, [::Java::org.embulk.spi.time.Timestamp],
|
18
|
+
::Java::org.embulk.spi.time.Timestamp.ofEpochSecond(ruby_object.to_i, ruby_object.nsec))
|
19
|
+
else
|
20
|
+
self.java_send(:set, [::Java::org.msgpack.value.Value],
|
21
|
+
::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(ruby_object.to_msgpack.to_java_bytes).unpackValue())
|
22
|
+
end
|
23
|
+
end
|
5
24
|
end
|
6
25
|
|
7
26
|
class PageBuilder
|
8
|
-
def initialize(schema, java_page_output)
|
27
|
+
def initialize(schema, java_page_output=nil, java_dynamic_page_builder: nil)
|
28
|
+
if java_page_output.nil? && java_dynamic_page_builder.nil?
|
29
|
+
raise ArgumentError, 'PageBuilder.new must take either of java_page_output or java_dynamic_page_builder'
|
30
|
+
end
|
31
|
+
if java_page_output && java_dynamic_page_builder
|
32
|
+
raise ArgumentError, 'PageBuilder.new must not take both java_page_output and java_dynamic_page_builder'
|
33
|
+
end
|
34
|
+
|
9
35
|
require 'msgpack' # used at DynamicPageBuilder.set(Value)
|
10
|
-
|
11
|
-
|
12
|
-
|
36
|
+
|
37
|
+
if java_page_output
|
38
|
+
# TODO get task as an argument
|
39
|
+
task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
|
40
|
+
@page_builder = Java::DynamicPageBuilder.createWithTimestampMetadataFromBuilderTask(
|
41
|
+
task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
|
42
|
+
else
|
43
|
+
@page_builder = java_dynamic_page_builder
|
44
|
+
end
|
45
|
+
|
13
46
|
@schema = schema
|
14
47
|
end
|
15
48
|
|
data/lib/embulk/schema.rb
CHANGED
@@ -20,7 +20,6 @@ module Embulk
|
|
20
20
|
each do |column|
|
21
21
|
idx = column.index
|
22
22
|
column_script =
|
23
|
-
"value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
|
24
23
|
"if reader.isNull(#{idx})\n" <<
|
25
24
|
"record << nil\n" <<
|
26
25
|
"else\n" <<
|
@@ -34,9 +33,9 @@ module Embulk
|
|
34
33
|
when :string
|
35
34
|
"record << reader.getString(#{idx})"
|
36
35
|
when :timestamp
|
37
|
-
"record << reader.getTimestamp(#{idx}).
|
36
|
+
"record << (java_timestamp = reader.getTimestamp(#{idx}); ruby_time = Java::org.jruby.RubyTime.new(JRuby.runtime, JRuby.runtime.getClass('Time'), Java::org.joda.time.DateTime.new(java_timestamp.toEpochMilli())).gmtime().to_java(Java::org.jruby.RubyTime); ruby_time.setNSec(java_timestamp.getNano()); ruby_time)"
|
38
37
|
when :json
|
39
|
-
"record << MessagePack.unpack(
|
38
|
+
"record << MessagePack.unpack(String.from_java_bytes((::Java::org.msgpack.core.MessagePack.newDefaultBufferPacker()).packValue(reader.getJson(#{idx})).toMessageBuffer().toByteArray()))"
|
40
39
|
else
|
41
40
|
raise "Unknown type #{column.type.inspect}"
|
42
41
|
end <<
|
@@ -49,7 +48,6 @@ module Embulk
|
|
49
48
|
|
50
49
|
record_writer_script = "lambda do |builder,record|\n"
|
51
50
|
record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
|
52
|
-
record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
|
53
51
|
each do |column|
|
54
52
|
idx = column.index
|
55
53
|
column_script =
|
@@ -66,9 +64,10 @@ module Embulk
|
|
66
64
|
when :string
|
67
65
|
"builder.setString(#{idx}, record[#{idx}])"
|
68
66
|
when :timestamp
|
69
|
-
|
67
|
+
# It was originally expecting that `record[#{idx}]` was a Ruby Time object. Does it really happen?
|
68
|
+
"(ruby_time = record[#{idx}].to_java(Java::org.jruby.RubyTime); msec = ruby_time.getDateTime().getMillis(); builder.setTimestamp(#{idx}, java_timestamp_class.ofEpochSecond(msec / 1000, ruby_time.getNSec() + (msec % 1000) * 1000000)))"
|
70
69
|
when :json
|
71
|
-
"builder.setJson(#{idx},
|
70
|
+
"builder.setJson(#{idx}, ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(MessagePack.pack(record[#{idx}]).to_java_bytes).unpackValue())"
|
72
71
|
else
|
73
72
|
raise "Unknown type #{column.type.inspect}"
|
74
73
|
end <<
|
data/lib/embulk/version.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module Embulk
|
4
4
|
@@warned = false
|
5
5
|
|
6
|
-
VERSION_INTERNAL = '0.8.35'
|
6
|
+
VERSION_INTERNAL = '0.8.36'
|
7
7
|
|
8
8
|
DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
|
9
9
|
def self.const_missing(name)
|
data/test/guess/test_schema_guess.rb
CHANGED
@@ -26,4 +26,22 @@ class SchemaGuessTest < ::Test::Unit::TestCase
|
|
26
26
|
{"a" => "12345678"},
|
27
27
|
]))
|
28
28
|
end
|
29
|
+
|
30
|
+
def test_boolean
|
31
|
+
%w[
|
32
|
+
true false t f
|
33
|
+
yes no y n
|
34
|
+
on off
|
35
|
+
].each do |str|
|
36
|
+
# If at least one of three kinds of boolean strings (i.e., downcase, upcase, capitalize) is
|
37
|
+
# mistakenly detected as "string," the guesser concludes the column type is "string."
|
38
|
+
assert_equal(
|
39
|
+
[C.new(0, "a", :boolean)],
|
40
|
+
G.from_hash_records([
|
41
|
+
{"a" => str.downcase},
|
42
|
+
{"a" => str.upcase},
|
43
|
+
{"a" => str.capitalize},
|
44
|
+
]))
|
45
|
+
end
|
46
|
+
end
|
29
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.35
|
4
|
+
version: 0.8.36
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jruby-jars
|
@@ -115,10 +115,10 @@ files:
|
|
115
115
|
- classpath/commons-compress-1.10.jar
|
116
116
|
- classpath/commons-lang-2.4.jar
|
117
117
|
- classpath/commons-lang3-3.4.jar
|
118
|
-
- classpath/embulk-cli-0.8.35.jar
|
119
|
-
- classpath/embulk-core-0.8.35.jar
|
120
|
-
- classpath/embulk-jruby-strptime-0.8.35.jar
|
121
|
-
- classpath/embulk-standards-0.8.35.jar
|
118
|
+
- classpath/embulk-cli-0.8.36.jar
|
119
|
+
- classpath/embulk-core-0.8.36.jar
|
120
|
+
- classpath/embulk-jruby-strptime-0.8.36.jar
|
121
|
+
- classpath/embulk-standards-0.8.36.jar
|
122
122
|
- classpath/guava-18.0.jar
|
123
123
|
- classpath/guice-4.0.jar
|
124
124
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -525,6 +525,7 @@ files:
|
|
525
525
|
- embulk-docs/src/release/release-0.8.33.rst
|
526
526
|
- embulk-docs/src/release/release-0.8.34.rst
|
527
527
|
- embulk-docs/src/release/release-0.8.35.rst
|
528
|
+
- embulk-docs/src/release/release-0.8.36.rst
|
528
529
|
- embulk-docs/src/release/release-0.8.4.rst
|
529
530
|
- embulk-docs/src/release/release-0.8.5.rst
|
530
531
|
- embulk-docs/src/release/release-0.8.6.rst
|