embulk 0.8.35-java → 0.8.36-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/build.gradle +1 -1
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +5 -1
- data/embulk-cli/src/main/java/org/embulk/cli/EmbulkRun.java +12 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkRunner.java +2 -2
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +802 -17
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +8 -1
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +33 -2
- data/embulk-core/src/main/java/org/embulk/plugin/jar/JarPluginLoader.java +32 -5
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +1 -6
- data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +39 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +43 -9
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +46 -8
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +19 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +12 -1
- data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +1 -0
- data/embulk-docs/build.gradle +8 -0
- data/embulk-docs/src/built-in.rst +47 -35
- data/embulk-docs/src/index.rst +9 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.36.rst +32 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +22 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +34 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +8 -2
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +76 -0
- data/lib/embulk/guess/schema_guess.rb +1 -1
- data/lib/embulk/input_plugin.rb +8 -1
- data/lib/embulk/page_builder.rb +38 -5
- data/lib/embulk/schema.rb +5 -6
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_schema_guess.rb +18 -0
- metadata +7 -6
data/lib/embulk/input_plugin.rb
CHANGED
@@ -34,6 +34,10 @@ module Embulk
|
|
34
34
|
def init
|
35
35
|
end
|
36
36
|
|
37
|
+
def self.create_page_builder(task_source: nil, schema: nil, processor_index: nil, java_page_output: nil)
|
38
|
+
return PageBuilder.new(schema, java_page_output)
|
39
|
+
end
|
40
|
+
|
37
41
|
def run
|
38
42
|
raise NotImplementedError, "InputPlugin#run must be implemented"
|
39
43
|
end
|
@@ -95,7 +99,10 @@ module Embulk
|
|
95
99
|
def run(java_task_source, java_schema, processor_index, java_output)
|
96
100
|
task_source = DataSource.from_java(java_task_source)
|
97
101
|
schema = Schema.from_java(java_schema)
|
98
|
-
page_builder = PageBuilder.new(schema, java_output)
|
102
|
+
page_builder = @ruby_class.create_page_builder(task_source: task_source,
|
103
|
+
schema: schema,
|
104
|
+
processor_index: processor_index,
|
105
|
+
java_page_output: java_output)
|
99
106
|
begin
|
100
107
|
task_report_hash = @ruby_class.new(task_source, schema, processor_index, page_builder).run
|
101
108
|
return DataSource.from_ruby_hash(task_report_hash).to_java
|
data/lib/embulk/page_builder.rb
CHANGED
@@ -1,15 +1,48 @@
|
|
1
1
|
module Embulk
|
2
2
|
|
3
3
|
org.embulk.spi.util.dynamic.AbstractDynamicColumnSetter.module_eval do
|
4
|
-
|
4
|
+
def set(ruby_object)
|
5
|
+
# Using |java_send| so that it calls Java's correct overloaded |set| method, not Ruby's itself.
|
6
|
+
if ruby_object.nil?
|
7
|
+
self.java_send(:setNull, [])
|
8
|
+
elsif ruby_object == true || ruby_object == false
|
9
|
+
self.java_send(:set, [::Java::boolean], ruby_object.to_java(:boolean))
|
10
|
+
elsif ruby_object.kind_of?(Integer)
|
11
|
+
self.java_send(:set, [::Java::long], ruby_object.to_java(:long))
|
12
|
+
elsif ruby_object.kind_of?(Float)
|
13
|
+
self.java_send(:set, [::Java::double], ruby_object.to_java(:double))
|
14
|
+
elsif ruby_object.kind_of?(String)
|
15
|
+
self.java_send(:set, [::Java::java.lang.String], ruby_object.to_java(::Java::java.lang.String))
|
16
|
+
elsif ruby_object.kind_of?(Time)
|
17
|
+
self.java_send(:set, [::Java::org.embulk.spi.time.Timestamp],
|
18
|
+
::Java::org.embulk.spi.time.Timestamp.ofEpochSecond(ruby_object.to_i, ruby_object.nsec))
|
19
|
+
else
|
20
|
+
self.java_send(:set, [::Java::org.msgpack.value.Value],
|
21
|
+
::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(ruby_object.to_msgpack.to_java_bytes).unpackValue())
|
22
|
+
end
|
23
|
+
end
|
5
24
|
end
|
6
25
|
|
7
26
|
class PageBuilder
|
8
|
-
def initialize(schema, java_page_output)
|
27
|
+
def initialize(schema, java_page_output=nil, java_dynamic_page_builder: nil)
|
28
|
+
if java_page_output.nil? && java_dynamic_page_builder.nil?
|
29
|
+
raise ArgumentError, 'PageBuilder.new must take either of java_page_output or java_dynamic_page_builder'
|
30
|
+
end
|
31
|
+
if java_page_output && java_dynamic_page_builder
|
32
|
+
raise ArgumentError, 'PageBuilder.new must not take both java_page_output and java_dynamic_page_builder'
|
33
|
+
end
|
34
|
+
|
9
35
|
require 'msgpack' # used at DynamicPageBuilder.set(Value)
|
10
|
-
|
11
|
-
|
12
|
-
|
36
|
+
|
37
|
+
if java_page_output
|
38
|
+
# TODO get task as an argument
|
39
|
+
task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
|
40
|
+
@page_builder = Java::DynamicPageBuilder.createWithTimestampMetadataFromBuilderTask(
|
41
|
+
task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
|
42
|
+
else
|
43
|
+
@page_builder = java_dynamic_page_builder
|
44
|
+
end
|
45
|
+
|
13
46
|
@schema = schema
|
14
47
|
end
|
15
48
|
|
data/lib/embulk/schema.rb
CHANGED
@@ -20,7 +20,6 @@ module Embulk
|
|
20
20
|
each do |column|
|
21
21
|
idx = column.index
|
22
22
|
column_script =
|
23
|
-
"value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
|
24
23
|
"if reader.isNull(#{idx})\n" <<
|
25
24
|
"record << nil\n" <<
|
26
25
|
"else\n" <<
|
@@ -34,9 +33,9 @@ module Embulk
|
|
34
33
|
when :string
|
35
34
|
"record << reader.getString(#{idx})"
|
36
35
|
when :timestamp
|
37
|
-
"record << reader.getTimestamp(#{idx}).
|
36
|
+
"record << (java_timestamp = reader.getTimestamp(#{idx}); ruby_time = Java::org.jruby.RubyTime.new(JRuby.runtime, JRuby.runtime.getClass('Time'), Java::org.joda.time.DateTime.new(java_timestamp.toEpochMilli())).gmtime().to_java(Java::org.jruby.RubyTime); ruby_time.setNSec(java_timestamp.getNano()); ruby_time)"
|
38
37
|
when :json
|
39
|
-
"record << MessagePack.unpack(
|
38
|
+
"record << MessagePack.unpack(String.from_java_bytes((::Java::org.msgpack.core.MessagePack.newDefaultBufferPacker()).packValue(reader.getJson(#{idx})).toMessageBuffer().toByteArray()))"
|
40
39
|
else
|
41
40
|
raise "Unknown type #{column.type.inspect}"
|
42
41
|
end <<
|
@@ -49,7 +48,6 @@ module Embulk
|
|
49
48
|
|
50
49
|
record_writer_script = "lambda do |builder,record|\n"
|
51
50
|
record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
|
52
|
-
record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
|
53
51
|
each do |column|
|
54
52
|
idx = column.index
|
55
53
|
column_script =
|
@@ -66,9 +64,10 @@ module Embulk
|
|
66
64
|
when :string
|
67
65
|
"builder.setString(#{idx}, record[#{idx}])"
|
68
66
|
when :timestamp
|
69
|
-
|
67
|
+
# It was originally expecting that `record[#{idx}]` was a Ruby Time object. Does it really happen?
|
68
|
+
"(ruby_time = record[#{idx}].to_java(Java::org.jruby.RubyTime); msec = ruby_time.getDateTime().getMillis(); builder.setTimestamp(#{idx}, java_timestamp_class.ofEpochSecond(msec / 1000, ruby_time.getNSec() + (msec % 1000) * 1000000)))"
|
70
69
|
when :json
|
71
|
-
"builder.setJson(#{idx},
|
70
|
+
"builder.setJson(#{idx}, ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(MessagePack.pack(record[#{idx}]).to_java_bytes).unpackValue())"
|
72
71
|
else
|
73
72
|
raise "Unknown type #{column.type.inspect}"
|
74
73
|
end <<
|
data/lib/embulk/version.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module Embulk
|
4
4
|
@@warned = false
|
5
5
|
|
6
|
-
VERSION_INTERNAL = '0.8.35'
|
6
|
+
VERSION_INTERNAL = '0.8.36'
|
7
7
|
|
8
8
|
DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
|
9
9
|
def self.const_missing(name)
|
@@ -26,4 +26,22 @@ class SchemaGuessTest < ::Test::Unit::TestCase
|
|
26
26
|
{"a" => "12345678"},
|
27
27
|
]))
|
28
28
|
end
|
29
|
+
|
30
|
+
def test_boolean
|
31
|
+
%w[
|
32
|
+
true false t f
|
33
|
+
yes no y n
|
34
|
+
on off
|
35
|
+
].each do |str|
|
36
|
+
# If at least one of three kinds of boolean strings (i.e., downcase, upcase, capitalize) is
|
37
|
+
# mistakenly detected as "string," the guesser concludes the column type is "string."
|
38
|
+
assert_equal(
|
39
|
+
[C.new(0, "a", :boolean)],
|
40
|
+
G.from_hash_records([
|
41
|
+
{"a" => str.downcase},
|
42
|
+
{"a" => str.upcase},
|
43
|
+
{"a" => str.capitalize},
|
44
|
+
]))
|
45
|
+
end
|
46
|
+
end
|
29
47
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.35
|
4
|
+
version: 0.8.36
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -157,10 +157,10 @@ files:
|
|
157
157
|
- classpath/commons-compress-1.10.jar
|
158
158
|
- classpath/commons-lang-2.4.jar
|
159
159
|
- classpath/commons-lang3-3.4.jar
|
160
|
-
- classpath/embulk-cli-0.8.35.jar
|
161
|
-
- classpath/embulk-core-0.8.35.jar
|
162
|
-
- classpath/embulk-jruby-strptime-0.8.35.jar
|
163
|
-
- classpath/embulk-standards-0.8.35.jar
|
160
|
+
- classpath/embulk-cli-0.8.36.jar
|
161
|
+
- classpath/embulk-core-0.8.36.jar
|
162
|
+
- classpath/embulk-jruby-strptime-0.8.36.jar
|
163
|
+
- classpath/embulk-standards-0.8.36.jar
|
164
164
|
- classpath/guava-18.0.jar
|
165
165
|
- classpath/guice-4.0.jar
|
166
166
|
- classpath/guice-bootstrap-0.1.1.jar
|
@@ -567,6 +567,7 @@ files:
|
|
567
567
|
- embulk-docs/src/release/release-0.8.33.rst
|
568
568
|
- embulk-docs/src/release/release-0.8.34.rst
|
569
569
|
- embulk-docs/src/release/release-0.8.35.rst
|
570
|
+
- embulk-docs/src/release/release-0.8.36.rst
|
570
571
|
- embulk-docs/src/release/release-0.8.4.rst
|
571
572
|
- embulk-docs/src/release/release-0.8.5.rst
|
572
573
|
- embulk-docs/src/release/release-0.8.6.rst
|