embulk 0.8.35-java → 0.8.36-java

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/build.gradle +1 -1
  4. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +5 -1
  5. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkRun.java +12 -0
  6. data/embulk-core/src/main/java/org/embulk/EmbulkRunner.java +2 -2
  7. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +802 -17
  8. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +8 -1
  9. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +33 -2
  10. data/embulk-core/src/main/java/org/embulk/plugin/jar/JarPluginLoader.java +32 -5
  11. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +1 -6
  12. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +39 -1
  13. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +21 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +21 -0
  15. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +43 -9
  16. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +46 -8
  17. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +19 -1
  18. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +11 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +12 -1
  20. data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +1 -0
  21. data/embulk-docs/build.gradle +8 -0
  22. data/embulk-docs/src/built-in.rst +47 -35
  23. data/embulk-docs/src/index.rst +9 -1
  24. data/embulk-docs/src/release.rst +1 -0
  25. data/embulk-docs/src/release/release-0.8.36.rst +32 -0
  26. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +22 -0
  27. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +34 -1
  28. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +8 -2
  29. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +76 -0
  30. data/lib/embulk/guess/schema_guess.rb +1 -1
  31. data/lib/embulk/input_plugin.rb +8 -1
  32. data/lib/embulk/page_builder.rb +38 -5
  33. data/lib/embulk/schema.rb +5 -6
  34. data/lib/embulk/version.rb +1 -1
  35. data/test/guess/test_schema_guess.rb +18 -0
  36. metadata +7 -6
@@ -105,7 +105,7 @@ module Embulk::Guess
105
105
  FALSE_STRINGS = Hash[%w[
106
106
  false False FALSE
107
107
  no No NO
108
- f N n N
108
+ f F n N
109
109
  off Off OFF
110
110
  ].map {|k| [k, true] }]
111
111
 
@@ -34,6 +34,10 @@ module Embulk
34
34
  def init
35
35
  end
36
36
 
37
+ def self.create_page_builder(task_source: nil, schema: nil, processor_index: nil, java_page_output: nil)
38
+ return PageBuilder.new(schema, java_page_output)
39
+ end
40
+
37
41
  def run
38
42
  raise NotImplementedError, "InputPlugin#run must be implemented"
39
43
  end
@@ -95,7 +99,10 @@ module Embulk
95
99
  def run(java_task_source, java_schema, processor_index, java_output)
96
100
  task_source = DataSource.from_java(java_task_source)
97
101
  schema = Schema.from_java(java_schema)
98
- page_builder = PageBuilder.new(schema, java_output)
102
+ page_builder = @ruby_class.create_page_builder(task_source: task_source,
103
+ schema: schema,
104
+ processor_index: processor_index,
105
+ java_page_output: java_output)
99
106
  begin
100
107
  task_report_hash = @ruby_class.new(task_source, schema, processor_index, page_builder).run
101
108
  return DataSource.from_ruby_hash(task_report_hash).to_java
@@ -1,15 +1,48 @@
1
1
  module Embulk
2
2
 
3
3
  org.embulk.spi.util.dynamic.AbstractDynamicColumnSetter.module_eval do
4
- alias_method(:set, :setRubyObject)
4
+ def set(ruby_object)
5
+ # Using |java_send| so that it calls Java's correct overloaded |set| method, not Ruby's itself.
6
+ if ruby_object.nil?
7
+ self.java_send(:setNull, [])
8
+ elsif ruby_object == true || ruby_object == false
9
+ self.java_send(:set, [::Java::boolean], ruby_object.to_java(:boolean))
10
+ elsif ruby_object.kind_of?(Integer)
11
+ self.java_send(:set, [::Java::long], ruby_object.to_java(:long))
12
+ elsif ruby_object.kind_of?(Float)
13
+ self.java_send(:set, [::Java::double], ruby_object.to_java(:double))
14
+ elsif ruby_object.kind_of?(String)
15
+ self.java_send(:set, [::Java::java.lang.String], ruby_object.to_java(::Java::java.lang.String))
16
+ elsif ruby_object.kind_of?(Time)
17
+ self.java_send(:set, [::Java::org.embulk.spi.time.Timestamp],
18
+ ::Java::org.embulk.spi.time.Timestamp.ofEpochSecond(ruby_object.to_i, ruby_object.nsec))
19
+ else
20
+ self.java_send(:set, [::Java::org.msgpack.value.Value],
21
+ ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(ruby_object.to_msgpack.to_java_bytes).unpackValue())
22
+ end
23
+ end
5
24
  end
6
25
 
7
26
  class PageBuilder
8
- def initialize(schema, java_page_output)
27
+ def initialize(schema, java_page_output=nil, java_dynamic_page_builder: nil)
28
+ if java_page_output.nil? && java_dynamic_page_builder.nil?
29
+ raise ArgumentError, 'PageBuilder.new must take either of java_page_output or java_dynamic_page_builder'
30
+ end
31
+ if java_page_output && java_dynamic_page_builder
32
+ raise ArgumentError, 'PageBuilder.new must not take both java_page_output and java_dynamic_page_builder'
33
+ end
34
+
9
35
  require 'msgpack' # used at DynamicPageBuilder.set(Value)
10
- # TODO get task as an argument
11
- task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
12
- @page_builder = Java::DynamicPageBuilder.new(task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
36
+
37
+ if java_page_output
38
+ # TODO get task as an argument
39
+ task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
40
+ @page_builder = Java::DynamicPageBuilder.createWithTimestampMetadataFromBuilderTask(
41
+ task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
42
+ else
43
+ @page_builder = java_dynamic_page_builder
44
+ end
45
+
13
46
  @schema = schema
14
47
  end
15
48
 
data/lib/embulk/schema.rb CHANGED
@@ -20,7 +20,6 @@ module Embulk
20
20
  each do |column|
21
21
  idx = column.index
22
22
  column_script =
23
- "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
24
23
  "if reader.isNull(#{idx})\n" <<
25
24
  "record << nil\n" <<
26
25
  "else\n" <<
@@ -34,9 +33,9 @@ module Embulk
34
33
  when :string
35
34
  "record << reader.getString(#{idx})"
36
35
  when :timestamp
37
- "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
36
+ "record << (java_timestamp = reader.getTimestamp(#{idx}); ruby_time = Java::org.jruby.RubyTime.new(JRuby.runtime, JRuby.runtime.getClass('Time'), Java::org.joda.time.DateTime.new(java_timestamp.toEpochMilli())).gmtime().to_java(Java::org.jruby.RubyTime); ruby_time.setNSec(java_timestamp.getNano()); ruby_time)"
38
37
  when :json
39
- "record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
38
+ "record << MessagePack.unpack(String.from_java_bytes((::Java::org.msgpack.core.MessagePack.newDefaultBufferPacker()).packValue(reader.getJson(#{idx})).toMessageBuffer().toByteArray()))"
40
39
  else
41
40
  raise "Unknown type #{column.type.inspect}"
42
41
  end <<
@@ -49,7 +48,6 @@ module Embulk
49
48
 
50
49
  record_writer_script = "lambda do |builder,record|\n"
51
50
  record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
52
- record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
53
51
  each do |column|
54
52
  idx = column.index
55
53
  column_script =
@@ -66,9 +64,10 @@ module Embulk
66
64
  when :string
67
65
  "builder.setString(#{idx}, record[#{idx}])"
68
66
  when :timestamp
69
- "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
67
+ # It was originally expecting that `record[#{idx}]` was a Ruby Time object. Does it really happen?
68
+ "(ruby_time = record[#{idx}].to_java(Java::org.jruby.RubyTime); msec = ruby_time.getDateTime().getMillis(); builder.setTimestamp(#{idx}, java_timestamp_class.ofEpochSecond(msec / 1000, ruby_time.getNSec() + (msec % 1000) * 1000000)))"
70
69
  when :json
71
- "builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
70
+ "builder.setJson(#{idx}, ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(MessagePack.pack(record[#{idx}]).to_java_bytes).unpackValue())"
72
71
  else
73
72
  raise "Unknown type #{column.type.inspect}"
74
73
  end <<
@@ -3,7 +3,7 @@
3
3
  module Embulk
4
4
  @@warned = false
5
5
 
6
- VERSION_INTERNAL = '0.8.35'
6
+ VERSION_INTERNAL = '0.8.36'
7
7
 
8
8
  DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
9
9
  def self.const_missing(name)
@@ -26,4 +26,22 @@ class SchemaGuessTest < ::Test::Unit::TestCase
26
26
  {"a" => "12345678"},
27
27
  ]))
28
28
  end
29
+
30
+ def test_boolean
31
+ %w[
32
+ true false t f
33
+ yes no y n
34
+ on off
35
+ ].each do |str|
36
+ # If at least one of three kinds of boolean strings (i.e., downcase, upcase, capitalize) is
37
+ # mistakenly detected as "string," the guesser concludes the column type is "string."
38
+ assert_equal(
39
+ [C.new(0, "a", :boolean)],
40
+ G.from_hash_records([
41
+ {"a" => str.downcase},
42
+ {"a" => str.upcase},
43
+ {"a" => str.capitalize},
44
+ ]))
45
+ end
46
+ end
29
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.35
4
+ version: 0.8.36
5
5
  platform: java
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-26 00:00:00.000000000 Z
11
+ date: 2017-10-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -157,10 +157,10 @@ files:
157
157
  - classpath/commons-compress-1.10.jar
158
158
  - classpath/commons-lang-2.4.jar
159
159
  - classpath/commons-lang3-3.4.jar
160
- - classpath/embulk-cli-0.8.35.jar
161
- - classpath/embulk-core-0.8.35.jar
162
- - classpath/embulk-jruby-strptime-0.8.35.jar
163
- - classpath/embulk-standards-0.8.35.jar
160
+ - classpath/embulk-cli-0.8.36.jar
161
+ - classpath/embulk-core-0.8.36.jar
162
+ - classpath/embulk-jruby-strptime-0.8.36.jar
163
+ - classpath/embulk-standards-0.8.36.jar
164
164
  - classpath/guava-18.0.jar
165
165
  - classpath/guice-4.0.jar
166
166
  - classpath/guice-bootstrap-0.1.1.jar
@@ -567,6 +567,7 @@ files:
567
567
  - embulk-docs/src/release/release-0.8.33.rst
568
568
  - embulk-docs/src/release/release-0.8.34.rst
569
569
  - embulk-docs/src/release/release-0.8.35.rst
570
+ - embulk-docs/src/release/release-0.8.36.rst
570
571
  - embulk-docs/src/release/release-0.8.4.rst
571
572
  - embulk-docs/src/release/release-0.8.5.rst
572
573
  - embulk-docs/src/release/release-0.8.6.rst