embulk 0.8.35 → 0.8.36

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/build.gradle +1 -1
  4. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +5 -1
  5. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkRun.java +12 -0
  6. data/embulk-core/src/main/java/org/embulk/EmbulkRunner.java +2 -2
  7. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +802 -17
  8. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +8 -1
  9. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +33 -2
  10. data/embulk-core/src/main/java/org/embulk/plugin/jar/JarPluginLoader.java +32 -5
  11. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +1 -6
  12. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +39 -1
  13. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +21 -0
  14. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +21 -0
  15. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +43 -9
  16. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +46 -8
  17. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +19 -1
  18. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +11 -0
  19. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +12 -1
  20. data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +1 -0
  21. data/embulk-docs/build.gradle +8 -0
  22. data/embulk-docs/src/built-in.rst +47 -35
  23. data/embulk-docs/src/index.rst +9 -1
  24. data/embulk-docs/src/release.rst +1 -0
  25. data/embulk-docs/src/release/release-0.8.36.rst +32 -0
  26. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +22 -0
  27. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +34 -1
  28. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +8 -2
  29. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +76 -0
  30. data/lib/embulk/guess/schema_guess.rb +1 -1
  31. data/lib/embulk/input_plugin.rb +8 -1
  32. data/lib/embulk/page_builder.rb +38 -5
  33. data/lib/embulk/schema.rb +5 -6
  34. data/lib/embulk/version.rb +1 -1
  35. data/test/guess/test_schema_guess.rb +18 -0
  36. metadata +7 -6
data/lib/embulk/guess/schema_guess.rb CHANGED
@@ -105,7 +105,7 @@ module Embulk::Guess
105
105
  FALSE_STRINGS = Hash[%w[
106
106
  false False FALSE
107
107
  no No NO
108
- f N n N
108
+ f F n N
109
109
  off Off OFF
110
110
  ].map {|k| [k, true] }]
111
111
 
data/lib/embulk/input_plugin.rb CHANGED
@@ -34,6 +34,10 @@ module Embulk
34
34
  def init
35
35
  end
36
36
 
37
+ def self.create_page_builder(task_source: nil, schema: nil, processor_index: nil, java_page_output: nil)
38
+ return PageBuilder.new(schema, java_page_output)
39
+ end
40
+
37
41
  def run
38
42
  raise NotImplementedError, "InputPlugin#run must be implemented"
39
43
  end
@@ -95,7 +99,10 @@ module Embulk
95
99
  def run(java_task_source, java_schema, processor_index, java_output)
96
100
  task_source = DataSource.from_java(java_task_source)
97
101
  schema = Schema.from_java(java_schema)
98
- page_builder = PageBuilder.new(schema, java_output)
102
+ page_builder = @ruby_class.create_page_builder(task_source: task_source,
103
+ schema: schema,
104
+ processor_index: processor_index,
105
+ java_page_output: java_output)
99
106
  begin
100
107
  task_report_hash = @ruby_class.new(task_source, schema, processor_index, page_builder).run
101
108
  return DataSource.from_ruby_hash(task_report_hash).to_java
data/lib/embulk/page_builder.rb CHANGED
@@ -1,15 +1,48 @@
1
1
  module Embulk
2
2
 
3
3
  org.embulk.spi.util.dynamic.AbstractDynamicColumnSetter.module_eval do
4
- alias_method(:set, :setRubyObject)
4
+ def set(ruby_object)
5
+ # Using |java_send| so that it calls Java's correct overloaded |set| method, not Ruby's itself.
6
+ if ruby_object.nil?
7
+ self.java_send(:setNull, [])
8
+ elsif ruby_object == true || ruby_object == false
9
+ self.java_send(:set, [::Java::boolean], ruby_object.to_java(:boolean))
10
+ elsif ruby_object.kind_of?(Integer)
11
+ self.java_send(:set, [::Java::long], ruby_object.to_java(:long))
12
+ elsif ruby_object.kind_of?(Float)
13
+ self.java_send(:set, [::Java::double], ruby_object.to_java(:double))
14
+ elsif ruby_object.kind_of?(String)
15
+ self.java_send(:set, [::Java::java.lang.String], ruby_object.to_java(::Java::java.lang.String))
16
+ elsif ruby_object.kind_of?(Time)
17
+ self.java_send(:set, [::Java::org.embulk.spi.time.Timestamp],
18
+ ::Java::org.embulk.spi.time.Timestamp.ofEpochSecond(ruby_object.to_i, ruby_object.nsec))
19
+ else
20
+ self.java_send(:set, [::Java::org.msgpack.value.Value],
21
+ ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(ruby_object.to_msgpack.to_java_bytes).unpackValue())
22
+ end
23
+ end
5
24
  end
6
25
 
7
26
  class PageBuilder
8
- def initialize(schema, java_page_output)
27
+ def initialize(schema, java_page_output=nil, java_dynamic_page_builder: nil)
28
+ if java_page_output.nil? && java_dynamic_page_builder.nil?
29
+ raise ArgumentError, 'PageBuilder.new must take either of java_page_output or java_dynamic_page_builder'
30
+ end
31
+ if java_page_output && java_dynamic_page_builder
32
+ raise ArgumentError, 'PageBuilder.new must not take both java_page_output and java_dynamic_page_builder'
33
+ end
34
+
9
35
  require 'msgpack' # used at DynamicPageBuilder.set(Value)
10
- # TODO get task as an argument
11
- task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
12
- @page_builder = Java::DynamicPageBuilder.new(task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
36
+
37
+ if java_page_output
38
+ # TODO get task as an argument
39
+ task = Java::SPI::Exec.newConfigSource.load_config(Java::DynamicPageBuilder::BuilderTask.java_class)
40
+ @page_builder = Java::DynamicPageBuilder.createWithTimestampMetadataFromBuilderTask(
41
+ task, Java::Injected::BufferAllocator, schema.to_java, java_page_output)
42
+ else
43
+ @page_builder = java_dynamic_page_builder
44
+ end
45
+
13
46
  @schema = schema
14
47
  end
15
48
 
data/lib/embulk/schema.rb CHANGED
@@ -20,7 +20,6 @@ module Embulk
20
20
  each do |column|
21
21
  idx = column.index
22
22
  column_script =
23
- "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n" <<
24
23
  "if reader.isNull(#{idx})\n" <<
25
24
  "record << nil\n" <<
26
25
  "else\n" <<
@@ -34,9 +33,9 @@ module Embulk
34
33
  when :string
35
34
  "record << reader.getString(#{idx})"
36
35
  when :timestamp
37
- "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
36
+ "record << (java_timestamp = reader.getTimestamp(#{idx}); ruby_time = Java::org.jruby.RubyTime.new(JRuby.runtime, JRuby.runtime.getClass('Time'), Java::org.joda.time.DateTime.new(java_timestamp.toEpochMilli())).gmtime().to_java(Java::org.jruby.RubyTime); ruby_time.setNSec(java_timestamp.getNano()); ruby_time)"
38
37
  when :json
39
- "record << MessagePack.unpack(value_api.toMessagePack(JRuby.runtime, reader.getJson(#{idx})))"
38
+ "record << MessagePack.unpack(String.from_java_bytes((::Java::org.msgpack.core.MessagePack.newDefaultBufferPacker()).packValue(reader.getJson(#{idx})).toMessageBuffer().toByteArray()))"
40
39
  else
41
40
  raise "Unknown type #{column.type.inspect}"
42
41
  end <<
@@ -49,7 +48,6 @@ module Embulk
49
48
 
50
49
  record_writer_script = "lambda do |builder,record|\n"
51
50
  record_writer_script << "java_timestamp_class = ::Embulk::Java::Timestamp\n"
52
- record_writer_script << "value_api = ::Embulk::Java::SPI::Json::RubyValueApi\n"
53
51
  each do |column|
54
52
  idx = column.index
55
53
  column_script =
@@ -66,9 +64,10 @@ module Embulk
66
64
  when :string
67
65
  "builder.setString(#{idx}, record[#{idx}])"
68
66
  when :timestamp
69
- "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
67
+ # It was originally expecting that `record[#{idx}]` was a Ruby Time object. Does it really happen?
68
+ "(ruby_time = record[#{idx}].to_java(Java::org.jruby.RubyTime); msec = ruby_time.getDateTime().getMillis(); builder.setTimestamp(#{idx}, java_timestamp_class.ofEpochSecond(msec / 1000, ruby_time.getNSec() + (msec % 1000) * 1000000)))"
70
69
  when :json
71
- "builder.setJson(#{idx}, value_api.fromMessagePack(MessagePack.pack(record[#{idx}])))"
70
+ "builder.setJson(#{idx}, ::Java::org.msgpack.core.MessagePack.newDefaultUnpacker(MessagePack.pack(record[#{idx}]).to_java_bytes).unpackValue())"
72
71
  else
73
72
  raise "Unknown type #{column.type.inspect}"
74
73
  end <<
data/lib/embulk/version.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module Embulk
4
4
  @@warned = false
5
5
 
6
- VERSION_INTERNAL = '0.8.35'
6
+ VERSION_INTERNAL = '0.8.36'
7
7
 
8
8
  DEPRECATED_MESSAGE = 'Embulk::VERSION in (J)Ruby is deprecated. Use org.embulk.EmbulkVersion::VERSION instead. If this message is from a plugin, please tell this to the author of the plugin!'
9
9
  def self.const_missing(name)
data/test/guess/test_schema_guess.rb CHANGED
@@ -26,4 +26,22 @@ class SchemaGuessTest < ::Test::Unit::TestCase
26
26
  {"a" => "12345678"},
27
27
  ]))
28
28
  end
29
+
30
+ def test_boolean
31
+ %w[
32
+ true false t f
33
+ yes no y n
34
+ on off
35
+ ].each do |str|
36
+ # If at least one of three kinds of boolean strings (i.e., downcase, upcase, capitalize) is
37
+ # mistakenly detected as "string," the guesser concludes the column type is "string."
38
+ assert_equal(
39
+ [C.new(0, "a", :boolean)],
40
+ G.from_hash_records([
41
+ {"a" => str.downcase},
42
+ {"a" => str.upcase},
43
+ {"a" => str.capitalize},
44
+ ]))
45
+ end
46
+ end
29
47
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.35
4
+ version: 0.8.36
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-26 00:00:00.000000000 Z
11
+ date: 2017-10-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jruby-jars
@@ -115,10 +115,10 @@ files:
115
115
  - classpath/commons-compress-1.10.jar
116
116
  - classpath/commons-lang-2.4.jar
117
117
  - classpath/commons-lang3-3.4.jar
118
- - classpath/embulk-cli-0.8.35.jar
119
- - classpath/embulk-core-0.8.35.jar
120
- - classpath/embulk-jruby-strptime-0.8.35.jar
121
- - classpath/embulk-standards-0.8.35.jar
118
+ - classpath/embulk-cli-0.8.36.jar
119
+ - classpath/embulk-core-0.8.36.jar
120
+ - classpath/embulk-jruby-strptime-0.8.36.jar
121
+ - classpath/embulk-standards-0.8.36.jar
122
122
  - classpath/guava-18.0.jar
123
123
  - classpath/guice-4.0.jar
124
124
  - classpath/guice-bootstrap-0.1.1.jar
@@ -525,6 +525,7 @@ files:
525
525
  - embulk-docs/src/release/release-0.8.33.rst
526
526
  - embulk-docs/src/release/release-0.8.34.rst
527
527
  - embulk-docs/src/release/release-0.8.35.rst
528
+ - embulk-docs/src/release/release-0.8.36.rst
528
529
  - embulk-docs/src/release/release-0.8.4.rst
529
530
  - embulk-docs/src/release/release-0.8.5.rst
530
531
  - embulk-docs/src/release/release-0.8.6.rst