embulk 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.travis.yml +16 -0
- data/Gemfile +0 -1
- data/README.md +37 -19
- data/Rakefile +5 -37
- data/bin/embulk +1 -1
- data/build.gradle +178 -95
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/Makefile +178 -0
- data/embulk-docs/build.gradle +20 -0
- data/embulk-docs/make.bat +243 -0
- data/embulk-docs/push-gh-pages.sh +29 -0
- data/embulk-docs/src/conf.py +260 -0
- data/embulk-docs/src/index.rst +19 -0
- data/embulk-docs/src/release.rst +14 -0
- data/embulk-docs/src/release/release-0.1.0.rst +8 -0
- data/embulk-docs/src/release/release-0.2.0.rst +16 -0
- data/embulk-docs/src/release/release-0.2.1.rst +19 -0
- data/embulk-docs/src/release/release-0.3.0.rst +34 -0
- data/embulk-docs/src/release/release-0.3.1.rst +11 -0
- data/embulk-docs/src/release/release-0.3.2.rst +15 -0
- data/embulk-docs/src/release/release-0.4.0.rst +74 -0
- data/embulk-standards/build.gradle +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/buffer.rb +2 -2
- data/lib/embulk/column.rb +6 -6
- data/lib/embulk/command/embulk_example.rb +1 -1
- data/lib/embulk/command/embulk_new_plugin.rb +87 -0
- data/lib/embulk/command/embulk_run.rb +84 -26
- data/lib/embulk/data/bundle/Gemfile +12 -20
- data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
- data/lib/embulk/data/new/LICENSE.txt +21 -0
- data/lib/embulk/data/new/README.md.erb +75 -0
- data/lib/embulk/data/new/gitignore.erb +12 -0
- data/lib/embulk/data/new/java/build.gradle.erb +57 -0
- data/lib/embulk/data/new/java/decoder.java.erb +40 -0
- data/lib/embulk/data/new/java/encoder.java.erb +40 -0
- data/lib/embulk/data/new/java/file_input.java.erb +64 -0
- data/lib/embulk/data/new/java/file_output.java.erb +66 -0
- data/lib/embulk/data/new/java/filter.java.erb +47 -0
- data/lib/embulk/data/new/java/formatter.java.erb +45 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/lib/embulk/data/new/java/gradlew +164 -0
- data/lib/embulk/data/new/java/gradlew.bat +90 -0
- data/lib/embulk/data/new/java/input.java.erb +69 -0
- data/lib/embulk/data/new/java/output.java.erb +65 -0
- data/lib/embulk/data/new/java/parser.java.erb +51 -0
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
- data/lib/embulk/data/new/java/test.java.erb +5 -0
- data/lib/embulk/data/new/ruby/Gemfile +2 -0
- data/lib/embulk/data/new/ruby/Rakefile +1 -0
- data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
- data/lib/embulk/data/package_data.rb +64 -0
- data/lib/embulk/data_source.rb +2 -2
- data/lib/embulk/decoder_plugin.rb +27 -0
- data/lib/embulk/encoder_plugin.rb +27 -0
- data/lib/embulk/error.rb +3 -0
- data/lib/embulk/file_input_plugin.rb +27 -0
- data/lib/embulk/file_output_plugin.rb +27 -0
- data/lib/embulk/filter_plugin.rb +28 -9
- data/lib/embulk/formatter_plugin.rb +105 -0
- data/lib/embulk/guess_csv.rb +10 -1
- data/lib/embulk/guess_plugin.rb +22 -27
- data/lib/embulk/input_plugin.rb +34 -20
- data/lib/embulk/java/bootstrap.rb +5 -0
- data/lib/embulk/java/imports.rb +7 -0
- data/lib/embulk/java_plugin.rb +84 -0
- data/lib/embulk/output_plugin.rb +35 -19
- data/lib/embulk/page.rb +1 -1
- data/lib/embulk/page_builder.rb +1 -1
- data/lib/embulk/parser_plugin.rb +76 -0
- data/lib/embulk/plugin.rb +130 -65
- data/lib/embulk/plugin_registry.rb +19 -8
- data/lib/embulk/schema.rb +4 -4
- data/lib/embulk/version.rb +1 -1
- data/settings.gradle +1 -0
- metadata +123 -90
- data/ChangeLog +0 -46
- data/embulk-cli/pom.xml +0 -94
- data/embulk-core/pom.xml +0 -148
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
- data/embulk-standards/pom.xml +0 -68
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
- data/pom.xml +0 -541
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module Embulk

  # Helper that reads files stored under the "embulk/data/<base_name>"
  # directory and copies them, or renders them through ERB, into a
  # destination directory.
  class PackageData
    # Decide once, at load time, where the data files live. The check looks
    # only at this file's own path: a "classpath:" prefix or a "!/" segment
    # selects the Java-resource branch, anything else the filesystem branch.
    if __FILE__ =~ /^classpath:/ || __FILE__.include?('!/')
      # data is in embulk-core jar
      runner_class = org.embulk.command.Runner.java_class
      JAVA_RESOURCE = true
      RESOURCE_URL = runner_class.resource("/embulk/data")
    else
      JAVA_RESOURCE = false
      FILE_BASE_PATH = File.join(Embulk.home('lib'), 'embulk', 'data')
    end

    # base_name::   sub-directory of the data directory to read from
    # dest_dir::    directory that receives copied / rendered files
    # erb_binding:: binding handed to ERB#result by #erb (may be nil)
    def initialize(base_name, dest_dir, erb_binding=nil)
      # fileutils is loaded here, on first instantiation, as in the original
      require 'fileutils'
      @base_name, @dest_dir, @erb_binding = base_name, dest_dir, erb_binding
    end

    # Location of the data file +src+: a URL-style string in the
    # Java-resource case, a filesystem path otherwise.
    def path(src)
      return File.join(FILE_BASE_PATH, @base_name, src) unless JAVA_RESOURCE
      "#{RESOURCE_URL}/#{@base_name}/#{src}"
    end

    # Entire content of the data file +src+.
    def content(src)
      source_path = path(src)
      File.read(source_path)
    end

    # Renders the data file +src+ as an ERB template (trim mode '%') using
    # the binding given to the constructor, and returns the result.
    def erb(src)
      require 'erb'
      template = ERB.new(content(src), nil, '%')
      template.result(@erb_binding)
    end

    # Copies the data file +src+ to +dest_name+ under the destination
    # directory, announcing it and creating parent directories first.
    def cp(src, dest_name)
      target = dest_path_message(dest_name)
      FileUtils.cp(path(src), target)
    end

    # Like #cp, but writes the ERB-rendered content of +src+.
    def cp_erb(src, dest_name)
      target = dest_path_message(dest_name)
      # The file is opened before the template is rendered.
      File.open(target, "w") do |out|
        out.write erb(src)
      end
    end

    # Joins the '/'-separated +dest_name+ onto the destination directory.
    def dest_path(dest_name)
      segments = dest_name.split('/')
      File.join(@dest_dir, *segments)
    end

    # Resolves +dest_name+, prints a "Creating" message, makes sure the
    # parent directory exists, and returns the resolved path.
    def dest_path_message(dest_name)
      dest_path(dest_name).tap do |target|
        puts " Creating #{target}"
        FileUtils.mkdir_p File.dirname(target)
      end
    end

    # Sets mode 0755 on the already-created destination file.
    def set_executable(dest_name)
      target = dest_path(dest_name)
      File.chmod(0755, target)
    end
  end

end
|
data/lib/embulk/data_source.rb
CHANGED
|
@@ -39,7 +39,7 @@ module Embulk
|
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
if Embulk.java?
|
|
42
|
-
def self.
|
|
42
|
+
def self.from_java(java_data_source_impl)
|
|
43
43
|
json = java_data_source_impl.toString
|
|
44
44
|
new.merge!(JSON.parse(json))
|
|
45
45
|
end
|
|
@@ -48,7 +48,7 @@ module Embulk
|
|
|
48
48
|
new.merge!(hash)
|
|
49
49
|
end
|
|
50
50
|
|
|
51
|
-
def
|
|
51
|
+
def to_java
|
|
52
52
|
json = to_json
|
|
53
53
|
Java::Injected::ModelManager.readObject(Java::DataSourceImpl.java_class, json.to_java)
|
|
54
54
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Embulk

  require 'embulk/data_source'

  # Skeleton of the decoder plugin class; most of it is still TODO.
  class DecoderPlugin
    class << self
      # Always raises NotImplementedError; there is no default behaviour.
      def transaction(config, &control)
        raise NotImplementedError, "DecoderPlugin.transaction(config, &control) must be implemented"
      end
    end

    # TODO

    # Everything below exists only when Embulk.java? is true.
    if Embulk.java?
      # TODO new_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class, passing the Java class,
        # this class and the RubyAdapter module.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, DecoderPlugin, RubyAdapter)
        end
      end

      # Module handed to JavaPlugin.ruby_adapter_class; currently empty.
      module RubyAdapter
        module ClassMethods
        end
        # TODO
      end
    end
  end

end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Embulk

  require 'embulk/data_source'

  # Skeleton of the encoder plugin class; most of it is still TODO.
  class EncoderPlugin
    class << self
      # Always raises NotImplementedError; there is no default behaviour.
      def transaction(config, &control)
        raise NotImplementedError, "EncoderPlugin.transaction(config, &control) must be implemented"
      end
    end

    # TODO

    # Everything below exists only when Embulk.java? is true.
    if Embulk.java?
      # TODO new_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class, passing the Java class,
        # this class and the RubyAdapter module.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, EncoderPlugin, RubyAdapter)
        end
      end

      # Module handed to JavaPlugin.ruby_adapter_class; currently empty.
      module RubyAdapter
        module ClassMethods
        end
        # TODO
      end
    end
  end

end
|
data/lib/embulk/error.rb
CHANGED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Embulk

  # Placeholder for the file input plugin class. Only the Java-side adapter
  # hooks exist; the Ruby API is still TODO.
  class FileInputPlugin
    # TODO transaction, resume, cleanup
    # TODO run

    # Everything below exists only when Embulk.java? is true.
    if Embulk.java?
      # TODO to_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class, passing the Java class,
        # this class and the RubyAdapter module.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, FileInputPlugin, RubyAdapter)
        end
      end

      module RubyAdapter
        module ClassMethods
          # Obtains an instance of java_class from the injector and wraps it
          # in a Java::FileInputRunner.
          def new_java
            java_plugin = Java.injector.getInstance(java_class)
            Java::FileInputRunner.new(java_plugin)
          end
          # TODO transaction, resume, cleanup
        end

        # TODO run
      end
    end
  end

end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Embulk

  # Placeholder for the file output plugin class. Only the Java-side adapter
  # hooks exist; the Ruby API is still TODO.
  class FileOutputPlugin
    # TODO transaction, resume, cleanup
    # TODO add, finish, close, abort, commit

    # Everything below exists only when Embulk.java? is true.
    if Embulk.java?
      # TODO to_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class, passing the Java class,
        # this class and the RubyAdapter module.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, FileOutputPlugin, RubyAdapter)
        end
      end

      module RubyAdapter
        module ClassMethods
          # Obtains an instance of java_class from the injector and wraps it
          # in a Java::FileOutputRunner.
          def new_java
            java_plugin = Java.injector.getInstance(java_class)
            Java::FileOutputRunner.new(java_plugin)
          end
          # TODO transaction, resume, cleanup
        end

        # TODO add, finish, close, abort, commit
      end
    end
  end

end
|
data/lib/embulk/filter_plugin.rb
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
module Embulk
|
|
2
2
|
|
|
3
|
+
require 'embulk/data_source'
|
|
4
|
+
require 'embulk/schema'
|
|
5
|
+
require 'embulk/page'
|
|
6
|
+
require 'embulk/page_builder'
|
|
7
|
+
|
|
3
8
|
class FilterPlugin
|
|
4
9
|
def self.transaction(config, in_schema, &control)
|
|
5
10
|
yield(config)
|
|
@@ -11,10 +16,14 @@ module Embulk
|
|
|
11
16
|
@in_schema = in_schema
|
|
12
17
|
@out_schema = out_schema
|
|
13
18
|
@page_builder = page_builder
|
|
19
|
+
init
|
|
14
20
|
end
|
|
15
21
|
|
|
16
22
|
attr_reader :task, :in_schema, :out_schema, :page_builder
|
|
17
23
|
|
|
24
|
+
def init
|
|
25
|
+
end
|
|
26
|
+
|
|
18
27
|
def add(page)
|
|
19
28
|
raise NotImplementedError, "FilterPlugin#add(page) must be implemented"
|
|
20
29
|
end
|
|
@@ -26,7 +35,7 @@ module Embulk
|
|
|
26
35
|
end
|
|
27
36
|
|
|
28
37
|
if Embulk.java?
|
|
29
|
-
def self.
|
|
38
|
+
def self.new_java
|
|
30
39
|
JavaAdapter.new(self)
|
|
31
40
|
end
|
|
32
41
|
|
|
@@ -38,20 +47,20 @@ module Embulk
|
|
|
38
47
|
end
|
|
39
48
|
|
|
40
49
|
def transaction(java_config, java_in_schema, java_control)
|
|
41
|
-
config = DataSource.
|
|
42
|
-
in_schema = Schema.
|
|
50
|
+
config = DataSource.from_java(java_config)
|
|
51
|
+
in_schema = Schema.from_java(java_in_schema)
|
|
43
52
|
@ruby_class.transaction(config, in_schema) do |task_source_hash, out_columns|
|
|
44
|
-
java_task_source = DataSource.from_ruby_hash(task_source_hash).
|
|
45
|
-
java_out_schemas = Schema.new(out_columns).
|
|
53
|
+
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
|
54
|
+
java_out_schemas = Schema.new(out_columns).to_java
|
|
46
55
|
java_control.run(java_task_source, java_out_schemas)
|
|
47
56
|
end
|
|
48
57
|
nil
|
|
49
58
|
end
|
|
50
59
|
|
|
51
60
|
def open(java_task_source, java_in_schema, java_out_schema, java_output)
|
|
52
|
-
task_source = DataSource.
|
|
53
|
-
in_schema = Schema.
|
|
54
|
-
out_schema = Schema.
|
|
61
|
+
task_source = DataSource.from_java(java_task_source)
|
|
62
|
+
in_schema = Schema.from_java(java_in_schema)
|
|
63
|
+
out_schema = Schema.from_java(java_out_schema)
|
|
55
64
|
page_builder = PageBuilder.new(out_schema, java_output)
|
|
56
65
|
ruby_object = @ruby_class.new(task_source, in_schema, out_schema, page_builder)
|
|
57
66
|
return OutputAdapter.new(ruby_object, in_schema, page_builder)
|
|
@@ -77,10 +86,20 @@ module Embulk
|
|
|
77
86
|
def close
|
|
78
87
|
@ruby_object.close
|
|
79
88
|
ensure
|
|
80
|
-
|
|
89
|
+
@page_builder.close
|
|
81
90
|
end
|
|
82
91
|
end
|
|
83
92
|
end
|
|
93
|
+
|
|
94
|
+
def self.from_java(java_class)
|
|
95
|
+
JavaPlugin.ruby_adapter_class(java_class, FilterPlugin, RubyAdapter)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
module RubyAdapter
|
|
99
|
+
module ClassMethods
|
|
100
|
+
end
|
|
101
|
+
# TODO
|
|
102
|
+
end
|
|
84
103
|
end
|
|
85
104
|
end
|
|
86
105
|
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
module Embulk

  require 'embulk/data_source'
  require 'embulk/schema'
  require 'embulk/page'
  #require 'embulk/file_output'  TODO not implemented

  # Base class for formatter plugins. Subclasses must implement #add(page)
  # and may override #init, #finish and #close.
  class FormatterPlugin
    # Default transaction: yields +config+ to the given block and returns an
    # empty Hash. +schema+ is not used by this default implementation.
    def self.transaction(config, schema, &control)
      yield(config)
      return {}
    end

    # Stores the three arguments (exposed through the readers below) and
    # then calls the #init hook.
    def initialize(task, schema, file_output)
      @task = task
      @schema = schema
      # Fixed: this was a bare `@file_output` expression, which left the
      # instance variable unset so #file_output always returned nil.
      @file_output = file_output
      init
    end

    attr_reader :task, :schema, :file_output

    # Hook called at the end of #initialize; does nothing by default.
    def init
    end

    # Must be overridden; the default raises NotImplementedError.
    def add(page)
      raise NotImplementedError, "FormatterPlugin#add(page) must be implemented"
    end

    # No-op by default.
    def finish
    end

    # No-op by default.
    def close
    end

    # Everything below exists only when Embulk.java? is true.
    if Embulk.java?
      # Wraps this Ruby class in a JavaAdapter.
      def self.new_java
        JavaAdapter.new(self)
      end

      # Exposes a Ruby formatter class through Java::FormatterPlugin.
      class JavaAdapter
        include Java::FormatterPlugin

        def initialize(ruby_class)
          @ruby_class = ruby_class
        end

        # Converts the Java config and schema to Ruby objects and runs the
        # Ruby class's transaction. Inside its block the task-source hash is
        # converted back to Java and passed to java_control.run.
        # Always returns nil.
        def transaction(java_config, java_schema, java_control)
          config = DataSource.from_java(java_config)
          schema = Schema.from_java(java_schema)
          @ruby_class.transaction(config, schema) do |task_source_hash|
            java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
            java_control.run(java_task_source)
          end
          nil
        end

        # Builds the Ruby formatter instance from converted arguments and
        # returns it wrapped in an OutputAdapter.
        # NOTE(review): FileOutput is referenced here, but the require for
        # 'embulk/file_output' above is commented out as not implemented.
        # Confirm the constant is defined elsewhere before relying on #open.
        def open(java_task_source, java_schema, java_file_output)
          task_source = DataSource.from_java(java_task_source)
          schema = Schema.from_java(java_schema)
          file_output = FileOutput.from_java(java_file_output)
          ruby_object = @ruby_class.new(task_source, schema, file_output)
          return OutputAdapter.new(ruby_object, schema, file_output)
        end

        # Forwards add/finish/close calls to the Ruby formatter object.
        class OutputAdapter
          include Java::TransactionalPageOutput

          def initialize(ruby_object, schema, file_output)
            @ruby_object = ruby_object
            @schema = schema
            @file_output = file_output
          end

          # Wraps the Java page in a Ruby Page and hands it to the formatter.
          def add(java_page)
            # TODO reuse page reader
            @ruby_object.add Page.new(java_page, @schema)
          end

          def finish
            @ruby_object.finish
          end

          # Closes the formatter; the file output is closed even if the
          # formatter's #close raises.
          def close
            @ruby_object.close
          ensure
            @file_output.close
          end
        end
      end

      # Delegates to JavaPlugin.ruby_adapter_class, passing the Java class,
      # this class and the RubyAdapter module.
      def self.from_java(java_class)
        JavaPlugin.ruby_adapter_class(java_class, FormatterPlugin, RubyAdapter)
      end

      module RubyAdapter
        module ClassMethods
          # TODO transaction, resume, cleanup
        end
        # TODO add, finish, close
      end
    end
  end

end
|
data/lib/embulk/guess_csv.rb
CHANGED
|
@@ -12,6 +12,15 @@ module Embulk
|
|
|
12
12
|
"\"", "'"
|
|
13
13
|
]
|
|
14
14
|
|
|
15
|
+
# CsvParserPlugin.TRUE_STRINGS
|
|
16
|
+
TRUE_STRINGS = Hash[*%w[
|
|
17
|
+
true True TRUE
|
|
18
|
+
yes Yes YES
|
|
19
|
+
y Y
|
|
20
|
+
on On ON
|
|
21
|
+
1
|
|
22
|
+
].map {|k| [k, true] }]
|
|
23
|
+
|
|
15
24
|
def guess_lines(config, sample_lines)
|
|
16
25
|
delim = guess_delimiter(sample_lines)
|
|
17
26
|
unless delim
|
|
@@ -152,7 +161,7 @@ module Embulk
|
|
|
152
161
|
end
|
|
153
162
|
|
|
154
163
|
def guess_type(str)
|
|
155
|
-
if [
|
|
164
|
+
if TRUE_STRINGS[str]
|
|
156
165
|
return "boolean"
|
|
157
166
|
end
|
|
158
167
|
|
data/lib/embulk/guess_plugin.rb
CHANGED
|
@@ -6,43 +6,38 @@ module Embulk
|
|
|
6
6
|
end
|
|
7
7
|
|
|
8
8
|
if Embulk.java?
|
|
9
|
-
def self.
|
|
9
|
+
def self.new_java
|
|
10
10
|
JavaAdapter.new(new)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
class RubyAdapter < Embulk::GuessPlugin
|
|
18
|
-
def initialized(java_guess)
|
|
19
|
-
@java_guess = java_guess
|
|
20
|
-
end
|
|
13
|
+
class JavaAdapter
|
|
14
|
+
include Java::GuessPlugin
|
|
21
15
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
java_sample = sample.java_object
|
|
25
|
-
java_next_config = @java_guess.guess(java_config, java_sample)
|
|
26
|
-
return DataSource.from_java_object(java_next_config)
|
|
16
|
+
def initialize(ruby_guess)
|
|
17
|
+
@ruby_guess = ruby_guess
|
|
27
18
|
end
|
|
28
19
|
|
|
29
|
-
def
|
|
30
|
-
|
|
20
|
+
def guess(java_config, java_sample)
|
|
21
|
+
config = DataSource.from_java(java_config)
|
|
22
|
+
sample = Buffer.from_java(java_sample)
|
|
23
|
+
config_diff_hash = @ruby_guess.guess(config, sample)
|
|
24
|
+
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
|
31
25
|
end
|
|
32
26
|
end
|
|
33
27
|
|
|
34
|
-
|
|
35
|
-
|
|
28
|
+
def self.from_java(java_class)
|
|
29
|
+
JavaPlugin.ruby_adapter(java_class, GuessPlugin, RubyAdapter)
|
|
30
|
+
end
|
|
36
31
|
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
module RubyAdapter
|
|
33
|
+
module ClassMethods
|
|
39
34
|
end
|
|
40
35
|
|
|
41
|
-
def guess(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
return DataSource.
|
|
36
|
+
def guess(config, sample)
|
|
37
|
+
java_config = config.to_java
|
|
38
|
+
java_sample = sample.to_java
|
|
39
|
+
java_config_diff = java_object.guess(java_config, java_sample)
|
|
40
|
+
return DataSource.from_java(java_config_diff)
|
|
46
41
|
end
|
|
47
42
|
end
|
|
48
43
|
end
|
|
@@ -60,7 +55,7 @@ module Embulk
|
|
|
60
55
|
return DataSource.new
|
|
61
56
|
end
|
|
62
57
|
|
|
63
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.
|
|
58
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
|
|
64
59
|
sample_text = ''
|
|
65
60
|
while decoder.nextFile
|
|
66
61
|
first = true
|
|
@@ -94,7 +89,7 @@ module Embulk
|
|
|
94
89
|
return DataSource.new
|
|
95
90
|
end
|
|
96
91
|
|
|
97
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.
|
|
92
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
|
|
98
93
|
sample_lines = []
|
|
99
94
|
while decoder.nextFile
|
|
100
95
|
while line = decoder.poll
|