embulk 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/.travis.yml +16 -0
- data/Gemfile +0 -1
- data/README.md +37 -19
- data/Rakefile +5 -37
- data/bin/embulk +1 -1
- data/build.gradle +178 -95
- data/embulk-core/build.gradle +1 -1
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +11 -10
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +2 -2
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +4 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +5 -3
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +1 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +6 -6
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +19 -19
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +61 -36
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +4 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +16 -1
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/{SchemaVisitor.java → ColumnVisitor.java} +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +3 -3
- data/embulk-core/src/main/java/org/embulk/spi/PluginClassLoader.java +80 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +1 -1
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +2 -2
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +6 -6
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +6 -6
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +1 -1
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +1 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +2 -2
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +1 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +3 -3
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +4 -4
- data/embulk-docs/Makefile +178 -0
- data/embulk-docs/build.gradle +20 -0
- data/embulk-docs/make.bat +243 -0
- data/embulk-docs/push-gh-pages.sh +29 -0
- data/embulk-docs/src/conf.py +260 -0
- data/embulk-docs/src/index.rst +19 -0
- data/embulk-docs/src/release.rst +14 -0
- data/embulk-docs/src/release/release-0.1.0.rst +8 -0
- data/embulk-docs/src/release/release-0.2.0.rst +16 -0
- data/embulk-docs/src/release/release-0.2.1.rst +19 -0
- data/embulk-docs/src/release/release-0.3.0.rst +34 -0
- data/embulk-docs/src/release/release-0.3.1.rst +11 -0
- data/embulk-docs/src/release/release-0.3.2.rst +15 -0
- data/embulk-docs/src/release/release-0.4.0.rst +74 -0
- data/embulk-standards/build.gradle +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +12 -3
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +2 -2
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +49 -26
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +16 -17
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +4 -4
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +5 -5
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/lib/embulk/buffer.rb +2 -2
- data/lib/embulk/column.rb +6 -6
- data/lib/embulk/command/embulk_example.rb +1 -1
- data/lib/embulk/command/embulk_new_plugin.rb +87 -0
- data/lib/embulk/command/embulk_run.rb +84 -26
- data/lib/embulk/data/bundle/Gemfile +12 -20
- data/lib/embulk/data/bundle/embulk/{filter_example.rb → filter/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{input_example.rb → input/example.rb} +3 -3
- data/lib/embulk/data/bundle/embulk/{output_example.rb → output/example.rb} +3 -3
- data/lib/embulk/data/new/LICENSE.txt +21 -0
- data/lib/embulk/data/new/README.md.erb +75 -0
- data/lib/embulk/data/new/gitignore.erb +12 -0
- data/lib/embulk/data/new/java/build.gradle.erb +57 -0
- data/lib/embulk/data/new/java/decoder.java.erb +40 -0
- data/lib/embulk/data/new/java/encoder.java.erb +40 -0
- data/lib/embulk/data/new/java/file_input.java.erb +64 -0
- data/lib/embulk/data/new/java/file_output.java.erb +66 -0
- data/lib/embulk/data/new/java/filter.java.erb +47 -0
- data/lib/embulk/data/new/java/formatter.java.erb +45 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/lib/embulk/data/new/java/gradlew +164 -0
- data/lib/embulk/data/new/java/gradlew.bat +90 -0
- data/lib/embulk/data/new/java/input.java.erb +69 -0
- data/lib/embulk/data/new/java/output.java.erb +65 -0
- data/lib/embulk/data/new/java/parser.java.erb +51 -0
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
- data/lib/embulk/data/new/java/test.java.erb +5 -0
- data/lib/embulk/data/new/ruby/Gemfile +2 -0
- data/lib/embulk/data/new/ruby/Rakefile +1 -0
- data/lib/embulk/data/new/ruby/filter.rb.erb +39 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +19 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/output.rb.erb +59 -0
- data/lib/embulk/data/package_data.rb +64 -0
- data/lib/embulk/data_source.rb +2 -2
- data/lib/embulk/decoder_plugin.rb +27 -0
- data/lib/embulk/encoder_plugin.rb +27 -0
- data/lib/embulk/error.rb +3 -0
- data/lib/embulk/file_input_plugin.rb +27 -0
- data/lib/embulk/file_output_plugin.rb +27 -0
- data/lib/embulk/filter_plugin.rb +28 -9
- data/lib/embulk/formatter_plugin.rb +105 -0
- data/lib/embulk/guess_csv.rb +10 -1
- data/lib/embulk/guess_plugin.rb +22 -27
- data/lib/embulk/input_plugin.rb +34 -20
- data/lib/embulk/java/bootstrap.rb +5 -0
- data/lib/embulk/java/imports.rb +7 -0
- data/lib/embulk/java_plugin.rb +84 -0
- data/lib/embulk/output_plugin.rb +35 -19
- data/lib/embulk/page.rb +1 -1
- data/lib/embulk/page_builder.rb +1 -1
- data/lib/embulk/parser_plugin.rb +76 -0
- data/lib/embulk/plugin.rb +130 -65
- data/lib/embulk/plugin_registry.rb +19 -8
- data/lib/embulk/schema.rb +4 -4
- data/lib/embulk/version.rb +1 -1
- data/settings.gradle +1 -0
- metadata +123 -90
- data/ChangeLog +0 -46
- data/embulk-cli/pom.xml +0 -94
- data/embulk-core/pom.xml +0 -148
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +0 -26
- data/embulk-standards/pom.xml +0 -68
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +0 -250
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +0 -43
- data/pom.xml +0 -541
@@ -0,0 +1,64 @@
|
|
1
|
+
module Embulk

  # Resolves files that live under embulk/data and copies them, or renders
  # them through ERB, into a destination directory.
  class PackageData
    # Choose where the data files are read from, based on how this file
    # itself was loaded: a path starting with "classpath:" or containing "!/"
    # selects the Java resource lookup, anything else the lib directory.
    if __FILE__ =~ /^classpath:/ || __FILE__.include?('!/')
      # data is in embulk-core jar
      runner_class = org.embulk.command.Runner.java_class
      JAVA_RESOURCE = true
      RESOURCE_URL = runner_class.resource("/embulk/data")
    else
      JAVA_RESOURCE = false
      FILE_BASE_PATH = File.join(Embulk.home('lib'), 'embulk', 'data')
    end

    # base_name   - sub-directory of embulk/data that sources are read from
    # dest_dir    - directory that destination names are resolved against
    # erb_binding - binding handed to ERB#result when rendering templates
    def initialize(base_name, dest_dir, erb_binding=nil)
      # FileUtils is used by #cp and #dest_path_message.
      require 'fileutils'
      @base_name = base_name
      @dest_dir = dest_dir
      @erb_binding = erb_binding
    end

    # Returns the location of +src+ inside the data directory: a string built
    # from RESOURCE_URL in the Java-resource case, a filesystem path otherwise.
    def path(src)
      return "#{RESOURCE_URL}/#{@base_name}/#{src}" if JAVA_RESOURCE

      File.join(FILE_BASE_PATH, @base_name, src)
    end

    # Returns the contents of the data file +src+ as read by File.read.
    def content(src)
      source_path = path(src)
      File.read(source_path)
    end

    # Renders the data file +src+ as an ERB template (trim mode '%') against
    # the binding given to the constructor and returns the result.
    def erb(src)
      require 'erb'
      template = ERB.new(content(src), nil, '%')
      template.result(@erb_binding)
    end

    # Copies the data file +src+ to +dest_name+ below the destination dir.
    def cp(src, dest_name)
      destination = dest_path_message(dest_name)
      FileUtils.cp(path(src), destination)
    end

    # Writes the ERB-rendered data file +src+ to +dest_name+ below the
    # destination dir. The template is rendered after the file is opened.
    def cp_erb(src, dest_name)
      destination = dest_path_message(dest_name)
      File.open(destination, "w") do |file|
        file.write(erb(src))
      end
    end

    # Joins the '/'-separated +dest_name+ onto the destination directory.
    def dest_path(dest_name)
      segments = dest_name.split('/')
      File.join(@dest_dir, *segments)
    end

    # Same as #dest_path, but also prints a "Creating" message and makes sure
    # the parent directory exists. Returns the resolved path.
    def dest_path_message(dest_name)
      dest_path(dest_name).tap do |destination|
        puts " Creating #{destination}"
        FileUtils.mkdir_p(File.dirname(destination))
      end
    end

    # Sets mode 0755 on the already-created destination file +dest_name+.
    def set_executable(dest_name)
      target = dest_path(dest_name)
      File.chmod(0755, target)
    end
  end

end
|
data/lib/embulk/data_source.rb
CHANGED
@@ -39,7 +39,7 @@ module Embulk
|
|
39
39
|
end
|
40
40
|
|
41
41
|
if Embulk.java?
|
42
|
-
def self.
|
42
|
+
def self.from_java(java_data_source_impl)
|
43
43
|
json = java_data_source_impl.toString
|
44
44
|
new.merge!(JSON.parse(json))
|
45
45
|
end
|
@@ -48,7 +48,7 @@ module Embulk
|
|
48
48
|
new.merge!(hash)
|
49
49
|
end
|
50
50
|
|
51
|
-
def
|
51
|
+
def to_java
|
52
52
|
json = to_json
|
53
53
|
Java::Injected::ModelManager.readObject(Java::DataSourceImpl.java_class, json.to_java)
|
54
54
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Embulk

  require 'embulk/data_source'

  # Ruby-side base class for decoder plugins. Most of the API is still a
  # placeholder (see the TODO markers below).
  class DecoderPlugin
    class << self
      # Placeholder: always raises NotImplementedError.
      def transaction(config, &control)
        raise NotImplementedError, "DecoderPlugin.transaction(config, &control) must be implemented"
      end
    end

    # TODO

    if Embulk.java?
      # TODO new_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class with the given Java class,
        # this base class and the RubyAdapter module, and returns its result.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, DecoderPlugin, RubyAdapter)
        end
      end

      # Passed to JavaPlugin.ruby_adapter_class by .from_java; currently
      # carries no methods.
      module RubyAdapter
        module ClassMethods
        end
        # TODO
      end
    end
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Embulk

  require 'embulk/data_source'

  # Ruby-side base class for encoder plugins. Most of the API is still a
  # placeholder (see the TODO markers below).
  class EncoderPlugin
    class << self
      # Placeholder: always raises NotImplementedError.
      def transaction(config, &control)
        raise NotImplementedError, "EncoderPlugin.transaction(config, &control) must be implemented"
      end
    end

    # TODO

    if Embulk.java?
      # TODO new_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class with the given Java class,
        # this base class and the RubyAdapter module, and returns its result.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, EncoderPlugin, RubyAdapter)
        end
      end

      # Passed to JavaPlugin.ruby_adapter_class by .from_java; currently
      # carries no methods.
      module RubyAdapter
        module ClassMethods
        end
        # TODO
      end
    end
  end

end
|
data/lib/embulk/error.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Embulk

  # Ruby-side counterpart of file input plugins. Only the bridge for plugins
  # implemented in Java exists so far; the Ruby API is still TODO.
  class FileInputPlugin
    # TODO transaction, resume, cleanup
    # TODO run

    if Embulk.java?
      # TODO to_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class with the given Java class,
        # this base class and the RubyAdapter module, and returns its result.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, FileInputPlugin, RubyAdapter)
        end
      end

      # Passed to JavaPlugin.ruby_adapter_class by .from_java.
      module RubyAdapter
        module ClassMethods
          # Obtains an instance of java_class from the injector and wraps it
          # in a Java::FileInputRunner.
          def new_java
            plugin = Java.injector.getInstance(java_class)
            Java::FileInputRunner.new(plugin)
          end
          # TODO transaction, resume, cleanup
        end

        # TODO run
      end
    end
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Embulk

  # Ruby-side counterpart of file output plugins. Only the bridge for plugins
  # implemented in Java exists so far; the Ruby API is still TODO.
  class FileOutputPlugin
    # TODO transaction, resume, cleanup
    # TODO add, finish, close, abort, commit

    if Embulk.java?
      # TODO to_java

      class << self
        # Delegates to JavaPlugin.ruby_adapter_class with the given Java class,
        # this base class and the RubyAdapter module, and returns its result.
        def from_java(java_class)
          JavaPlugin.ruby_adapter_class(java_class, FileOutputPlugin, RubyAdapter)
        end
      end

      # Passed to JavaPlugin.ruby_adapter_class by .from_java.
      module RubyAdapter
        module ClassMethods
          # Obtains an instance of java_class from the injector and wraps it
          # in a Java::FileOutputRunner.
          def new_java
            plugin = Java.injector.getInstance(java_class)
            Java::FileOutputRunner.new(plugin)
          end
          # TODO transaction, resume, cleanup
        end

        # TODO add, finish, close, abort, commit
      end
    end
  end

end
|
data/lib/embulk/filter_plugin.rb
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
module Embulk
|
2
2
|
|
3
|
+
require 'embulk/data_source'
|
4
|
+
require 'embulk/schema'
|
5
|
+
require 'embulk/page'
|
6
|
+
require 'embulk/page_builder'
|
7
|
+
|
3
8
|
class FilterPlugin
|
4
9
|
def self.transaction(config, in_schema, &control)
|
5
10
|
yield(config)
|
@@ -11,10 +16,14 @@ module Embulk
|
|
11
16
|
@in_schema = in_schema
|
12
17
|
@out_schema = out_schema
|
13
18
|
@page_builder = page_builder
|
19
|
+
init
|
14
20
|
end
|
15
21
|
|
16
22
|
attr_reader :task, :in_schema, :out_schema, :page_builder
|
17
23
|
|
24
|
+
def init
|
25
|
+
end
|
26
|
+
|
18
27
|
def add(page)
|
19
28
|
raise NotImplementedError, "FilterPlugin#add(page) must be implemented"
|
20
29
|
end
|
@@ -26,7 +35,7 @@ module Embulk
|
|
26
35
|
end
|
27
36
|
|
28
37
|
if Embulk.java?
|
29
|
-
def self.
|
38
|
+
def self.new_java
|
30
39
|
JavaAdapter.new(self)
|
31
40
|
end
|
32
41
|
|
@@ -38,20 +47,20 @@ module Embulk
|
|
38
47
|
end
|
39
48
|
|
40
49
|
def transaction(java_config, java_in_schema, java_control)
|
41
|
-
config = DataSource.
|
42
|
-
in_schema = Schema.
|
50
|
+
config = DataSource.from_java(java_config)
|
51
|
+
in_schema = Schema.from_java(java_in_schema)
|
43
52
|
@ruby_class.transaction(config, in_schema) do |task_source_hash, out_columns|
|
44
|
-
java_task_source = DataSource.from_ruby_hash(task_source_hash).
|
45
|
-
java_out_schemas = Schema.new(out_columns).
|
53
|
+
java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
|
54
|
+
java_out_schemas = Schema.new(out_columns).to_java
|
46
55
|
java_control.run(java_task_source, java_out_schemas)
|
47
56
|
end
|
48
57
|
nil
|
49
58
|
end
|
50
59
|
|
51
60
|
def open(java_task_source, java_in_schema, java_out_schema, java_output)
|
52
|
-
task_source = DataSource.
|
53
|
-
in_schema = Schema.
|
54
|
-
out_schema = Schema.
|
61
|
+
task_source = DataSource.from_java(java_task_source)
|
62
|
+
in_schema = Schema.from_java(java_in_schema)
|
63
|
+
out_schema = Schema.from_java(java_out_schema)
|
55
64
|
page_builder = PageBuilder.new(out_schema, java_output)
|
56
65
|
ruby_object = @ruby_class.new(task_source, in_schema, out_schema, page_builder)
|
57
66
|
return OutputAdapter.new(ruby_object, in_schema, page_builder)
|
@@ -77,10 +86,20 @@ module Embulk
|
|
77
86
|
def close
|
78
87
|
@ruby_object.close
|
79
88
|
ensure
|
80
|
-
|
89
|
+
@page_builder.close
|
81
90
|
end
|
82
91
|
end
|
83
92
|
end
|
93
|
+
|
94
|
+
def self.from_java(java_class)
|
95
|
+
JavaPlugin.ruby_adapter_class(java_class, FilterPlugin, RubyAdapter)
|
96
|
+
end
|
97
|
+
|
98
|
+
module RubyAdapter
|
99
|
+
module ClassMethods
|
100
|
+
end
|
101
|
+
# TODO
|
102
|
+
end
|
84
103
|
end
|
85
104
|
end
|
86
105
|
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Embulk

  require 'embulk/data_source'
  require 'embulk/schema'
  require 'embulk/page'
  #require 'embulk/file_output' TODO not implemented

  # Base class for formatter plugins written in Ruby. Subclasses override
  # #add (required) and optionally #init, #finish and #close.
  class FormatterPlugin
    # Default transaction: yields +config+ to the given block as the task
    # source and returns an empty hash.
    def self.transaction(config, schema, &control)
      yield(config)
      return {}
    end

    # Stores the task, schema and file output, then calls the #init hook.
    def initialize(task, schema, file_output)
      @task = task
      @schema = schema
      # Was a bare `@file_output` expression, which left the instance variable
      # unset so that the file_output reader always returned nil.
      @file_output = file_output
      init
    end

    attr_reader :task, :schema, :file_output

    # Hook called at the end of #initialize; does nothing by default.
    def init
    end

    # Receives one page of records. Must be overridden by subclasses.
    def add(page)
      raise NotImplementedError, "FormatterPlugin#add(page) must be implemented"
    end

    # Hook forwarded from OutputAdapter#finish; does nothing by default.
    def finish
    end

    # Hook forwarded from OutputAdapter#close; does nothing by default.
    def close
    end

    if Embulk.java?
      # Wraps this Ruby plugin class in an adapter that includes
      # Java::FormatterPlugin.
      def self.new_java
        JavaAdapter.new(self)
      end

      # Adapter that receives Java-side calls and forwards them to the
      # wrapped Ruby plugin class.
      class JavaAdapter
        include Java::FormatterPlugin

        def initialize(ruby_class)
          @ruby_class = ruby_class
        end

        # Converts the Java config and schema to Ruby objects and runs the
        # Ruby class's transaction. The task source hash yielded by the Ruby
        # side is converted back to Java and passed to java_control.run.
        # Always returns nil.
        def transaction(java_config, java_schema, java_control)
          config = DataSource.from_java(java_config)
          schema = Schema.from_java(java_schema)
          @ruby_class.transaction(config, schema) do |task_source_hash|
            java_task_source = DataSource.from_ruby_hash(task_source_hash).to_java
            java_control.run(java_task_source)
          end
          nil
        end

        # Instantiates the Ruby plugin for one task and returns an
        # OutputAdapter wrapping it.
        def open(java_task_source, java_schema, java_file_output)
          task_source = DataSource.from_java(java_task_source)
          schema = Schema.from_java(java_schema)
          # NOTE(review): the require for embulk/file_output above is commented
          # out as "not implemented" -- confirm FileOutput is defined elsewhere.
          file_output = FileOutput.from_java(java_file_output)
          ruby_object = @ruby_class.new(task_source, schema, file_output)
          return OutputAdapter.new(ruby_object, schema, file_output)
        end

        # Forwards page output calls to the Ruby plugin instance.
        class OutputAdapter
          include Java::TransactionalPageOutput

          def initialize(ruby_object, schema, file_output)
            @ruby_object = ruby_object
            @schema = schema
            @file_output = file_output
          end

          # Wraps the Java page in a Page with the schema and hands it to the
          # Ruby plugin.
          def add(java_page)
            # TODO reuse page reader
            @ruby_object.add Page.new(java_page, @schema)
          end

          def finish
            @ruby_object.finish
          end

          # Closes the Ruby plugin; the file output is closed even when the
          # plugin's #close raises.
          def close
            @ruby_object.close
          ensure
            @file_output.close
          end
        end
      end

      # Delegates to JavaPlugin.ruby_adapter_class with the given Java class,
      # this base class and the RubyAdapter module, and returns its result.
      def self.from_java(java_class)
        JavaPlugin.ruby_adapter_class(java_class, FormatterPlugin, RubyAdapter)
      end

      # Passed to JavaPlugin.ruby_adapter_class by .from_java; currently
      # carries no methods.
      module RubyAdapter
        module ClassMethods
          # TODO transaction, resume, cleanup
        end
        # TODO add, finish, close
      end
    end
  end

end
|
data/lib/embulk/guess_csv.rb
CHANGED
@@ -12,6 +12,15 @@ module Embulk
|
|
12
12
|
"\"", "'"
|
13
13
|
]
|
14
14
|
|
15
|
+
# CsvParserPlugin.TRUE_STRINGS
|
16
|
+
TRUE_STRINGS = Hash[*%w[
|
17
|
+
true True TRUE
|
18
|
+
yes Yes YES
|
19
|
+
y Y
|
20
|
+
on On ON
|
21
|
+
1
|
22
|
+
].map {|k| [k, true] }]
|
23
|
+
|
15
24
|
def guess_lines(config, sample_lines)
|
16
25
|
delim = guess_delimiter(sample_lines)
|
17
26
|
unless delim
|
@@ -152,7 +161,7 @@ module Embulk
|
|
152
161
|
end
|
153
162
|
|
154
163
|
def guess_type(str)
|
155
|
-
if [
|
164
|
+
if TRUE_STRINGS[str]
|
156
165
|
return "boolean"
|
157
166
|
end
|
158
167
|
|
data/lib/embulk/guess_plugin.rb
CHANGED
@@ -6,43 +6,38 @@ module Embulk
|
|
6
6
|
end
|
7
7
|
|
8
8
|
if Embulk.java?
|
9
|
-
def self.
|
9
|
+
def self.new_java
|
10
10
|
JavaAdapter.new(new)
|
11
11
|
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
class RubyAdapter < Embulk::GuessPlugin
|
18
|
-
def initialized(java_guess)
|
19
|
-
@java_guess = java_guess
|
20
|
-
end
|
13
|
+
class JavaAdapter
|
14
|
+
include Java::GuessPlugin
|
21
15
|
|
22
|
-
def
|
23
|
-
|
24
|
-
java_sample = sample.java_object
|
25
|
-
java_next_config = @java_guess.guess(java_config, java_sample)
|
26
|
-
return DataSource.from_java_object(java_next_config)
|
16
|
+
def initialize(ruby_guess)
|
17
|
+
@ruby_guess = ruby_guess
|
27
18
|
end
|
28
19
|
|
29
|
-
def
|
30
|
-
|
20
|
+
def guess(java_config, java_sample)
|
21
|
+
config = DataSource.from_java(java_config)
|
22
|
+
sample = Buffer.from_java(java_sample)
|
23
|
+
config_diff_hash = @ruby_guess.guess(config, sample)
|
24
|
+
return DataSource.from_ruby_hash(config_diff_hash).to_java
|
31
25
|
end
|
32
26
|
end
|
33
27
|
|
34
|
-
|
35
|
-
|
28
|
+
def self.from_java(java_class)
|
29
|
+
JavaPlugin.ruby_adapter(java_class, GuessPlugin, RubyAdapter)
|
30
|
+
end
|
36
31
|
|
37
|
-
|
38
|
-
|
32
|
+
module RubyAdapter
|
33
|
+
module ClassMethods
|
39
34
|
end
|
40
35
|
|
41
|
-
def guess(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
return DataSource.
|
36
|
+
def guess(config, sample)
|
37
|
+
java_config = config.to_java
|
38
|
+
java_sample = sample.to_java
|
39
|
+
java_config_diff = java_object.guess(java_config, java_sample)
|
40
|
+
return DataSource.from_java(java_config_diff)
|
46
41
|
end
|
47
42
|
end
|
48
43
|
end
|
@@ -60,7 +55,7 @@ module Embulk
|
|
60
55
|
return DataSource.new
|
61
56
|
end
|
62
57
|
|
63
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.
|
58
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
|
64
59
|
sample_text = ''
|
65
60
|
while decoder.nextFile
|
66
61
|
first = true
|
@@ -94,7 +89,7 @@ module Embulk
|
|
94
89
|
return DataSource.new
|
95
90
|
end
|
96
91
|
|
97
|
-
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.
|
92
|
+
decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), task)
|
98
93
|
sample_lines = []
|
99
94
|
while decoder.nextFile
|
100
95
|
while line = decoder.poll
|