embulk 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/build.gradle +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.5.rst +24 -0
- data/lib/embulk/buffer.rb +5 -0
- data/lib/embulk/command/embulk_run.rb +12 -10
- data/lib/embulk/data/new/ruby/filter.rb.erb +2 -2
- data/lib/embulk/data/new/ruby/formatter.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +3 -3
- data/lib/embulk/data/new/ruby/output.rb.erb +2 -2
- data/lib/embulk/data/new/ruby/parser.rb.erb +42 -0
- data/lib/embulk/file_input.rb +83 -0
- data/lib/embulk/file_output.rb +51 -0
- data/lib/embulk/formatter_plugin.rb +3 -3
- data/lib/embulk/parser_plugin.rb +2 -2
- data/lib/embulk/plugin.rb +6 -8
- data/lib/embulk/version.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57cffd859609745fafc0e644e0e8466c0511c622
|
4
|
+
data.tar.gz: 5df593df4e75dce977ac7ab295f9fe089ababa1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c613261f18c551e6eb1ea9f748148422054d0818a76d8367de8eb68a071445686e871ce304d1413b10eca8570d220b4d53dbd6f3b7f060b008ce3c500903fe9
|
7
|
+
data.tar.gz: 0e6c8086818d6f7d81ac8009ee5fbf76d0aa0a34fc962af60f3647d189ca488cd3c4aa09c8b49f62b3551fcd9b4ad42e2582b69f6a56f2c78f9f6e06f6986439
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
|
|
24
24
|
The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
|
25
25
|
|
26
26
|
```
|
27
|
-
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.4.jar -O embulk.jar
|
27
|
+
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.5.jar -O embulk.jar
|
28
28
|
java -jar embulk.jar --help
|
29
29
|
```
|
30
30
|
|
@@ -40,10 +40,10 @@ java -jar embulk.jar run config.yml
|
|
40
40
|
### Using plugins
|
41
41
|
|
42
42
|
You can use plugins to load data from/to various systems and file formats.
|
43
|
-
An example is [embulk-
|
43
|
+
An example is [embulk-output-postgres-json](https://github.com/frsyuki/embulk-output-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
|
44
44
|
|
45
45
|
```
|
46
|
-
java -jar embulk.jar gem install embulk-
|
46
|
+
java -jar embulk.jar gem install embulk-output-postgres-json
|
47
47
|
java -jar embulk.jar gem list
|
48
48
|
```
|
49
49
|
|
data/build.gradle
CHANGED
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
Release 0.4.5
|
2
|
+
==================================
|
3
|
+
|
4
|
+
CLI
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed a problem where ``embulk gem install foo --version x.y.z`` shows embulk's version.
|
8
|
+
|
9
|
+
Plugin SPI
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* Added ParserPlugin SPI for JRuby. You can write parser plugins in Ruby.
|
13
|
+
* Added FormatterPlugin SPI for JRuby. You can write formatter plugins in Ruby.
|
14
|
+
|
15
|
+
|
16
|
+
Plugin API Changes
|
17
|
+
------------------
|
18
|
+
|
19
|
+
* Added ``Embulk::FileInput`` API used mainly by parser plugins.
|
20
|
+
* Added ``Embulk::FileOutput`` API used mainly by formatter plugins.
|
21
|
+
|
22
|
+
Release Date
|
23
|
+
------------------
|
24
|
+
2015-02-19
|
data/lib/embulk/buffer.rb
CHANGED
@@ -19,18 +19,20 @@ module Embulk
|
|
19
19
|
|
20
20
|
require 'embulk/version'
|
21
21
|
|
22
|
-
if argv.include?('--version')
|
23
|
-
puts "embulk #{Embulk::VERSION}"
|
24
|
-
exit 0
|
25
|
-
end
|
26
|
-
|
27
22
|
i = argv.find_index {|arg| arg !~ /^\-/ }
|
28
|
-
|
23
|
+
unless i
|
24
|
+
if argv.include?('--version')
|
25
|
+
puts "embulk #{Embulk::VERSION}"
|
26
|
+
exit 0
|
27
|
+
end
|
28
|
+
usage nil
|
29
|
+
end
|
29
30
|
subcmd = argv.slice!(i)
|
30
31
|
|
31
32
|
require 'java'
|
32
33
|
require 'optparse'
|
33
34
|
op = OptionParser.new
|
35
|
+
op.version = Embulk::VERSION
|
34
36
|
|
35
37
|
puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S,%3N %z")}: Embulk v#{Embulk::VERSION}"
|
36
38
|
|
@@ -127,8 +129,8 @@ categories:
|
|
127
129
|
ruby-filter Ruby record filter plugin (like "add-hostname")
|
128
130
|
#ruby-file-input Ruby file input plugin (like "ftp") # not implemented yet [#21]
|
129
131
|
#ruby-file-output Ruby file output plugin (like "ftp") # not implemented yet [#22]
|
130
|
-
|
131
|
-
|
132
|
+
ruby-parser Ruby file parser plugin (like "csv") # not implemented yet [#33]
|
133
|
+
ruby-formatter Ruby file formatter plugin (like "csv") # not implemented yet [#34]
|
132
134
|
#ruby-decoder Ruby file decoder plugin (like "gzip") # not implemented yet [#31]
|
133
135
|
#ruby-encoder Ruby file encoder plugin (like "gzip") # not implemented yet [#32]
|
134
136
|
java-input Java record input plugin (like "mysql")
|
@@ -259,8 +261,8 @@ examples:
|
|
259
261
|
when "ruby-filter" then [:ruby, :filter]
|
260
262
|
when "ruby-file-input" then raise "ruby-file-input is not implemented yet. See #21 on github." #[:ruby, :file_input]
|
261
263
|
when "ruby-file-output" then raise "ruby-file-output is not implemented yet. See #22 on github." #[:ruby, :file_output]
|
262
|
-
when "ruby-parser" then
|
263
|
-
when "ruby-formatter" then
|
264
|
+
when "ruby-parser" then [:ruby, :parser]
|
265
|
+
when "ruby-formatter" then [:ruby, :formatter]
|
264
266
|
when "ruby-decoder" then raise "ruby-decoder is not implemented yet. See #31 on github." #[:ruby, :decoder]
|
265
267
|
when "ruby-encoder" then raise "ruby-decoder is not implemented yet. See #32 on github." #[:ruby, :encoder]
|
266
268
|
else
|
@@ -7,8 +7,8 @@ module Embulk
|
|
7
7
|
def self.transaction(config, in_schema, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
yield(task, out_columns)
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Formatter
|
3
|
+
|
4
|
+
class <%= ruby_class_name %> < FormatterPlugin
|
5
|
+
Plugin.register_formatter(<%= name.dump %>, self)
|
6
|
+
|
7
|
+
def self.transaction(config, schema, &control)
|
8
|
+
# configuration code:
|
9
|
+
task = {
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
|
+
}
|
13
|
+
|
14
|
+
yield(task)
|
15
|
+
end
|
16
|
+
|
17
|
+
def init
|
18
|
+
# initialization code:
|
19
|
+
@property1 = task["property1"]
|
20
|
+
@property2 = task["property2"]
|
21
|
+
|
22
|
+
# your data
|
23
|
+
@current_file == nil
|
24
|
+
@current_file_size = 0
|
25
|
+
end
|
26
|
+
|
27
|
+
def close
|
28
|
+
end
|
29
|
+
|
30
|
+
def add(page)
|
31
|
+
# output code:
|
32
|
+
page.each do |record|
|
33
|
+
if @current_file == nil || @current_file_size > 32*1024
|
34
|
+
@current_file = @file_output.next_file
|
35
|
+
@current_file_size = 0
|
36
|
+
end
|
37
|
+
@current_file.write "|mydata|"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def finish
|
42
|
+
@file_output.finish
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
@@ -7,13 +7,13 @@ module Embulk
|
|
7
7
|
def self.transaction(config, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
columns = [
|
15
15
|
Column.new(0, "example", :string),
|
16
|
-
Column.new(1, "column", :
|
16
|
+
Column.new(1, "column", :long),
|
17
17
|
Column.new(2, "name", :double),
|
18
18
|
]
|
19
19
|
|
@@ -7,8 +7,8 @@ module Embulk
|
|
7
7
|
def self.transaction(config, schema, count, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
# resumable output:
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Parser
|
3
|
+
|
4
|
+
class <%= ruby_class_name %> < ParserPlugin
|
5
|
+
Plugin.register_parser(<%= name.dump %>, self)
|
6
|
+
|
7
|
+
def self.transaction(config, &control)
|
8
|
+
# configuration code:
|
9
|
+
task = {
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
|
+
}
|
13
|
+
|
14
|
+
columns = [
|
15
|
+
Column.new(0, "example", :string),
|
16
|
+
Column.new(1, "column", :long),
|
17
|
+
Column.new(2, "name", :double),
|
18
|
+
]
|
19
|
+
|
20
|
+
yield(task, columns)
|
21
|
+
end
|
22
|
+
|
23
|
+
def init
|
24
|
+
# initialization code:
|
25
|
+
@property1 = task["property1"]
|
26
|
+
@property2 = task["property2"]
|
27
|
+
end
|
28
|
+
|
29
|
+
def run(file_input)
|
30
|
+
while file = file_input.next_file
|
31
|
+
file.each do |buffer|
|
32
|
+
# parsing code
|
33
|
+
record = ["col1", 2, 3.0]
|
34
|
+
@page_builder.add(record)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
@page_builder.finish
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'embulk/buffer'
|
4
|
+
|
5
|
+
class FileInput
|
6
|
+
def initialize(java_file_input)
|
7
|
+
@java_file_input = java_file_input
|
8
|
+
@buffer = nil
|
9
|
+
end
|
10
|
+
|
11
|
+
def next_file
|
12
|
+
if @java_file_input.nextFile
|
13
|
+
return self
|
14
|
+
else
|
15
|
+
return nil
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def each(&block)
|
20
|
+
if @buffer
|
21
|
+
yield @buffer
|
22
|
+
@buffer = nil
|
23
|
+
end
|
24
|
+
|
25
|
+
while java_buffer = @java_file_input.poll
|
26
|
+
buffer = Buffer.from_java(java_buffer)
|
27
|
+
java_buffer.release
|
28
|
+
yield buffer
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def read(count=nil, dest=nil)
|
33
|
+
if count == nil
|
34
|
+
@buffer ||= Buffer.new
|
35
|
+
while java_buffer = @java_file_input.poll
|
36
|
+
@buffer << Buffer.from_java(java_buffer)
|
37
|
+
java_buffer.release
|
38
|
+
end
|
39
|
+
|
40
|
+
return nil if @buffer.empty? && count != 0
|
41
|
+
|
42
|
+
if dest
|
43
|
+
dest.replace(@buffer)
|
44
|
+
else
|
45
|
+
dest = @buffer
|
46
|
+
end
|
47
|
+
@buffer = nil
|
48
|
+
|
49
|
+
else
|
50
|
+
@buffer ||= Buffer.new
|
51
|
+
until @buffer.size >= count
|
52
|
+
java_buffer = @java_file_input.poll
|
53
|
+
break unless java_buffer
|
54
|
+
@buffer << Buffer.from_java(java_buffer)
|
55
|
+
java_buffer.release
|
56
|
+
end
|
57
|
+
|
58
|
+
return nil if @buffer.empty? && count != 0
|
59
|
+
|
60
|
+
if @buffer.size <= count
|
61
|
+
if dest
|
62
|
+
dest.replace(@buffer)
|
63
|
+
else
|
64
|
+
dest = @buffer
|
65
|
+
end
|
66
|
+
@buffer = nil
|
67
|
+
else
|
68
|
+
data = @buffer.slice!(0, count)
|
69
|
+
if dest
|
70
|
+
dest.replace(data)
|
71
|
+
else
|
72
|
+
dest = data
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
return dest
|
77
|
+
end
|
78
|
+
|
79
|
+
def close
|
80
|
+
@java_file_input.close
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'embulk/buffer'
|
4
|
+
|
5
|
+
class FileOutput
|
6
|
+
def initialize(java_file_output)
|
7
|
+
@java_file_output = java_file_output
|
8
|
+
@buffer = Buffer.new
|
9
|
+
@buffer.force_encoding('ASCII-8BIT')
|
10
|
+
@flush_size = 32*1024
|
11
|
+
end
|
12
|
+
|
13
|
+
def next_file
|
14
|
+
flush
|
15
|
+
@java_file_output.nextFile
|
16
|
+
self
|
17
|
+
end
|
18
|
+
|
19
|
+
def write(buffer)
|
20
|
+
buffer.force_encoding('ASCII-8BIT') # TODO: this destructively changes the caller's buffer
|
21
|
+
@buffer << buffer
|
22
|
+
if @buffer.size > @flush_size
|
23
|
+
flush
|
24
|
+
end
|
25
|
+
nil
|
26
|
+
end
|
27
|
+
|
28
|
+
def add(buffer)
|
29
|
+
flush
|
30
|
+
@java_file_output.add(Buffer.from_ruby_string(buffer))
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
|
34
|
+
def flush
|
35
|
+
unless @buffer.empty?
|
36
|
+
@java_file_output.add(@buffer.to_java)
|
37
|
+
end
|
38
|
+
nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def finish
|
42
|
+
flush
|
43
|
+
@java_file_output.finish
|
44
|
+
end
|
45
|
+
|
46
|
+
def close
|
47
|
+
@java_file_output.finish
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
@@ -3,7 +3,7 @@ module Embulk
|
|
3
3
|
require 'embulk/data_source'
|
4
4
|
require 'embulk/schema'
|
5
5
|
require 'embulk/page'
|
6
|
-
|
6
|
+
require 'embulk/file_output'
|
7
7
|
|
8
8
|
class FormatterPlugin
|
9
9
|
def self.transaction(config, schema, &control)
|
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
def initialize(task, schema, file_output)
|
15
15
|
@task = task
|
16
16
|
@schema = schema
|
17
|
-
@file_output
|
17
|
+
@file_output = file_output
|
18
18
|
init
|
19
19
|
end
|
20
20
|
|
@@ -58,7 +58,7 @@ module Embulk
|
|
58
58
|
def open(java_task_source, java_schema, java_file_output)
|
59
59
|
task_source = DataSource.from_java(java_task_source)
|
60
60
|
schema = Schema.from_java(java_schema)
|
61
|
-
file_output = FileOutput.
|
61
|
+
file_output = FileOutput.new(java_file_output)
|
62
62
|
ruby_object = @ruby_class.new(task_source, schema, file_output)
|
63
63
|
return OutputAdapter.new(ruby_object, schema, file_output)
|
64
64
|
end
|
data/lib/embulk/parser_plugin.rb
CHANGED
@@ -3,7 +3,7 @@ module Embulk
|
|
3
3
|
require 'embulk/data_source'
|
4
4
|
require 'embulk/schema'
|
5
5
|
require 'embulk/page_builder'
|
6
|
-
|
6
|
+
require 'embulk/file_input'
|
7
7
|
|
8
8
|
class ParserPlugin
|
9
9
|
def self.transaction(config, &control)
|
@@ -49,7 +49,7 @@ module Embulk
|
|
49
49
|
def run(java_task_source, java_schema, java_file_input, java_output)
|
50
50
|
task_source = DataSource.from_java(java_task_source)
|
51
51
|
schema = Schema.from_java(java_schema)
|
52
|
-
file_input = FileInput.
|
52
|
+
file_input = FileInput.new(java_file_input)
|
53
53
|
page_builder = PageBuilder.new(schema, java_output)
|
54
54
|
begin
|
55
55
|
@ruby_class.new(task_source, schema, page_builder).run(file_input)
|
data/lib/embulk/plugin.rb
CHANGED
@@ -36,15 +36,13 @@ module Embulk
|
|
36
36
|
register_plugin(:filter, type, klass, FilterPlugin)
|
37
37
|
end
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
#end
|
39
|
+
def register_parser(type, klass)
|
40
|
+
register_plugin(:parser, type, klass, ParserPlugin)
|
41
|
+
end
|
43
42
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
#end
|
43
|
+
def register_formatter(type, klass)
|
44
|
+
register_plugin(:formatter, type, klass, FormatterPlugin)
|
45
|
+
end
|
48
46
|
|
49
47
|
## TODO DecoderPlugin JRuby API is not written by anyone yet
|
50
48
|
#def register_decoder(type, klass)
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -271,6 +271,7 @@ files:
|
|
271
271
|
- embulk-docs/src/release/release-0.4.2.rst
|
272
272
|
- embulk-docs/src/release/release-0.4.3.rst
|
273
273
|
- embulk-docs/src/release/release-0.4.4.rst
|
274
|
+
- embulk-docs/src/release/release-0.4.5.rst
|
274
275
|
- embulk-standards/build.gradle
|
275
276
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
276
277
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -327,15 +328,19 @@ files:
|
|
327
328
|
- lib/embulk/data/new/ruby/Gemfile
|
328
329
|
- lib/embulk/data/new/ruby/Rakefile
|
329
330
|
- lib/embulk/data/new/ruby/filter.rb.erb
|
331
|
+
- lib/embulk/data/new/ruby/formatter.rb.erb
|
330
332
|
- lib/embulk/data/new/ruby/gemspec.erb
|
331
333
|
- lib/embulk/data/new/ruby/input.rb.erb
|
332
334
|
- lib/embulk/data/new/ruby/output.rb.erb
|
335
|
+
- lib/embulk/data/new/ruby/parser.rb.erb
|
333
336
|
- lib/embulk/data/package_data.rb
|
334
337
|
- lib/embulk/data_source.rb
|
335
338
|
- lib/embulk/decoder_plugin.rb
|
336
339
|
- lib/embulk/encoder_plugin.rb
|
337
340
|
- lib/embulk/error.rb
|
341
|
+
- lib/embulk/file_input.rb
|
338
342
|
- lib/embulk/file_input_plugin.rb
|
343
|
+
- lib/embulk/file_output.rb
|
339
344
|
- lib/embulk/file_output_plugin.rb
|
340
345
|
- lib/embulk/filter_plugin.rb
|
341
346
|
- lib/embulk/formatter_plugin.rb
|
@@ -365,8 +370,8 @@ files:
|
|
365
370
|
- classpath/bval-jsr303-0.5.jar
|
366
371
|
- classpath/commons-beanutils-core-1.8.3.jar
|
367
372
|
- classpath/commons-lang3-3.1.jar
|
368
|
-
- classpath/embulk-core-0.4.4.jar
|
369
|
-
- classpath/embulk-standards-0.4.4.jar
|
373
|
+
- classpath/embulk-core-0.4.5.jar
|
374
|
+
- classpath/embulk-standards-0.4.5.jar
|
370
375
|
- classpath/guava-18.0.jar
|
371
376
|
- classpath/guice-3.0.jar
|
372
377
|
- classpath/guice-multibindings-3.0.jar
|