embulk 0.4.4 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/build.gradle +1 -1
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.4.5.rst +24 -0
- data/lib/embulk/buffer.rb +5 -0
- data/lib/embulk/command/embulk_run.rb +12 -10
- data/lib/embulk/data/new/ruby/filter.rb.erb +2 -2
- data/lib/embulk/data/new/ruby/formatter.rb.erb +47 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +3 -3
- data/lib/embulk/data/new/ruby/output.rb.erb +2 -2
- data/lib/embulk/data/new/ruby/parser.rb.erb +42 -0
- data/lib/embulk/file_input.rb +83 -0
- data/lib/embulk/file_output.rb +51 -0
- data/lib/embulk/formatter_plugin.rb +3 -3
- data/lib/embulk/parser_plugin.rb +2 -2
- data/lib/embulk/plugin.rb +6 -8
- data/lib/embulk/version.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57cffd859609745fafc0e644e0e8466c0511c622
|
4
|
+
data.tar.gz: 5df593df4e75dce977ac7ab295f9fe089ababa1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2c613261f18c551e6eb1ea9f748148422054d0818a76d8367de8eb68a071445686e871ce304d1413b10eca8570d220b4d53dbd6f3b7f060b008ce3c500903fe9
|
7
|
+
data.tar.gz: 0e6c8086818d6f7d81ac8009ee5fbf76d0aa0a34fc962af60f3647d189ca488cd3c4aa09c8b49f62b3551fcd9b4ad42e2582b69f6a56f2c78f9f6e06f6986439
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
|
|
24
24
|
The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
|
25
25
|
|
26
26
|
```
|
27
|
-
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.4.jar -O embulk.jar
|
27
|
+
wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.5.jar -O embulk.jar
|
28
28
|
java -jar embulk.jar --help
|
29
29
|
```
|
30
30
|
|
@@ -40,10 +40,10 @@ java -jar embulk.jar run config.yml
|
|
40
40
|
### Using plugins
|
41
41
|
|
42
42
|
You can use plugins to load data from/to various systems and file formats.
|
43
|
-
An example is [embulk-
|
43
|
+
An example is [embulk-output-postgres-json](https://github.com/frsyuki/embulk-output-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
|
44
44
|
|
45
45
|
```
|
46
|
-
java -jar embulk.jar gem install embulk-
|
46
|
+
java -jar embulk.jar gem install embulk-output-postgres-json
|
47
47
|
java -jar embulk.jar gem list
|
48
48
|
```
|
49
49
|
|
data/build.gradle
CHANGED
data/embulk-docs/src/release.rst
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
Release 0.4.5
|
2
|
+
==================================
|
3
|
+
|
4
|
+
CLI
|
5
|
+
------------------
|
6
|
+
|
7
|
+
* Fixed a problem where ``embulk gem install foo --version x.y.z`` shows embulk's version.
|
8
|
+
|
9
|
+
Plugin SPI
|
10
|
+
------------------
|
11
|
+
|
12
|
+
* Added ParserPlugin SPI for JRuby. You can write parser plugins in Ruby.
|
13
|
+
* Added FormatterPlugin SPI for JRuby. You can write formatter plugins in Ruby.
|
14
|
+
|
15
|
+
|
16
|
+
Plugin API Changes
|
17
|
+
------------------
|
18
|
+
|
19
|
+
* Added ``Embulk::FileInput`` API used mainly by parser plugins.
|
20
|
+
* Added ``Embulk::FileOutput`` API used mainly by formatter plugins.
|
21
|
+
|
22
|
+
Release Date
|
23
|
+
------------------
|
24
|
+
2015-02-19
|
data/lib/embulk/buffer.rb
CHANGED
@@ -19,18 +19,20 @@ module Embulk
|
|
19
19
|
|
20
20
|
require 'embulk/version'
|
21
21
|
|
22
|
-
if argv.include?('--version')
|
23
|
-
puts "embulk #{Embulk::VERSION}"
|
24
|
-
exit 0
|
25
|
-
end
|
26
|
-
|
27
22
|
i = argv.find_index {|arg| arg !~ /^\-/ }
|
28
|
-
|
23
|
+
unless i
|
24
|
+
if argv.include?('--version')
|
25
|
+
puts "embulk #{Embulk::VERSION}"
|
26
|
+
exit 0
|
27
|
+
end
|
28
|
+
usage nil
|
29
|
+
end
|
29
30
|
subcmd = argv.slice!(i)
|
30
31
|
|
31
32
|
require 'java'
|
32
33
|
require 'optparse'
|
33
34
|
op = OptionParser.new
|
35
|
+
op.version = Embulk::VERSION
|
34
36
|
|
35
37
|
puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S,%3N %z")}: Embulk v#{Embulk::VERSION}"
|
36
38
|
|
@@ -127,8 +129,8 @@ categories:
|
|
127
129
|
ruby-filter Ruby record filter plugin (like "add-hostname")
|
128
130
|
#ruby-file-input Ruby file input plugin (like "ftp") # not implemented yet [#21]
|
129
131
|
#ruby-file-output Ruby file output plugin (like "ftp") # not implemented yet [#22]
|
130
|
-
|
131
|
-
|
132
|
+
ruby-parser Ruby file parser plugin (like "csv") # not implemented yet [#33]
|
133
|
+
ruby-formatter Ruby file formatter plugin (like "csv") # not implemented yet [#34]
|
132
134
|
#ruby-decoder Ruby file decoder plugin (like "gzip") # not implemented yet [#31]
|
133
135
|
#ruby-encoder Ruby file encoder plugin (like "gzip") # not implemented yet [#32]
|
134
136
|
java-input Java record input plugin (like "mysql")
|
@@ -259,8 +261,8 @@ examples:
|
|
259
261
|
when "ruby-filter" then [:ruby, :filter]
|
260
262
|
when "ruby-file-input" then raise "ruby-file-input is not implemented yet. See #21 on github." #[:ruby, :file_input]
|
261
263
|
when "ruby-file-output" then raise "ruby-file-output is not implemented yet. See #22 on github." #[:ruby, :file_output]
|
262
|
-
when "ruby-parser" then
|
263
|
-
when "ruby-formatter" then
|
264
|
+
when "ruby-parser" then [:ruby, :parser]
|
265
|
+
when "ruby-formatter" then [:ruby, :formatter]
|
264
266
|
when "ruby-decoder" then raise "ruby-decoder is not implemented yet. See #31 on github." #[:ruby, :decoder]
|
265
267
|
when "ruby-encoder" then raise "ruby-decoder is not implemented yet. See #32 on github." #[:ruby, :encoder]
|
266
268
|
else
|
@@ -7,8 +7,8 @@ module Embulk
|
|
7
7
|
def self.transaction(config, in_schema, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
yield(task, out_columns)
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Formatter
|
3
|
+
|
4
|
+
class <%= ruby_class_name %> < FormatterPlugin
|
5
|
+
Plugin.register_formatter(<%= name.dump %>, self)
|
6
|
+
|
7
|
+
def self.transaction(config, schema, &control)
|
8
|
+
# configuration code:
|
9
|
+
task = {
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
|
+
}
|
13
|
+
|
14
|
+
yield(task)
|
15
|
+
end
|
16
|
+
|
17
|
+
def init
|
18
|
+
# initialization code:
|
19
|
+
@property1 = task["property1"]
|
20
|
+
@property2 = task["property2"]
|
21
|
+
|
22
|
+
# your data
|
23
|
+
@current_file == nil
|
24
|
+
@current_file_size = 0
|
25
|
+
end
|
26
|
+
|
27
|
+
def close
|
28
|
+
end
|
29
|
+
|
30
|
+
def add(page)
|
31
|
+
# output code:
|
32
|
+
page.each do |record|
|
33
|
+
if @current_file == nil || @current_file_size > 32*1024
|
34
|
+
@current_file = @file_output.next_file
|
35
|
+
@current_file_size = 0
|
36
|
+
end
|
37
|
+
@current_file.write "|mydata|"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def finish
|
42
|
+
@file_output.finish
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
@@ -7,13 +7,13 @@ module Embulk
|
|
7
7
|
def self.transaction(config, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
columns = [
|
15
15
|
Column.new(0, "example", :string),
|
16
|
-
Column.new(1, "column", :
|
16
|
+
Column.new(1, "column", :long),
|
17
17
|
Column.new(2, "name", :double),
|
18
18
|
]
|
19
19
|
|
@@ -7,8 +7,8 @@ module Embulk
|
|
7
7
|
def self.transaction(config, schema, count, &control)
|
8
8
|
# configuration code:
|
9
9
|
task = {
|
10
|
-
"property1" => config.param("property1", :string)
|
11
|
-
"property2" => config.param("property2", :integer, default: 0)
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
12
|
}
|
13
13
|
|
14
14
|
# resumable output:
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Parser
|
3
|
+
|
4
|
+
class <%= ruby_class_name %> < ParserPlugin
|
5
|
+
Plugin.register_parser(<%= name.dump %>, self)
|
6
|
+
|
7
|
+
def self.transaction(config, &control)
|
8
|
+
# configuration code:
|
9
|
+
task = {
|
10
|
+
"property1" => config.param("property1", :string),
|
11
|
+
"property2" => config.param("property2", :integer, default: 0),
|
12
|
+
}
|
13
|
+
|
14
|
+
columns = [
|
15
|
+
Column.new(0, "example", :string),
|
16
|
+
Column.new(1, "column", :long),
|
17
|
+
Column.new(2, "name", :double),
|
18
|
+
]
|
19
|
+
|
20
|
+
yield(task, columns)
|
21
|
+
end
|
22
|
+
|
23
|
+
def init
|
24
|
+
# initialization code:
|
25
|
+
@property1 = task["property1"]
|
26
|
+
@property2 = task["property2"]
|
27
|
+
end
|
28
|
+
|
29
|
+
def run(file_input)
|
30
|
+
while file = file_input.next_file
|
31
|
+
file.each do |buffer|
|
32
|
+
# parsering code
|
33
|
+
record = ["col1", 2, 3.0]
|
34
|
+
@page_builder.add(record)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
@page_builder.finish
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'embulk/buffer'
|
4
|
+
|
5
|
+
class FileInput
|
6
|
+
def initialize(java_file_input)
|
7
|
+
@java_file_input = java_file_input
|
8
|
+
@buffer = nil
|
9
|
+
end
|
10
|
+
|
11
|
+
def next_file
|
12
|
+
if @java_file_input.nextFile
|
13
|
+
return self
|
14
|
+
else
|
15
|
+
return nil
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def each(&block)
|
20
|
+
if @buffer
|
21
|
+
yield @buffer
|
22
|
+
@buffer = nil
|
23
|
+
end
|
24
|
+
|
25
|
+
while java_buffer = @java_file_input.poll
|
26
|
+
buffer = Buffer.from_java(java_buffer)
|
27
|
+
java_buffer.release
|
28
|
+
yield buffer
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def read(count=nil, dest=nil)
|
33
|
+
if count == nil
|
34
|
+
@buffer ||= Buffer.new
|
35
|
+
while java_buffer = @java_file_input.poll
|
36
|
+
@buffer << Buffer.from_java(java_buffer)
|
37
|
+
java_buffer.release
|
38
|
+
end
|
39
|
+
|
40
|
+
return nil if @buffer.empty? && count != 0
|
41
|
+
|
42
|
+
if dest
|
43
|
+
dest.replace(@buffer)
|
44
|
+
else
|
45
|
+
dest = @buffer
|
46
|
+
end
|
47
|
+
@buffer = nil
|
48
|
+
|
49
|
+
else
|
50
|
+
@buffer ||= Buffer.new
|
51
|
+
until @buffer.size >= count
|
52
|
+
java_buffer = @java_file_input.poll
|
53
|
+
break unless java_buffer
|
54
|
+
@buffer << Buffer.from_java(java_buffer)
|
55
|
+
java_buffer.release
|
56
|
+
end
|
57
|
+
|
58
|
+
return nil if @buffer.empty? && count != 0
|
59
|
+
|
60
|
+
if @buffer.size <= count
|
61
|
+
if dest
|
62
|
+
dest.replace(@buffer)
|
63
|
+
else
|
64
|
+
dest = @buffer
|
65
|
+
end
|
66
|
+
@buffer = nil
|
67
|
+
else
|
68
|
+
data = @buffer.slice!(0, count)
|
69
|
+
if dest
|
70
|
+
dest.replace(data)
|
71
|
+
else
|
72
|
+
dest = data
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
return dest
|
77
|
+
end
|
78
|
+
|
79
|
+
def close
|
80
|
+
@java_file_input.close
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'embulk/buffer'
|
4
|
+
|
5
|
+
class FileOutput
|
6
|
+
def initialize(java_file_output)
|
7
|
+
@java_file_output = java_file_output
|
8
|
+
@buffer = Buffer.new
|
9
|
+
@buffer.force_encoding('ASCII-8BIT')
|
10
|
+
@flush_size = 32*1024
|
11
|
+
end
|
12
|
+
|
13
|
+
def next_file
|
14
|
+
flush
|
15
|
+
@java_file_output.nextFile
|
16
|
+
self
|
17
|
+
end
|
18
|
+
|
19
|
+
def write(buffer)
|
20
|
+
buffer.force_encoding('ASCII-8BIT') # TODO this is destructively change buffer
|
21
|
+
@buffer << buffer
|
22
|
+
if @buffer.size > @flush_size
|
23
|
+
flush
|
24
|
+
end
|
25
|
+
nil
|
26
|
+
end
|
27
|
+
|
28
|
+
def add(buffer)
|
29
|
+
flush
|
30
|
+
@java_file_output.add(Buffer.from_ruby_string(buffer))
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
|
34
|
+
def flush
|
35
|
+
unless @buffer.empty?
|
36
|
+
@java_file_output.add(@buffer.to_java)
|
37
|
+
end
|
38
|
+
nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def finish
|
42
|
+
flush
|
43
|
+
@java_file_output.finish
|
44
|
+
end
|
45
|
+
|
46
|
+
def close
|
47
|
+
@java_file_output.finish
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
@@ -3,7 +3,7 @@ module Embulk
|
|
3
3
|
require 'embulk/data_source'
|
4
4
|
require 'embulk/schema'
|
5
5
|
require 'embulk/page'
|
6
|
-
|
6
|
+
require 'embulk/file_output'
|
7
7
|
|
8
8
|
class FormatterPlugin
|
9
9
|
def self.transaction(config, schema, &control)
|
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
def initialize(task, schema, file_output)
|
15
15
|
@task = task
|
16
16
|
@schema = schema
|
17
|
-
@file_output
|
17
|
+
@file_output = file_output
|
18
18
|
init
|
19
19
|
end
|
20
20
|
|
@@ -58,7 +58,7 @@ module Embulk
|
|
58
58
|
def open(java_task_source, java_schema, java_file_output)
|
59
59
|
task_source = DataSource.from_java(java_task_source)
|
60
60
|
schema = Schema.from_java(java_schema)
|
61
|
-
file_output = FileOutput.
|
61
|
+
file_output = FileOutput.new(java_file_output)
|
62
62
|
ruby_object = @ruby_class.new(task_source, schema, file_output)
|
63
63
|
return OutputAdapter.new(ruby_object, schema, file_output)
|
64
64
|
end
|
data/lib/embulk/parser_plugin.rb
CHANGED
@@ -3,7 +3,7 @@ module Embulk
|
|
3
3
|
require 'embulk/data_source'
|
4
4
|
require 'embulk/schema'
|
5
5
|
require 'embulk/page_builder'
|
6
|
-
|
6
|
+
require 'embulk/file_input'
|
7
7
|
|
8
8
|
class ParserPlugin
|
9
9
|
def self.transaction(config, &control)
|
@@ -49,7 +49,7 @@ module Embulk
|
|
49
49
|
def run(java_task_source, java_schema, java_file_input, java_output)
|
50
50
|
task_source = DataSource.from_java(java_task_source)
|
51
51
|
schema = Schema.from_java(java_schema)
|
52
|
-
file_input = FileInput.
|
52
|
+
file_input = FileInput.new(java_file_input)
|
53
53
|
page_builder = PageBuilder.new(schema, java_output)
|
54
54
|
begin
|
55
55
|
@ruby_class.new(task_source, schema, page_builder).run(file_input)
|
data/lib/embulk/plugin.rb
CHANGED
@@ -36,15 +36,13 @@ module Embulk
|
|
36
36
|
register_plugin(:filter, type, klass, FilterPlugin)
|
37
37
|
end
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
#end
|
39
|
+
def register_parser(type, klass)
|
40
|
+
register_plugin(:parser, type, klass, ParserPlugin)
|
41
|
+
end
|
43
42
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
#end
|
43
|
+
def register_formatter(type, klass)
|
44
|
+
register_plugin(:formatter, type, klass, FormatterPlugin)
|
45
|
+
end
|
48
46
|
|
49
47
|
## TODO DecoderPlugin JRuby API is not written by anyone yet
|
50
48
|
#def register_decoder(type, klass)
|
data/lib/embulk/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -271,6 +271,7 @@ files:
|
|
271
271
|
- embulk-docs/src/release/release-0.4.2.rst
|
272
272
|
- embulk-docs/src/release/release-0.4.3.rst
|
273
273
|
- embulk-docs/src/release/release-0.4.4.rst
|
274
|
+
- embulk-docs/src/release/release-0.4.5.rst
|
274
275
|
- embulk-standards/build.gradle
|
275
276
|
- embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
|
276
277
|
- embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
|
@@ -327,15 +328,19 @@ files:
|
|
327
328
|
- lib/embulk/data/new/ruby/Gemfile
|
328
329
|
- lib/embulk/data/new/ruby/Rakefile
|
329
330
|
- lib/embulk/data/new/ruby/filter.rb.erb
|
331
|
+
- lib/embulk/data/new/ruby/formatter.rb.erb
|
330
332
|
- lib/embulk/data/new/ruby/gemspec.erb
|
331
333
|
- lib/embulk/data/new/ruby/input.rb.erb
|
332
334
|
- lib/embulk/data/new/ruby/output.rb.erb
|
335
|
+
- lib/embulk/data/new/ruby/parser.rb.erb
|
333
336
|
- lib/embulk/data/package_data.rb
|
334
337
|
- lib/embulk/data_source.rb
|
335
338
|
- lib/embulk/decoder_plugin.rb
|
336
339
|
- lib/embulk/encoder_plugin.rb
|
337
340
|
- lib/embulk/error.rb
|
341
|
+
- lib/embulk/file_input.rb
|
338
342
|
- lib/embulk/file_input_plugin.rb
|
343
|
+
- lib/embulk/file_output.rb
|
339
344
|
- lib/embulk/file_output_plugin.rb
|
340
345
|
- lib/embulk/filter_plugin.rb
|
341
346
|
- lib/embulk/formatter_plugin.rb
|
@@ -365,8 +370,8 @@ files:
|
|
365
370
|
- classpath/bval-jsr303-0.5.jar
|
366
371
|
- classpath/commons-beanutils-core-1.8.3.jar
|
367
372
|
- classpath/commons-lang3-3.1.jar
|
368
|
-
- classpath/embulk-core-0.4.4.jar
|
369
|
-
- classpath/embulk-standards-0.4.4.jar
|
373
|
+
- classpath/embulk-core-0.4.5.jar
|
374
|
+
- classpath/embulk-standards-0.4.5.jar
|
370
375
|
- classpath/guava-18.0.jar
|
371
376
|
- classpath/guice-3.0.jar
|
372
377
|
- classpath/guice-multibindings-3.0.jar
|