embulk 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 157c521d28f82705b9162cca7ca03e052d5e478f
4
- data.tar.gz: 5063ed2e3544fede216449ea7cebc11c427faf9c
3
+ metadata.gz: 57cffd859609745fafc0e644e0e8466c0511c622
4
+ data.tar.gz: 5df593df4e75dce977ac7ab295f9fe089ababa1a
5
5
  SHA512:
6
- metadata.gz: 694c3da663fbb60de1ac1529e9d17c561785bc0637713d3d6d325e11fdd69204e200bb359d2592b177cf5a05fb743b53d80083dfb9216807e3f1cc65a54de47b
7
- data.tar.gz: 3b4dcaf0704cce4a6bf6af89f8e1929412b7d85c54b59d98da84efdc24ef2d55dc28f4e347fe8b15f7f887c49c77a70a405ea3777723c5b2aed7f0c0a89fb1d2
6
+ metadata.gz: 2c613261f18c551e6eb1ea9f748148422054d0818a76d8367de8eb68a071445686e871ce304d1413b10eca8570d220b4d53dbd6f3b7f060b008ce3c500903fe9
7
+ data.tar.gz: 0e6c8086818d6f7d81ac8009ee5fbf76d0aa0a34fc962af60f3647d189ca488cd3c4aa09c8b49f62b3551fcd9b4ad42e2582b69f6a56f2c78f9f6e06f6986439
data/README.md CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
24
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
25
25
 
26
26
  ```
27
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.4.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.5.jar -O embulk.jar
28
28
  java -jar embulk.jar --help
29
29
  ```
30
30
 
@@ -40,10 +40,10 @@ java -jar embulk.jar run config.yml
40
40
  ### Using plugins
41
41
 
42
42
  You can use plugins to load data from/to various systems and file formats.
43
- An example is [embulk-plugin-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
43
+ An example is the [embulk-output-postgres-json](https://github.com/frsyuki/embulk-output-postgres-json) plugin. It outputs data into a PostgreSQL server using the "json" column type.
44
44
 
45
45
  ```
46
- java -jar embulk.jar gem install embulk-plugin-postgres-json
46
+ java -jar embulk.jar gem install embulk-output-postgres-json
47
47
  java -jar embulk.jar gem list
48
48
  ```
49
49
 
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.4.4'
14
+ version = '0.4.5'
15
15
 
16
16
  apply plugin: 'java'
17
17
  apply plugin: 'maven-publish'
@@ -15,4 +15,5 @@ Release Notes
15
15
  release/release-0.4.2
16
16
  release/release-0.4.3
17
17
  release/release-0.4.4
18
+ release/release-0.4.5
18
19
 
@@ -0,0 +1,24 @@
1
+ Release 0.4.5
2
+ ==================================
3
+
4
+ CLI
5
+ ------------------
6
+
7
+ * Fixed a problem where ``embulk gem install foo --version x.y.z`` printed embulk's own version and exited instead of passing ``--version`` on to the subcommand.
8
+
9
+ Plugin SPI
10
+ ------------------
11
+
12
+ * Added ParserPlugin SPI for JRuby. You can write parser plugins in Ruby.
13
+ * Added FormatterPlugin SPI for JRuby. You can write formatter plugins in Ruby.
14
+
15
+
16
+ Plugin API Changes
17
+ ------------------
18
+
19
+ * Added ``Embulk::FileInput`` API used mainly by parser plugins.
20
+ * Added ``Embulk::FileOutput`` API used mainly by formatter plugins.
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-02-19
@@ -9,6 +9,11 @@ module Embulk
9
9
  buffer
10
10
  end
11
11
 
12
+ def self.from_ruby_string(string)
13
+ b = Buffer.new(string)
14
+ b.force_encoding('ASCII-8BIT')
15
+ end
16
+
12
17
  def to_java
13
18
  Java::Buffer.wrap(to_java_bytes)
14
19
  end
@@ -19,18 +19,20 @@ module Embulk
19
19
 
20
20
  require 'embulk/version'
21
21
 
22
- if argv.include?('--version')
23
- puts "embulk #{Embulk::VERSION}"
24
- exit 0
25
- end
26
-
27
22
  i = argv.find_index {|arg| arg !~ /^\-/ }
28
- usage nil unless i
23
+ unless i
24
+ if argv.include?('--version')
25
+ puts "embulk #{Embulk::VERSION}"
26
+ exit 0
27
+ end
28
+ usage nil
29
+ end
29
30
  subcmd = argv.slice!(i)
30
31
 
31
32
  require 'java'
32
33
  require 'optparse'
33
34
  op = OptionParser.new
35
+ op.version = Embulk::VERSION
34
36
 
35
37
  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S,%3N %z")}: Embulk v#{Embulk::VERSION}"
36
38
 
@@ -127,8 +129,8 @@ categories:
127
129
  ruby-filter Ruby record filter plugin (like "add-hostname")
128
130
  #ruby-file-input Ruby file input plugin (like "ftp") # not implemented yet [#21]
129
131
  #ruby-file-output Ruby file output plugin (like "ftp") # not implemented yet [#22]
130
- #ruby-parser Ruby file parser plugin (like "csv") # not implemented yet [#33]
131
- #ruby-formatter Ruby file formatter plugin (like "csv") # not implemented yet [#34]
132
+ ruby-parser Ruby file parser plugin (like "csv")
133
+ ruby-formatter Ruby file formatter plugin (like "csv")
132
134
  #ruby-decoder Ruby file decoder plugin (like "gzip") # not implemented yet [#31]
133
135
  #ruby-encoder Ruby file encoder plugin (like "gzip") # not implemented yet [#32]
134
136
  java-input Java record input plugin (like "mysql")
@@ -259,8 +261,8 @@ examples:
259
261
  when "ruby-filter" then [:ruby, :filter]
260
262
  when "ruby-file-input" then raise "ruby-file-input is not implemented yet. See #21 on github." #[:ruby, :file_input]
261
263
  when "ruby-file-output" then raise "ruby-file-output is not implemented yet. See #22 on github." #[:ruby, :file_output]
262
- when "ruby-parser" then raise "ruby-parser is not implemented yet. See #33 on github." #[:ruby, :parser]
263
- when "ruby-formatter" then raise "ruby-formatter is not implemented yet. See #34 on github." #[:ruby, :formatter]
264
+ when "ruby-parser" then [:ruby, :parser]
265
+ when "ruby-formatter" then [:ruby, :formatter]
264
266
  when "ruby-decoder" then raise "ruby-decoder is not implemented yet. See #31 on github." #[:ruby, :decoder]
265
267
  when "ruby-encoder" then raise "ruby-decoder is not implemented yet. See #32 on github." #[:ruby, :encoder]
266
268
  else
@@ -7,8 +7,8 @@ module Embulk
7
7
  def self.transaction(config, in_schema, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  yield(task, out_columns)
@@ -0,0 +1,47 @@
1
+ module Embulk
2
+ module Formatter
3
+
4
+ class <%= ruby_class_name %> < FormatterPlugin
5
+ Plugin.register_formatter(<%= name.dump %>, self)
6
+
7
+ def self.transaction(config, schema, &control)
8
+ # configuration code:
9
+ task = {
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
+ }
13
+
14
+ yield(task)
15
+ end
16
+
17
+ def init
18
+ # initialization code:
19
+ @property1 = task["property1"]
20
+ @property2 = task["property2"]
21
+
22
+ # your data
23
+ @current_file == nil
24
+ @current_file_size = 0
25
+ end
26
+
27
+ def close
28
+ end
29
+
30
+ def add(page)
31
+ # output code:
32
+ page.each do |record|
33
+ if @current_file == nil || @current_file_size > 32*1024
34
+ @current_file = @file_output.next_file
35
+ @current_file_size = 0
36
+ end
37
+ @current_file.write "|mydata|"
38
+ end
39
+ end
40
+
41
+ def finish
42
+ @file_output.finish
43
+ end
44
+ end
45
+
46
+ end
47
+ end
@@ -7,13 +7,13 @@ module Embulk
7
7
  def self.transaction(config, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  columns = [
15
15
  Column.new(0, "example", :string),
16
- Column.new(1, "column", :integer),
16
+ Column.new(1, "column", :long),
17
17
  Column.new(2, "name", :double),
18
18
  ]
19
19
 
@@ -7,8 +7,8 @@ module Embulk
7
7
  def self.transaction(config, schema, count, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  # resumable output:
@@ -0,0 +1,42 @@
1
+ module Embulk
2
+ module Parser
3
+
4
+ class <%= ruby_class_name %> < ParserPlugin
5
+ Plugin.register_parser(<%= name.dump %>, self)
6
+
7
+ def self.transaction(config, &control)
8
+ # configuration code:
9
+ task = {
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
+ }
13
+
14
+ columns = [
15
+ Column.new(0, "example", :string),
16
+ Column.new(1, "column", :long),
17
+ Column.new(2, "name", :double),
18
+ ]
19
+
20
+ yield(task, columns)
21
+ end
22
+
23
+ def init
24
+ # initialization code:
25
+ @property1 = task["property1"]
26
+ @property2 = task["property2"]
27
+ end
28
+
29
+ def run(file_input)
30
+ while file = file_input.next_file
31
+ file.each do |buffer|
32
+ # parsing code
33
+ record = ["col1", 2, 3.0]
34
+ @page_builder.add(record)
35
+ end
36
+ end
37
+ @page_builder.finish
38
+ end
39
+ end
40
+
41
+ end
42
+ end
@@ -0,0 +1,83 @@
1
+
2
+ module Embulk
3
+ require 'embulk/buffer'
4
+
5
+ class FileInput
6
+ def initialize(java_file_input)
7
+ @java_file_input = java_file_input
8
+ @buffer = nil
9
+ end
10
+
11
+ def next_file
12
+ if @java_file_input.nextFile
13
+ return self
14
+ else
15
+ return nil
16
+ end
17
+ end
18
+
19
+ def each(&block)
20
+ if @buffer
21
+ yield @buffer
22
+ @buffer = nil
23
+ end
24
+
25
+ while java_buffer = @java_file_input.poll
26
+ buffer = Buffer.from_java(java_buffer)
27
+ java_buffer.release
28
+ yield buffer
29
+ end
30
+ end
31
+
32
+ def read(count=nil, dest=nil)
33
+ if count == nil
34
+ @buffer ||= Buffer.new
35
+ while java_buffer = @java_file_input.poll
36
+ @buffer << Buffer.from_java(java_buffer)
37
+ java_buffer.release
38
+ end
39
+
40
+ return nil if @buffer.empty? && count != 0
41
+
42
+ if dest
43
+ dest.replace(@buffer)
44
+ else
45
+ dest = @buffer
46
+ end
47
+ @buffer = nil
48
+
49
+ else
50
+ @buffer ||= Buffer.new
51
+ until @buffer.size >= count
52
+ java_buffer = @java_file_input.poll
53
+ break unless java_buffer
54
+ @buffer << Buffer.from_java(java_buffer)
55
+ java_buffer.release
56
+ end
57
+
58
+ return nil if @buffer.empty? && count != 0
59
+
60
+ if @buffer.size <= count
61
+ if dest
62
+ dest.replace(@buffer)
63
+ else
64
+ dest = @buffer
65
+ end
66
+ @buffer = nil
67
+ else
68
+ data = @buffer.slice!(0, count)
69
+ if dest
70
+ dest.replace(data)
71
+ else
72
+ dest = data
73
+ end
74
+ end
75
+ end
76
+ return dest
77
+ end
78
+
79
+ def close
80
+ @java_file_input.close
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,51 @@
1
+
2
+ module Embulk
3
+ require 'embulk/buffer'
4
+
5
+ class FileOutput
6
+ def initialize(java_file_output)
7
+ @java_file_output = java_file_output
8
+ @buffer = Buffer.new
9
+ @buffer.force_encoding('ASCII-8BIT')
10
+ @flush_size = 32*1024
11
+ end
12
+
13
+ def next_file
14
+ flush
15
+ @java_file_output.nextFile
16
+ self
17
+ end
18
+
19
+ def write(buffer)
20
+ buffer.force_encoding('ASCII-8BIT') # TODO: this destructively changes the caller's buffer (force_encoding mutates its receiver in place)
21
+ @buffer << buffer
22
+ if @buffer.size > @flush_size
23
+ flush
24
+ end
25
+ nil
26
+ end
27
+
28
+ def add(buffer)
29
+ flush
30
+ @java_file_output.add(Buffer.from_ruby_string(buffer))
31
+ nil
32
+ end
33
+
34
+ def flush
35
+ unless @buffer.empty?
36
+ @java_file_output.add(@buffer.to_java)
37
+ end
38
+ nil
39
+ end
40
+
41
+ def finish
42
+ flush
43
+ @java_file_output.finish
44
+ end
45
+
46
+ def close
47
+ @java_file_output.finish
48
+ end
49
+ end
50
+
51
+ end
@@ -3,7 +3,7 @@ module Embulk
3
3
  require 'embulk/data_source'
4
4
  require 'embulk/schema'
5
5
  require 'embulk/page'
6
- #require 'embulk/file_output' TODO not implemented
6
+ require 'embulk/file_output'
7
7
 
8
8
  class FormatterPlugin
9
9
  def self.transaction(config, schema, &control)
@@ -14,7 +14,7 @@ module Embulk
14
14
  def initialize(task, schema, file_output)
15
15
  @task = task
16
16
  @schema = schema
17
- @file_output
17
+ @file_output = file_output
18
18
  init
19
19
  end
20
20
 
@@ -58,7 +58,7 @@ module Embulk
58
58
  def open(java_task_source, java_schema, java_file_output)
59
59
  task_source = DataSource.from_java(java_task_source)
60
60
  schema = Schema.from_java(java_schema)
61
- file_output = FileOutput.from_java(java_file_output)
61
+ file_output = FileOutput.new(java_file_output)
62
62
  ruby_object = @ruby_class.new(task_source, schema, file_output)
63
63
  return OutputAdapter.new(ruby_object, schema, file_output)
64
64
  end
@@ -3,7 +3,7 @@ module Embulk
3
3
  require 'embulk/data_source'
4
4
  require 'embulk/schema'
5
5
  require 'embulk/page_builder'
6
- #require 'embulk/file_input' TODO not implemented
6
+ require 'embulk/file_input'
7
7
 
8
8
  class ParserPlugin
9
9
  def self.transaction(config, &control)
@@ -49,7 +49,7 @@ module Embulk
49
49
  def run(java_task_source, java_schema, java_file_input, java_output)
50
50
  task_source = DataSource.from_java(java_task_source)
51
51
  schema = Schema.from_java(java_schema)
52
- file_input = FileInput.from_java(java_file_input)
52
+ file_input = FileInput.new(java_file_input)
53
53
  page_builder = PageBuilder.new(schema, java_output)
54
54
  begin
55
55
  @ruby_class.new(task_source, schema, page_builder).run(file_input)
@@ -36,15 +36,13 @@ module Embulk
36
36
  register_plugin(:filter, type, klass, FilterPlugin)
37
37
  end
38
38
 
39
- ## TODO FileInput is not implemented yet.
40
- #def register_parser(type, klass)
41
- # register_plugin(:parser, type, klass, ParserPlugin)
42
- #end
39
+ def register_parser(type, klass)
40
+ register_plugin(:parser, type, klass, ParserPlugin)
41
+ end
43
42
 
44
- ## TODO FileOutput is not implemented yet.
45
- #def register_formatter(type, klass)
46
- # register_plugin(:formatter, type, klass, FormatterPlugin)
47
- #end
43
+ def register_formatter(type, klass)
44
+ register_plugin(:formatter, type, klass, FormatterPlugin)
45
+ end
48
46
 
49
47
  ## TODO DecoderPlugin JRuby API is not written by anyone yet
50
48
  #def register_decoder(type, klass)
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.4.4'
2
+ VERSION = '0.4.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-19 00:00:00.000000000 Z
11
+ date: 2015-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -271,6 +271,7 @@ files:
271
271
  - embulk-docs/src/release/release-0.4.2.rst
272
272
  - embulk-docs/src/release/release-0.4.3.rst
273
273
  - embulk-docs/src/release/release-0.4.4.rst
274
+ - embulk-docs/src/release/release-0.4.5.rst
274
275
  - embulk-standards/build.gradle
275
276
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
276
277
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -327,15 +328,19 @@ files:
327
328
  - lib/embulk/data/new/ruby/Gemfile
328
329
  - lib/embulk/data/new/ruby/Rakefile
329
330
  - lib/embulk/data/new/ruby/filter.rb.erb
331
+ - lib/embulk/data/new/ruby/formatter.rb.erb
330
332
  - lib/embulk/data/new/ruby/gemspec.erb
331
333
  - lib/embulk/data/new/ruby/input.rb.erb
332
334
  - lib/embulk/data/new/ruby/output.rb.erb
335
+ - lib/embulk/data/new/ruby/parser.rb.erb
333
336
  - lib/embulk/data/package_data.rb
334
337
  - lib/embulk/data_source.rb
335
338
  - lib/embulk/decoder_plugin.rb
336
339
  - lib/embulk/encoder_plugin.rb
337
340
  - lib/embulk/error.rb
341
+ - lib/embulk/file_input.rb
338
342
  - lib/embulk/file_input_plugin.rb
343
+ - lib/embulk/file_output.rb
339
344
  - lib/embulk/file_output_plugin.rb
340
345
  - lib/embulk/filter_plugin.rb
341
346
  - lib/embulk/formatter_plugin.rb
@@ -365,8 +370,8 @@ files:
365
370
  - classpath/bval-jsr303-0.5.jar
366
371
  - classpath/commons-beanutils-core-1.8.3.jar
367
372
  - classpath/commons-lang3-3.1.jar
368
- - classpath/embulk-core-0.4.4.jar
369
- - classpath/embulk-standards-0.4.4.jar
373
+ - classpath/embulk-core-0.4.5.jar
374
+ - classpath/embulk-standards-0.4.5.jar
370
375
  - classpath/guava-18.0.jar
371
376
  - classpath/guice-3.0.jar
372
377
  - classpath/guice-multibindings-3.0.jar