embulk 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 157c521d28f82705b9162cca7ca03e052d5e478f
4
- data.tar.gz: 5063ed2e3544fede216449ea7cebc11c427faf9c
3
+ metadata.gz: 57cffd859609745fafc0e644e0e8466c0511c622
4
+ data.tar.gz: 5df593df4e75dce977ac7ab295f9fe089ababa1a
5
5
  SHA512:
6
- metadata.gz: 694c3da663fbb60de1ac1529e9d17c561785bc0637713d3d6d325e11fdd69204e200bb359d2592b177cf5a05fb743b53d80083dfb9216807e3f1cc65a54de47b
7
- data.tar.gz: 3b4dcaf0704cce4a6bf6af89f8e1929412b7d85c54b59d98da84efdc24ef2d55dc28f4e347fe8b15f7f887c49c77a70a405ea3777723c5b2aed7f0c0a89fb1d2
6
+ metadata.gz: 2c613261f18c551e6eb1ea9f748148422054d0818a76d8367de8eb68a071445686e871ce304d1413b10eca8570d220b4d53dbd6f3b7f060b008ce3c500903fe9
7
+ data.tar.gz: 0e6c8086818d6f7d81ac8009ee5fbf76d0aa0a34fc962af60f3647d189ca488cd3c4aa09c8b49f62b3551fcd9b4ad42e2582b69f6a56f2c78f9f6e06f6986439
data/README.md CHANGED
@@ -24,7 +24,7 @@ You can release plugins to share your efforts of data cleaning, error handling,
24
24
  The single-file package is the simplest way to try Embulk. You can download the latest embulk-VERSION.jar from [the releases page](https://bintray.com/embulk/maven/embulk/view#files) and run it with java:
25
25
 
26
26
  ```
27
- wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.4.jar -O embulk.jar
27
+ wget https://bintray.com/artifact/download/embulk/maven/embulk-0.4.5.jar -O embulk.jar
28
28
  java -jar embulk.jar --help
29
29
  ```
30
30
 
@@ -40,10 +40,10 @@ java -jar embulk.jar run config.yml
40
40
  ### Using plugins
41
41
 
42
42
  You can use plugins to load data from/to various systems and file formats.
43
- An example is [embulk-plugin-postgres-json](https://github.com/frsyuki/embulk-plugin-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
43
+ An example is [embulk-output-postgres-json](https://github.com/frsyuki/embulk-output-postgres-json) plugin. It outputs data into PostgreSQL server using "json" column type.
44
44
 
45
45
  ```
46
- java -jar embulk.jar gem install embulk-plugin-postgres-json
46
+ java -jar embulk.jar gem install embulk-output-postgres-json
47
47
  java -jar embulk.jar gem list
48
48
  ```
49
49
 
@@ -11,7 +11,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
11
11
 
12
12
  allprojects {
13
13
  group = 'org.embulk'
14
- version = '0.4.4'
14
+ version = '0.4.5'
15
15
 
16
16
  apply plugin: 'java'
17
17
  apply plugin: 'maven-publish'
@@ -15,4 +15,5 @@ Release Notes
15
15
  release/release-0.4.2
16
16
  release/release-0.4.3
17
17
  release/release-0.4.4
18
+ release/release-0.4.5
18
19
 
@@ -0,0 +1,24 @@
1
+ Release 0.4.5
2
+ ==================================
3
+
4
+ CLI
5
+ ------------------
6
+
7
+ * Fixed a problem where ``embulk gem install foo --version x.y.z`` printed embulk's own version and exited instead of passing ``--version`` on to the ``gem`` subcommand.
8
+
9
+ Plugin SPI
10
+ ------------------
11
+
12
+ * Added ParserPlugin SPI for JRuby. You can write parser plugins in Ruby.
13
+ * Added FormatterPlugin SPI for JRuby. You can write formatter plugins in Ruby.
14
+
15
+
16
+ Plugin API Changes
17
+ ------------------
18
+
19
+ * Added ``Embulk::FileInput`` API used mainly by parser plugins.
20
+ * Added ``Embulk::FileOutput`` API used mainly by formatter plugins.
21
+
22
+ Release Date
23
+ ------------------
24
+ 2015-02-19
@@ -9,6 +9,11 @@ module Embulk
9
9
  buffer
10
10
  end
11
11
 
12
+ def self.from_ruby_string(string)
13
+ b = Buffer.new(string)
14
+ b.force_encoding('ASCII-8BIT')
15
+ end
16
+
12
17
  def to_java
13
18
  Java::Buffer.wrap(to_java_bytes)
14
19
  end
@@ -19,18 +19,20 @@ module Embulk
19
19
 
20
20
  require 'embulk/version'
21
21
 
22
- if argv.include?('--version')
23
- puts "embulk #{Embulk::VERSION}"
24
- exit 0
25
- end
26
-
27
22
  i = argv.find_index {|arg| arg !~ /^\-/ }
28
- usage nil unless i
23
+ unless i
24
+ if argv.include?('--version')
25
+ puts "embulk #{Embulk::VERSION}"
26
+ exit 0
27
+ end
28
+ usage nil
29
+ end
29
30
  subcmd = argv.slice!(i)
30
31
 
31
32
  require 'java'
32
33
  require 'optparse'
33
34
  op = OptionParser.new
35
+ op.version = Embulk::VERSION
34
36
 
35
37
  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S,%3N %z")}: Embulk v#{Embulk::VERSION}"
36
38
 
@@ -127,8 +129,8 @@ categories:
127
129
  ruby-filter Ruby record filter plugin (like "add-hostname")
128
130
  #ruby-file-input Ruby file input plugin (like "ftp") # not implemented yet [#21]
129
131
  #ruby-file-output Ruby file output plugin (like "ftp") # not implemented yet [#22]
130
- #ruby-parser Ruby file parser plugin (like "csv") # not implemented yet [#33]
131
- #ruby-formatter Ruby file formatter plugin (like "csv") # not implemented yet [#34]
132
+ ruby-parser Ruby file parser plugin (like "csv")
133
+ ruby-formatter Ruby file formatter plugin (like "csv")
132
134
  #ruby-decoder Ruby file decoder plugin (like "gzip") # not implemented yet [#31]
133
135
  #ruby-encoder Ruby file encoder plugin (like "gzip") # not implemented yet [#32]
134
136
  java-input Java record input plugin (like "mysql")
@@ -259,8 +261,8 @@ examples:
259
261
  when "ruby-filter" then [:ruby, :filter]
260
262
  when "ruby-file-input" then raise "ruby-file-input is not implemented yet. See #21 on github." #[:ruby, :file_input]
261
263
  when "ruby-file-output" then raise "ruby-file-output is not implemented yet. See #22 on github." #[:ruby, :file_output]
262
- when "ruby-parser" then raise "ruby-parser is not implemented yet. See #33 on github." #[:ruby, :parser]
263
- when "ruby-formatter" then raise "ruby-formatter is not implemented yet. See #34 on github." #[:ruby, :formatter]
264
+ when "ruby-parser" then [:ruby, :parser]
265
+ when "ruby-formatter" then [:ruby, :formatter]
264
266
  when "ruby-decoder" then raise "ruby-decoder is not implemented yet. See #31 on github." #[:ruby, :decoder]
265
267
  when "ruby-encoder" then raise "ruby-decoder is not implemented yet. See #32 on github." #[:ruby, :encoder]
266
268
  else
@@ -7,8 +7,8 @@ module Embulk
7
7
  def self.transaction(config, in_schema, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  yield(task, out_columns)
@@ -0,0 +1,47 @@
1
module Embulk
  module Formatter

    # Skeleton of a formatter plugin. It is registered under the plugin
    # name given to the generator and writes placeholder data for every
    # record it receives.
    class <%= ruby_class_name %> < FormatterPlugin
      Plugin.register_formatter(<%= name.dump %>, self)

      # Reads the plugin configuration into a task hash and yields it.
      def self.transaction(config, schema, &control)
        # configuration code:
        task = {
          "property1" => config.param("property1", :string),
          "property2" => config.param("property2", :integer, default: 0),
        }

        yield(task)
      end

      # Copies task values into instance variables and resets the
      # per-file bookkeeping used by #add.
      def init
        # initialization code:
        @property1 = task["property1"]
        @property2 = task["property2"]

        # your data
        @current_file = nil       # no output file has been opened yet
        @current_file_size = 0    # bytes written to @current_file so far
      end

      def close
      end

      # Writes one chunk of data per record, switching to a new output
      # file once more than 32KB have been written to the current one.
      def add(page)
        # output code:
        page.each do |record|
          if @current_file == nil || @current_file_size > 32*1024
            @current_file = @file_output.next_file
            @current_file_size = 0
          end
          data = "|mydata|"
          @current_file.write data
          # keep the counter in sync so that the size check above can trigger
          @current_file_size += data.bytesize
        end
      end

      # Finishes the underlying file output.
      def finish
        @file_output.finish
      end
    end

  end
end
@@ -7,13 +7,13 @@ module Embulk
7
7
  def self.transaction(config, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  columns = [
15
15
  Column.new(0, "example", :string),
16
- Column.new(1, "column", :integer),
16
+ Column.new(1, "column", :long),
17
17
  Column.new(2, "name", :double),
18
18
  ]
19
19
 
@@ -7,8 +7,8 @@ module Embulk
7
7
  def self.transaction(config, schema, count, &control)
8
8
  # configuration code:
9
9
  task = {
10
- "property1" => config.param("property1", :string)
11
- "property2" => config.param("property2", :integer, default: 0)
10
+ "property1" => config.param("property1", :string),
11
+ "property2" => config.param("property2", :integer, default: 0),
12
12
  }
13
13
 
14
14
  # resumable output:
@@ -0,0 +1,42 @@
1
module Embulk
  module Parser

    # Skeleton of a parser plugin. It is registered under the plugin name
    # given to the generator and emits one fixed record per input buffer.
    class <%= ruby_class_name %> < ParserPlugin
      Plugin.register_parser(<%= name.dump %>, self)

      # Builds the task hash and the output column list, then yields both.
      def self.transaction(config, &control)
        # configuration code:
        task = {}
        task["property1"] = config.param("property1", :string)
        task["property2"] = config.param("property2", :integer, default: 0)

        # output schema: one Column per field, in index order
        columns = []
        columns << Column.new(0, "example", :string)
        columns << Column.new(1, "column", :long)
        columns << Column.new(2, "name", :double)

        yield(task, columns)
      end

      # Copies task values into instance variables.
      def init
        # initialization code:
        @property1 = task["property1"]
        @property2 = task["property2"]
      end

      # Walks over every file of file_input and every buffer of each file,
      # adds a record per buffer, and finishes the page builder at the end.
      def run(file_input)
        while (file = file_input.next_file)
          file.each do |buffer|
            # parsing code
            record = ["col1", 2, 3.0]
            @page_builder.add(record)
          end
        end
        @page_builder.finish
      end
    end

  end
end
@@ -0,0 +1,83 @@
1
+
2
+ module Embulk
3
+ require 'embulk/buffer'
4
+
5
# Ruby-side reader over a Java file input. Data arrives in chunks polled
# from the Java object; each chunk is converted with Buffer.from_java and
# released. Chunks can be consumed one by one with #each or as a byte
# stream with #read.
class FileInput
  # java_file_input - object this class calls nextFile, poll and close on.
  def initialize(java_file_input)
    @java_file_input = java_file_input
    @buffer = nil
  end

  # Advances to the next file.
  #
  # Returns self when there is another file, nil otherwise.
  def next_file
    # Bytes left over from a partial #read belong to the previous file;
    # drop them so they do not show up in reads of the next file.
    @buffer = nil
    @java_file_input.nextFile ? self : nil
  end

  # Yields the data of the current file chunk by chunk. Data that an
  # earlier #read polled but did not return is yielded first.
  def each(&block)
    if @buffer
      yield @buffer
      @buffer = nil
    end

    while java_buffer = @java_file_input.poll
      buffer = Buffer.from_java(java_buffer)
      java_buffer.release
      yield buffer
    end
  end

  # Reads data from the current file.
  #
  # count - maximum number of bytes to return; nil reads everything that
  #         is left in the current file.
  # dest  - optional string whose content is replaced with the data.
  #
  # Returns the data (dest itself when dest is given), or nil when no
  # data is left and count is not 0.
  def read(count=nil, dest=nil)
    @buffer ||= Buffer.new
    fill_buffer(count)

    return nil if @buffer.empty? && count != 0

    if count.nil? || @buffer.size <= count
      # everything buffered is handed out
      data = @buffer
      @buffer = nil
    else
      # hand out the first count bytes, keep the rest for the next call
      data = @buffer.slice!(0, count)
    end

    return data unless dest
    dest.replace(data)
  end

  # Closes the underlying Java file input.
  def close
    @java_file_input.close
  end

  private

  # Polls chunks into @buffer until it holds at least limit bytes or no
  # more chunks are available. A nil limit polls until no chunk is left.
  def fill_buffer(limit)
    until limit && @buffer.size >= limit
      java_buffer = @java_file_input.poll
      break unless java_buffer
      @buffer << Buffer.from_java(java_buffer)
      java_buffer.release
    end
  end
end
83
+ end
@@ -0,0 +1,51 @@
1
+
2
+ module Embulk
3
+ require 'embulk/buffer'
4
+
5
# Ruby-side writer over a Java file output. Data passed to #write is
# collected in a binary buffer and handed to the Java object in larger
# chunks; #add hands a single string over immediately.
class FileOutput
  # java_file_output - object this class calls nextFile, add, finish and
  #                    close on.
  def initialize(java_file_output)
    @java_file_output = java_file_output
    @buffer = Buffer.new
    @buffer.force_encoding('ASCII-8BIT')
    # #write flushes once more than this many bytes are buffered
    @flush_size = 32*1024
  end

  # Flushes buffered data and switches the Java output to the next file.
  #
  # Returns self.
  def next_file
    flush
    @java_file_output.nextFile
    self
  end

  # Appends a string to the internal buffer and flushes when the buffer
  # has grown beyond the flush size.
  #
  # Returns nil.
  def write(buffer)
    # Tag a copy as binary instead of the argument itself, so that the
    # caller's string keeps its encoding and frozen strings are accepted.
    @buffer << buffer.dup.force_encoding('ASCII-8BIT')
    flush if @buffer.size > @flush_size
    nil
  end

  # Flushes buffered data, then passes the given string to the Java
  # output as one chunk of its own.
  #
  # Returns nil.
  def add(buffer)
    flush
    # convert with to_java, the same way #flush does
    @java_file_output.add(Buffer.from_ruby_string(buffer).to_java)
    nil
  end

  # Hands the buffered data to the Java output, if there is any.
  #
  # Returns nil.
  def flush
    unless @buffer.empty?
      @java_file_output.add(@buffer.to_java)
      # without this, the same bytes would be sent again by the next flush
      @buffer.clear
    end
    nil
  end

  # Flushes buffered data and finishes the Java output.
  def finish
    flush
    @java_file_output.finish
  end

  # Closes the Java output.
  def close
    @java_file_output.close
  end
end
50
+
51
+ end
@@ -3,7 +3,7 @@ module Embulk
3
3
  require 'embulk/data_source'
4
4
  require 'embulk/schema'
5
5
  require 'embulk/page'
6
- #require 'embulk/file_output' TODO not implemented
6
+ require 'embulk/file_output'
7
7
 
8
8
  class FormatterPlugin
9
9
  def self.transaction(config, schema, &control)
@@ -14,7 +14,7 @@ module Embulk
14
14
  def initialize(task, schema, file_output)
15
15
  @task = task
16
16
  @schema = schema
17
- @file_output
17
+ @file_output = file_output
18
18
  init
19
19
  end
20
20
 
@@ -58,7 +58,7 @@ module Embulk
58
58
  def open(java_task_source, java_schema, java_file_output)
59
59
  task_source = DataSource.from_java(java_task_source)
60
60
  schema = Schema.from_java(java_schema)
61
- file_output = FileOutput.from_java(java_file_output)
61
+ file_output = FileOutput.new(java_file_output)
62
62
  ruby_object = @ruby_class.new(task_source, schema, file_output)
63
63
  return OutputAdapter.new(ruby_object, schema, file_output)
64
64
  end
@@ -3,7 +3,7 @@ module Embulk
3
3
  require 'embulk/data_source'
4
4
  require 'embulk/schema'
5
5
  require 'embulk/page_builder'
6
- #require 'embulk/file_input' TODO not implemented
6
+ require 'embulk/file_input'
7
7
 
8
8
  class ParserPlugin
9
9
  def self.transaction(config, &control)
@@ -49,7 +49,7 @@ module Embulk
49
49
  def run(java_task_source, java_schema, java_file_input, java_output)
50
50
  task_source = DataSource.from_java(java_task_source)
51
51
  schema = Schema.from_java(java_schema)
52
- file_input = FileInput.from_java(java_file_input)
52
+ file_input = FileInput.new(java_file_input)
53
53
  page_builder = PageBuilder.new(schema, java_output)
54
54
  begin
55
55
  @ruby_class.new(task_source, schema, page_builder).run(file_input)
@@ -36,15 +36,13 @@ module Embulk
36
36
  register_plugin(:filter, type, klass, FilterPlugin)
37
37
  end
38
38
 
39
- ## TODO FileInput is not implemented yet.
40
- #def register_parser(type, klass)
41
- # register_plugin(:parser, type, klass, ParserPlugin)
42
- #end
39
+ def register_parser(type, klass)
40
+ register_plugin(:parser, type, klass, ParserPlugin)
41
+ end
43
42
 
44
- ## TODO FileOutput is not implemented yet.
45
- #def register_formatter(type, klass)
46
- # register_plugin(:formatter, type, klass, FormatterPlugin)
47
- #end
43
+ def register_formatter(type, klass)
44
+ register_plugin(:formatter, type, klass, FormatterPlugin)
45
+ end
48
46
 
49
47
  ## TODO DecoderPlugin JRuby API is not written by anyone yet
50
48
  #def register_decoder(type, klass)
@@ -1,3 +1,3 @@
1
1
  module Embulk
2
- VERSION = '0.4.4'
2
+ VERSION = '0.4.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-19 00:00:00.000000000 Z
11
+ date: 2015-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -271,6 +271,7 @@ files:
271
271
  - embulk-docs/src/release/release-0.4.2.rst
272
272
  - embulk-docs/src/release/release-0.4.3.rst
273
273
  - embulk-docs/src/release/release-0.4.4.rst
274
+ - embulk-docs/src/release/release-0.4.5.rst
274
275
  - embulk-standards/build.gradle
275
276
  - embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java
276
277
  - embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java
@@ -327,15 +328,19 @@ files:
327
328
  - lib/embulk/data/new/ruby/Gemfile
328
329
  - lib/embulk/data/new/ruby/Rakefile
329
330
  - lib/embulk/data/new/ruby/filter.rb.erb
331
+ - lib/embulk/data/new/ruby/formatter.rb.erb
330
332
  - lib/embulk/data/new/ruby/gemspec.erb
331
333
  - lib/embulk/data/new/ruby/input.rb.erb
332
334
  - lib/embulk/data/new/ruby/output.rb.erb
335
+ - lib/embulk/data/new/ruby/parser.rb.erb
333
336
  - lib/embulk/data/package_data.rb
334
337
  - lib/embulk/data_source.rb
335
338
  - lib/embulk/decoder_plugin.rb
336
339
  - lib/embulk/encoder_plugin.rb
337
340
  - lib/embulk/error.rb
341
+ - lib/embulk/file_input.rb
338
342
  - lib/embulk/file_input_plugin.rb
343
+ - lib/embulk/file_output.rb
339
344
  - lib/embulk/file_output_plugin.rb
340
345
  - lib/embulk/filter_plugin.rb
341
346
  - lib/embulk/formatter_plugin.rb
@@ -365,8 +370,8 @@ files:
365
370
  - classpath/bval-jsr303-0.5.jar
366
371
  - classpath/commons-beanutils-core-1.8.3.jar
367
372
  - classpath/commons-lang3-3.1.jar
368
- - classpath/embulk-core-0.4.4.jar
369
- - classpath/embulk-standards-0.4.4.jar
373
+ - classpath/embulk-core-0.4.5.jar
374
+ - classpath/embulk-standards-0.4.5.jar
370
375
  - classpath/guava-18.0.jar
371
376
  - classpath/guice-3.0.jar
372
377
  - classpath/guice-multibindings-3.0.jar