embulk-parser-unpack 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 650827aaf0ff16b4ef922a62b7de00f80ad4ce9f
4
- data.tar.gz: eb56b59f391e675e386d1b5051a9f37358a871b3
3
+ metadata.gz: 8189fb6c55eff53f51ba19551118505412203ea2
4
+ data.tar.gz: a88cef2596881d886ae9c7d94e773df70a8715c1
5
5
  SHA512:
6
- metadata.gz: 6c9131cb80a84d918b854db39a01ae0fc7a9c3cdb9992c8d170e498adaba42621880b102280491e8c0286eb9e61ee09f3b51dd12136864a86bb47a7cc97735e6
7
- data.tar.gz: 398f7585994badbadd0d5f3f7c1702d5ab3752f18561e18c500278b1e48ef5de01b28efc659f6de4232bde2fd1aaff091bb540b89587eea6ebf383f6fd7a5183
6
+ metadata.gz: 3803a0ca9e19602eaf552b7b2f89432aac1020007e04b5befb097dff0c1903d1ec39850681eb2083d3ec7f828d8f6072608d0a26ecaba76eeaa98456ca9f0a49
7
+ data.tar.gz: 671b3071171d5e74f81c9f87f5d76b4578ca7be1d4c08d86e6b996d06e91d7c612bb800f07969a74cd7e78b4d3149faef12f161b243c009cfc66c46995013f18
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - jruby-9.1.5.0
4
+ jdk:
5
+ - oraclejdk8
6
+ env:
7
+ global:
8
+ - JRUBY_OPTS="-Xcli.debug=true --debug"
9
+
10
+ gemfile:
11
+ - gemfiles/embulk-latest
12
+
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
+ [![Build Status](https://travis-ci.org/kakoni/embulk-parser-unpack.svg?branch=master)](https://travis-ci.org/kakoni/embulk-parser-unpack)
2
+
1
3
  # Unpack parser plugin for Embulk
2
4
 
3
- TODO: Write short description here and embulk-parser-unpack.gemspec file.
5
+ Unpack parser. Useful for parsing fixed width format files.
6
+ For example, it can transform the line `FirstSecond Third` into `{key: "First", key2: "Second", key3: "Third"}`.
4
7
 
5
8
  ## Overview
6
9
 
@@ -9,9 +12,9 @@ TODO: Write short description here and embulk-parser-unpack.gemspec file.
9
12
 
10
13
  ## Configuration
11
14
 
12
- - **option1**: description (integer, required)
13
- - **option2**: description (string, default: `"myvalue"`)
14
- - **option3**: description (string, default: `null`)
15
+ - **format**: Unpack format string. [See String#unpack](http://apidock.com/ruby/String/unpack) (string, required)
16
+ - **strip_whitespace**: Strip whitespace from parsed values. (bool, default: true)
17
+ - **columns**: Declares the list of columns. Unpacked values are assigned to these columns in order.
15
18
 
16
19
  ## Example
17
20
 
@@ -20,19 +23,16 @@ in:
20
23
  type: any file input plugin type
21
24
  parser:
22
25
  type: unpack
23
- option1: example1
24
- option2: example2
25
- ```
26
-
27
- (If guess supported) you don't have to write `parser:` section in the configuration file. After writing `in:` section, you can let embulk guess `parser:` section using this command:
26
+ format: a2a5@10a4 #Extracts three values, first 2 chars, then 5 chars and lastly 4 chars from position 10.
27
+ columns:
28
+ - {name: first, type: string}
29
+ - {name: second, type: string}
30
+ - {name: third, type: string}
28
31
 
29
- ```
30
- $ embulk gem install embulk-parser-unpack
31
- $ embulk guess -g unpack config.yml -o guessed.yml
32
32
  ```
33
33
 
34
- ## Build
34
+ ## Install plugin
35
35
 
36
36
  ```
37
- $ rake
37
+ $ embulk gem install embulk-parser-unpack
38
38
  ```
data/Rakefile CHANGED
@@ -1,3 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
 
3
- task default: :build
3
+ task default: :test
4
+
5
+ desc "Run tests"
6
+ task :test do
7
+ ruby("test/run-test.rb", "--use-color=yes")
8
+ end
@@ -1,10 +1,10 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-unpack"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.5"
5
5
  spec.authors = ["Karri Niemel\u{e4}"]
6
6
  spec.summary = "Unpack parser plugin for Embulk"
7
- spec.description = "Parses files read by other file input plugins."
7
+ spec.description = "Parses fixed width files read by other file input plugins."
8
8
  spec.email = ["kakoni@gmail.com"]
9
9
  spec.licenses = ["MIT"]
10
10
  spec.homepage = "https://github.com/kakoni/embulk-parser-unpack"
@@ -16,4 +16,6 @@ Gem::Specification.new do |spec|
16
16
  spec.add_development_dependency 'embulk', ['>= 0.8.9']
17
17
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
18
18
  spec.add_development_dependency 'rake', ['>= 10.0']
19
+ spec.add_development_dependency 'test-unit'
20
+ spec.add_development_dependency 'test-unit-rr'
19
21
  end
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec :path => '../'
3
+
4
+ gem "embulk", "~> 0.8.13"
@@ -11,6 +11,7 @@ module Embulk
11
11
  task = {
12
12
  "decoder" => DataSource.from_java(decoder_task.dump),
13
13
  "format" => config.param("format", :string),
14
+ "strip_whitespace" => config.param("strip_whitespace", :bool, default: true),
14
15
  }
15
16
 
16
17
  columns = []
@@ -18,7 +19,6 @@ module Embulk
18
19
  schema.each do |column|
19
20
  name = column["name"]
20
21
  type = column["type"].to_sym
21
-
22
22
  columns << Column.new(nil, name, type)
23
23
  end
24
24
 
@@ -28,7 +28,7 @@ module Embulk
28
28
  def init
29
29
  @format = task["format"]
30
30
  @decoder = task.param("decoder", :hash).load_task(Java::LineDecoder::DecoderTask)
31
-
31
+ @strip_whitespace = task["strip_whitespace"]
32
32
  end
33
33
 
34
34
 
@@ -48,6 +48,7 @@ module Embulk
48
48
 
49
49
  def process_line(line)
50
50
  values = line.unpack(@format)
51
+ values.map(&:strip!) if @strip_whitespace
51
52
  page_builder.add(values)
52
53
  end
53
54
 
@@ -0,0 +1,45 @@
1
require "stringio"

# Test helpers for capturing or suppressing output written through Ruby's
# global streams ($stdout / $stderr) and through the JVM's System.out /
# System.err. Uses `java_import`, so the swap itself needs a JRuby runtime.
module CaptureIo
  # Runs the block and returns what was written to the selected stream.
  #
  # @param output [Symbol] :out or :err
  # @return [String] Ruby-side output followed by Java-side output
  def capture(output = :out, &block)
    _, captured = swap_io(output, &block)
    captured
  end

  # Runs the block with both stdout and stderr redirected to throwaway buffers.
  #
  # @return [Object] the block's return value
  def silence(&block)
    block_result = nil
    swap_io(:out) do
      block_result, _ = swap_io(:err, &block)
    end
    block_result
  end

  # Temporarily replaces the selected Ruby and Java streams with in-memory
  # buffers, runs the block, and restores the original streams afterwards
  # (also when the block raises).
  #
  # @param output [Symbol] :out or :err
  # @return [Array] two elements: the block's return value and the captured text
  def swap_io(output = :out, &block)
    java_import 'java.io.PrintStream'
    java_import 'java.io.ByteArrayOutputStream'
    java_import 'java.lang.System'

    # Keep a reference to the real stream object rather than a dup: restoring
    # a dup would leave the global pointing at a different IO object and leak
    # one duplicated file descriptor per call.
    ruby_original_stream = output == :out ? $stdout : $stderr
    java_original_stream = System.send(output) # :out or :err
    ruby_buf = StringIO.new
    java_buf = ByteArrayOutputStream.new

    # Only the region below is guarded, so the restore never runs with
    # unassigned originals if the setup above fails.
    begin
      case output
      when :out
        $stdout = ruby_buf
        System.setOut(PrintStream.new(java_buf))
      when :err
        $stderr = ruby_buf
        System.setErr(PrintStream.new(java_buf))
      end

      [block.call, ruby_buf.string + java_buf.toString]
    ensure
      case output
      when :out
        $stdout = ruby_original_stream
        System.setOut(java_original_stream)
      when :err
        $stderr = ruby_original_stream
        System.setErr(java_original_stream)
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "prepare_embulk"
2
+ require "embulk/parser/unpack"
3
+ require "embulk/data_source"
4
+
5
+
6
module Embulk
  module Parser
    # Unit tests for the unpack parser plugin.
    class UnpackTest < Test::Unit::TestCase

      # Exercises Unpack#process_line against a mocked page builder.
      class TestProcessLine < self

        # The format "a3a4a6@14a6" takes 3, 4 and 6 characters from the start
        # of the line and then 6 characters starting at offset 14.
        def test_process_line_unpacks_fixed_width_fields
          mock(page_builder).add(["eka", "Toka", "Kolmas", "Neljas"])
          line = "ekaTokaKolmas Neljas"
          plugin.send(:process_line, line)
        end

        # Plugin under test, built once per test from the fixtures below.
        def plugin
          @plugin ||= Unpack.new(DataSource[task], schema, page_builder)
        end

        # Plain object whose #add call is mocked by the test.
        def page_builder
          @page_builder ||= Object.new
        end

        # Task configuration handed to the plugin.
        def task
          {
            "decoder" => {"Charset" => "UTF-8", "Newline" => "CRLF"},
            "format" => "a3a4a6@14a6",
            "columns" => columns,
          }
        end

        # Column definitions, one per unpacked value.
        def columns
          [
            {"name" => "foo", "type" => :string},
            {"name" => "bar", "type" => :string},
            {"name" => "baz", "type" => :string},
            {"name" => "qux", "type" => :string},
          ]
        end

        # Embulk columns built from the definitions above.
        def schema
          columns.map do |column|
            Column.new(nil, column["name"], column["type"].to_sym)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require "capture_io"
2
+
3
# Helpers for invoking Embulk commands from tests.
module EmbulkRunHelper
  include CaptureIo

  # Runs `embulk guess` with this gem's guess plugin while suppressing output.
  #
  # @param seed_path [String] path of the seed configuration file
  # @param dest_path [String] path the guessed configuration is written to
  def embulk_guess(seed_path, dest_path)
    silence do
      # The guess plugin shipped with this gem is named "unpack"
      # (lib/embulk/guess/unpack.rb), not "query_string".
      embulk_exec(%W(guess -g unpack #{seed_path} -o #{dest_path}))
    end
  end

  # Runs `embulk run` for the given configuration file.
  #
  # @param yaml_path [String] path of the configuration file
  def embulk_run(yaml_path)
    embulk_exec(%W(run #{yaml_path}))
  end

  # Passes the given command-line arguments to Embulk.run.
  #
  # @param cli_options [Array<String>] command-line arguments
  def embulk_exec(cli_options = [])
    Embulk.run(cli_options)
  end
end
@@ -0,0 +1,5 @@
1
+ require "embulk"
2
+
3
+ Embulk.setup
4
+
5
+ require "embulk/command/embulk_run"
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Locate the project root (the parent of this script's directory) and the
# directories that have to be on the load path.
script_dir = File.dirname(__FILE__)
project_root = File.expand_path(File.join(script_dir, ".."))
library_path = File.join(project_root, "lib")
tests_path = File.join(project_root, "test")

require "test-unit"
require "test/unit/rr"

# Prepend lib first and test second, so test ends up ahead of lib.
[library_path, tests_path].each { |path| $LOAD_PATH.unshift(path) }

# Set the test-unit diff size setting unless the caller already chose one.
ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"

# Collect tests from the test directory and exit with the runner's result.
runner_args = ARGV + %w(--collector=dir)
exit Test::Unit::AutoRunner.run(true, tests_path, runner_args)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-unpack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Karri Niemelä
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-23 00:00:00.000000000 Z
11
+ date: 2016-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -52,21 +52,58 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- description: Parses files read by other file input plugins.
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ name: test-unit
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: test-unit-rr
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Parses fixed width files read by other file input plugins.
56
84
  email:
57
85
  - kakoni@gmail.com
58
86
  executables: []
59
87
  extensions: []
60
88
  extra_rdoc_files: []
61
89
  files:
90
+ - ".gitignore"
91
+ - ".ruby-version"
92
+ - ".travis.yml"
62
93
  - Gemfile
63
94
  - LICENSE
64
95
  - LICENSE.txt
65
96
  - README.md
66
97
  - Rakefile
67
98
  - embulk-parser-unpack.gemspec
99
+ - gemfiles/embulk-latest
68
100
  - lib/embulk/guess/unpack.rb
69
101
  - lib/embulk/parser/unpack.rb
102
+ - test/capture_io.rb
103
+ - test/embulk/parser/test_unpack.rb
104
+ - test/embulk_run_helper.rb
105
+ - test/prepare_embulk.rb
106
+ - test/run-test.rb
70
107
  homepage: https://github.com/kakoni/embulk-parser-unpack
71
108
  licenses:
72
109
  - MIT
@@ -91,4 +128,9 @@ rubygems_version: 2.4.8
91
128
  signing_key:
92
129
  specification_version: 4
93
130
  summary: Unpack parser plugin for Embulk
94
- test_files: []
131
+ test_files:
132
+ - test/capture_io.rb
133
+ - test/embulk/parser/test_unpack.rb
134
+ - test/embulk_run_helper.rb
135
+ - test/prepare_embulk.rb
136
+ - test/run-test.rb