embulk-parser-unpack 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 650827aaf0ff16b4ef922a62b7de00f80ad4ce9f
4
- data.tar.gz: eb56b59f391e675e386d1b5051a9f37358a871b3
3
+ metadata.gz: 8189fb6c55eff53f51ba19551118505412203ea2
4
+ data.tar.gz: a88cef2596881d886ae9c7d94e773df70a8715c1
5
5
  SHA512:
6
- metadata.gz: 6c9131cb80a84d918b854db39a01ae0fc7a9c3cdb9992c8d170e498adaba42621880b102280491e8c0286eb9e61ee09f3b51dd12136864a86bb47a7cc97735e6
7
- data.tar.gz: 398f7585994badbadd0d5f3f7c1702d5ab3752f18561e18c500278b1e48ef5de01b28efc659f6de4232bde2fd1aaff091bb540b89587eea6ebf383f6fd7a5183
6
+ metadata.gz: 3803a0ca9e19602eaf552b7b2f89432aac1020007e04b5befb097dff0c1903d1ec39850681eb2083d3ec7f828d8f6072608d0a26ecaba76eeaa98456ca9f0a49
7
+ data.tar.gz: 671b3071171d5e74f81c9f87f5d76b4578ca7be1d4c08d86e6b996d06e91d7c612bb800f07969a74cd7e78b4d3149faef12f161b243c009cfc66c46995013f18
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - jruby-9.1.5.0
4
+ jdk:
5
+ - oraclejdk8
6
+ env:
7
+ global:
8
+ - JRUBY_OPTS="-Xcli.debug=true --debug"
9
+
10
+ gemfile:
11
+ - gemfiles/embulk-latest
12
+
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
+ [![Build Status](https://travis-ci.org/kakoni/embulk-parser-unpack.svg?branch=master)](https://travis-ci.org/kakoni/embulk-parser-unpack)
2
+
1
3
  # Unpack parser plugin for Embulk
2
4
 
3
- TODO: Write short description here and embulk-parser-unpack.gemspec file.
5
+ A parser plugin built on Ruby's `String#unpack`; useful for parsing fixed-width format files.
6
+ It can be used to transform a `FirstSecond Third` line into `{key: "First", key2: "Second", key3: "Third"}`.
4
7
 
5
8
  ## Overview
6
9
 
@@ -9,9 +12,9 @@ TODO: Write short description here and embulk-parser-unpack.gemspec file.
9
12
 
10
13
  ## Configuration
11
14
 
12
- - **option1**: description (integer, required)
13
- - **option2**: description (string, default: `"myvalue"`)
14
- - **option3**: description (string, default: `null`)
15
+ - **format**: Unpack format string. [See String#unpack](http://apidock.com/ruby/String/unpack) (string, required)
16
+ - **strip_whitespace**: Strip leading and trailing whitespace from each parsed value (bool, default: `true`)
17
+ - **columns**: Declares the list of columns; unpacked values are assigned to them in order.
15
18
 
16
19
  ## Example
17
20
 
@@ -20,19 +23,16 @@ in:
20
23
  type: any file input plugin type
21
24
  parser:
22
25
  type: unpack
23
- option1: example1
24
- option2: example2
25
- ```
26
-
27
- (If guess supported) you don't have to write `parser:` section in the configuration file. After writing `in:` section, you can let embulk guess `parser:` section using this command:
26
+ format: a2a5@10a4 # Extracts three values: the first 2 chars, the next 5 chars, and then 4 chars starting at offset 10.
27
+ columns:
28
+ - {name: first, type: string}
29
+ - {name: second, type: string}
30
+ - {name: third, type: string}
28
31
 
29
- ```
30
- $ embulk gem install embulk-parser-unpack
31
- $ embulk guess -g unpack config.yml -o guessed.yml
32
32
  ```
33
33
 
34
- ## Build
34
+ ## Install plugin
35
35
 
36
36
  ```
37
- $ rake
37
+ $ embulk gem install embulk-parser-unpack
38
38
  ```
data/Rakefile CHANGED
@@ -1,3 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
 
3
- task default: :build
3
+ task default: :test
4
+
5
+ desc "Run tests"
6
+ task :test do
7
+ ruby("test/run-test.rb", "--use-color=yes")
8
+ end
@@ -1,10 +1,10 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-parser-unpack"
4
- spec.version = "0.1.0"
4
+ spec.version = "0.1.5"
5
5
  spec.authors = ["Karri Niemel\u{e4}"]
6
6
  spec.summary = "Unpack parser plugin for Embulk"
7
- spec.description = "Parses files read by other file input plugins."
7
+ spec.description = "Parses fixed width files read by other file input plugins."
8
8
  spec.email = ["kakoni@gmail.com"]
9
9
  spec.licenses = ["MIT"]
10
10
  spec.homepage = "https://github.com/kakoni/embulk-parser-unpack"
@@ -16,4 +16,6 @@ Gem::Specification.new do |spec|
16
16
  spec.add_development_dependency 'embulk', ['>= 0.8.9']
17
17
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
18
18
  spec.add_development_dependency 'rake', ['>= 10.0']
19
+ spec.add_development_dependency 'test-unit'
20
+ spec.add_development_dependency 'test-unit-rr'
19
21
  end
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec :path => '../'
3
+
4
+ gem "embulk", "~> 0.8.13"
@@ -11,6 +11,7 @@ module Embulk
11
11
  task = {
12
12
  "decoder" => DataSource.from_java(decoder_task.dump),
13
13
  "format" => config.param("format", :string),
14
+ "strip_whitespace" => config.param("strip_whitespace", :bool, default: true),
14
15
  }
15
16
 
16
17
  columns = []
@@ -18,7 +19,6 @@ module Embulk
18
19
  schema.each do |column|
19
20
  name = column["name"]
20
21
  type = column["type"].to_sym
21
-
22
22
  columns << Column.new(nil, name, type)
23
23
  end
24
24
 
@@ -28,7 +28,7 @@ module Embulk
28
28
  def init
29
29
  @format = task["format"]
30
30
  @decoder = task.param("decoder", :hash).load_task(Java::LineDecoder::DecoderTask)
31
-
31
+ @strip_whitespace = task["strip_whitespace"]
32
32
  end
33
33
 
34
34
 
@@ -48,6 +48,7 @@ module Embulk
48
48
 
49
49
  def process_line(line)
50
50
  values = line.unpack(@format)
51
+ values.map(&:strip!) if @strip_whitespace
51
52
  page_builder.add(values)
52
53
  end
53
54
 
@@ -0,0 +1,45 @@
1
+ module CaptureIo
2
+ def capture(output = :out, &block)
3
+ _, out = swap_io(output, &block)
4
+ out
5
+ end
6
+
7
+ def silence(&block)
8
+ block_result = nil
9
+ swap_io(:out) do
10
+ block_result,_ = swap_io(:err, &block)
11
+ end
12
+ block_result
13
+ end
14
+
15
+ def swap_io(output = :out, &block)
16
+ java_import 'java.io.PrintStream'
17
+ java_import 'java.io.ByteArrayOutputStream'
18
+ java_import 'java.lang.System'
19
+
20
+ ruby_original_stream = output == :out ? $stdout.dup : $stderr.dup
21
+ java_original_stream = System.send(output) # :out or :err
22
+ ruby_buf = StringIO.new
23
+ java_buf = ByteArrayOutputStream.new
24
+
25
+ case output
26
+ when :out
27
+ $stdout = ruby_buf
28
+ System.setOut(PrintStream.new(java_buf))
29
+ when :err
30
+ $stderr = ruby_buf
31
+ System.setErr(PrintStream.new(java_buf))
32
+ end
33
+
34
+ [block.call, ruby_buf.string + java_buf.toString]
35
+ ensure
36
+ case output
37
+ when :out
38
+ $stdout = ruby_original_stream
39
+ System.setOut(java_original_stream)
40
+ when :err
41
+ $stderr = ruby_original_stream
42
+ System.setErr(java_original_stream)
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,52 @@
1
+ require "prepare_embulk"
2
+ require "embulk/parser/unpack"
3
+ require "embulk/data_source"
4
+
5
+
6
+ module Embulk
7
+ module Parser
8
+ class QueryStringTest < Test::Unit::TestCase
9
+
10
+ class TestProcessLine < self
11
+
12
+ def test_foo
13
+ mock(page_builder).add(["eka", "Toka", "Kolmas", "Neljas"])
14
+ line = "ekaTokaKolmas Neljas"
15
+ plugin.send(:process_line, line)
16
+ end
17
+
18
+ def plugin
19
+ @plugin ||= Unpack.new(DataSource[task], schema, page_builder)
20
+ end
21
+
22
+ def page_builder
23
+ @page_builder ||= Object.new
24
+ end
25
+
26
+ def task
27
+ {
28
+ "decoder" => {"Charset" => "UTF-8", "Newline" => "CRLF"},
29
+ "format" => "a3a4a6@14a6",
30
+ "columns" => columns,
31
+ }
32
+ end
33
+
34
+ def columns
35
+ [
36
+ {"name" => "foo", "type" => :string},
37
+ {"name" => "bar", "type" => :string},
38
+ {"name" => "baz", "type" => :string},
39
+ {"name" => "qux", "type" => :string},
40
+ ]
41
+ end
42
+
43
+ def schema
44
+ columns.map do |column|
45
+ Column.new(nil, column["name"], column["type"].to_sym)
46
+ end
47
+ end
48
+
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,19 @@
1
+ require "capture_io"
2
+
3
+ module EmbulkRunHelper
4
+ include CaptureIo
5
+
6
+ def embulk_guess(seed_path, dest_path)
7
+ silence do
8
+ embulk_exec(%W(guess -g query_string #{seed_path} -o #{dest_path}))
9
+ end
10
+ end
11
+
12
+ def embulk_run(yaml_path)
13
+ embulk_exec(%W(run #{yaml_path}))
14
+ end
15
+
16
+ def embulk_exec(cli_options = [])
17
+ Embulk.run(cli_options)
18
+ end
19
+ end
@@ -0,0 +1,5 @@
1
+ require "embulk"
2
+
3
+ Embulk.setup
4
+
5
+ require "embulk/command/embulk_run"
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
4
+ lib_dir = File.join(base_dir, "lib")
5
+ test_dir = File.join(base_dir, "test")
6
+
7
+ require "test-unit"
8
+ require "test/unit/rr"
9
+
10
+ $LOAD_PATH.unshift(lib_dir)
11
+ $LOAD_PATH.unshift(test_dir)
12
+
13
+ ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"
14
+
15
+ exit Test::Unit::AutoRunner.run(true, test_dir, ARGV + %w(--collector=dir))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-unpack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Karri Niemelä
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-23 00:00:00.000000000 Z
11
+ date: 2016-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -52,21 +52,58 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- description: Parses files read by other file input plugins.
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ name: test-unit
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: test-unit-rr
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Parses fixed width files read by other file input plugins.
56
84
  email:
57
85
  - kakoni@gmail.com
58
86
  executables: []
59
87
  extensions: []
60
88
  extra_rdoc_files: []
61
89
  files:
90
+ - ".gitignore"
91
+ - ".ruby-version"
92
+ - ".travis.yml"
62
93
  - Gemfile
63
94
  - LICENSE
64
95
  - LICENSE.txt
65
96
  - README.md
66
97
  - Rakefile
67
98
  - embulk-parser-unpack.gemspec
99
+ - gemfiles/embulk-latest
68
100
  - lib/embulk/guess/unpack.rb
69
101
  - lib/embulk/parser/unpack.rb
102
+ - test/capture_io.rb
103
+ - test/embulk/parser/test_unpack.rb
104
+ - test/embulk_run_helper.rb
105
+ - test/prepare_embulk.rb
106
+ - test/run-test.rb
70
107
  homepage: https://github.com/kakoni/embulk-parser-unpack
71
108
  licenses:
72
109
  - MIT
@@ -91,4 +128,9 @@ rubygems_version: 2.4.8
91
128
  signing_key:
92
129
  specification_version: 4
93
130
  summary: Unpack parser plugin for Embulk
94
- test_files: []
131
+ test_files:
132
+ - test/capture_io.rb
133
+ - test/embulk/parser/test_unpack.rb
134
+ - test/embulk_run_helper.rb
135
+ - test/prepare_embulk.rb
136
+ - test/run-test.rb