embulk-parser-unpack 0.1.0 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/.ruby-version +1 -0
- data/.travis.yml +12 -0
- data/README.md +14 -14
- data/Rakefile +6 -1
- data/embulk-parser-unpack.gemspec +4 -2
- data/gemfiles/embulk-latest +4 -0
- data/lib/embulk/parser/unpack.rb +3 -2
- data/test/capture_io.rb +45 -0
- data/test/embulk/parser/test_unpack.rb +52 -0
- data/test/embulk_run_helper.rb +19 -0
- data/test/prepare_embulk.rb +5 -0
- data/test/run-test.rb +15 -0
- metadata +46 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8189fb6c55eff53f51ba19551118505412203ea2
|
4
|
+
data.tar.gz: a88cef2596881d886ae9c7d94e773df70a8715c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3803a0ca9e19602eaf552b7b2f89432aac1020007e04b5befb097dff0c1903d1ec39850681eb2083d3ec7f828d8f6072608d0a26ecaba76eeaa98456ca9f0a49
|
7
|
+
data.tar.gz: 671b3071171d5e74f81c9f87f5d76b4578ca7be1d4c08d86e6b996d06e91d7c612bb800f07969a74cd7e78b4d3149faef12f161b243c009cfc66c46995013f18
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
jruby-9.1.5.0
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/kakoni/embulk-parser-unpack.svg?branch=master)](https://travis-ci.org/kakoni/embulk-parser-unpack)
|
2
|
+
|
1
3
|
# Unpack parser plugin for Embulk
|
2
4
|
|
3
|
-
|
5
|
+
Unpack parser. Useful for parsing fixed width format files.
|
6
|
+
Can be used to transform a line such as `FirstSecond Third` into `{key: "First", key2: "Second", key3: "Third"}`.
|
4
7
|
|
5
8
|
## Overview
|
6
9
|
|
@@ -9,9 +12,9 @@ TODO: Write short description here and embulk-parser-unpack.gemspec file.
|
|
9
12
|
|
10
13
|
## Configuration
|
11
14
|
|
12
|
-
- **
|
13
|
-
- **
|
14
|
-
- **
|
15
|
+
- **format**: Unpack format string. [See String#unpack](http://apidock.com/ruby/String/unpack) (string, required)
|
16
|
+
- **strip_whitespace**: Strip whitespace from parsed values. (bool, default: true)
|
17
|
+
- **columns**: Declares the list of columns; unpacked values are assigned to them in order.
|
15
18
|
|
16
19
|
## Example
|
17
20
|
|
@@ -20,19 +23,16 @@ in:
|
|
20
23
|
type: any file input plugin type
|
21
24
|
parser:
|
22
25
|
type: unpack
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
format: a2a5@10a4 #Extracts three values, first 2 chars, then 5 chars and lastly 4 chars from position 10.
|
27
|
+
columns:
|
28
|
+
- {name: first, type: string}
|
29
|
+
- {name: second, type: string}
|
30
|
+
- {name: third, type: string}
|
28
31
|
|
29
|
-
```
|
30
|
-
$ embulk gem install embulk-parser-unpack
|
31
|
-
$ embulk guess -g unpack config.yml -o guessed.yml
|
32
32
|
```
|
33
33
|
|
34
|
-
##
|
34
|
+
## Install plugin
|
35
35
|
|
36
36
|
```
|
37
|
-
$
|
37
|
+
$ embulk gem install embulk-parser-unpack
|
38
38
|
```
|
data/Rakefile
CHANGED
data/embulk-parser-unpack.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-parser-unpack"
|
4
|
-
spec.version = "0.1.0"
|
4
|
+
spec.version = "0.1.5"
|
5
5
|
spec.authors = ["Karri Niemel\u{e4}"]
|
6
6
|
spec.summary = "Unpack parser plugin for Embulk"
|
7
|
-
spec.description = "Parses files read by other file input plugins."
|
7
|
+
spec.description = "Parses fixed width files read by other file input plugins."
|
8
8
|
spec.email = ["kakoni@gmail.com"]
|
9
9
|
spec.licenses = ["MIT"]
|
10
10
|
spec.homepage = "https://github.com/kakoni/embulk-parser-unpack"
|
@@ -16,4 +16,6 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.add_development_dependency 'embulk', ['>= 0.8.9']
|
17
17
|
spec.add_development_dependency 'bundler', ['>= 1.10.6']
|
18
18
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
19
|
+
spec.add_development_dependency 'test-unit'
|
20
|
+
spec.add_development_dependency 'test-unit-rr'
|
19
21
|
end
|
data/lib/embulk/parser/unpack.rb
CHANGED
@@ -11,6 +11,7 @@ module Embulk
|
|
11
11
|
task = {
|
12
12
|
"decoder" => DataSource.from_java(decoder_task.dump),
|
13
13
|
"format" => config.param("format", :string),
|
14
|
+
"strip_whitespace" => config.param("strip_whitespace", :bool, default: true),
|
14
15
|
}
|
15
16
|
|
16
17
|
columns = []
|
@@ -18,7 +19,6 @@ module Embulk
|
|
18
19
|
schema.each do |column|
|
19
20
|
name = column["name"]
|
20
21
|
type = column["type"].to_sym
|
21
|
-
|
22
22
|
columns << Column.new(nil, name, type)
|
23
23
|
end
|
24
24
|
|
@@ -28,7 +28,7 @@ module Embulk
|
|
28
28
|
def init
|
29
29
|
@format = task["format"]
|
30
30
|
@decoder = task.param("decoder", :hash).load_task(Java::LineDecoder::DecoderTask)
|
31
|
-
|
31
|
+
@strip_whitespace = task["strip_whitespace"]
|
32
32
|
end
|
33
33
|
|
34
34
|
|
@@ -48,6 +48,7 @@ module Embulk
|
|
48
48
|
|
49
49
|
def process_line(line)
|
50
50
|
values = line.unpack(@format)
|
51
|
+
values.map(&:strip!) if @strip_whitespace
|
51
52
|
page_builder.add(values)
|
52
53
|
end
|
53
54
|
|
data/test/capture_io.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
module CaptureIo
|
2
|
+
def capture(output = :out, &block)
|
3
|
+
_, out = swap_io(output, &block)
|
4
|
+
out
|
5
|
+
end
|
6
|
+
|
7
|
+
def silence(&block)
|
8
|
+
block_result = nil
|
9
|
+
swap_io(:out) do
|
10
|
+
block_result,_ = swap_io(:err, &block)
|
11
|
+
end
|
12
|
+
block_result
|
13
|
+
end
|
14
|
+
|
15
|
+
def swap_io(output = :out, &block)
|
16
|
+
java_import 'java.io.PrintStream'
|
17
|
+
java_import 'java.io.ByteArrayOutputStream'
|
18
|
+
java_import 'java.lang.System'
|
19
|
+
|
20
|
+
ruby_original_stream = output == :out ? $stdout.dup : $stderr.dup
|
21
|
+
java_original_stream = System.send(output) # :out or :err
|
22
|
+
ruby_buf = StringIO.new
|
23
|
+
java_buf = ByteArrayOutputStream.new
|
24
|
+
|
25
|
+
case output
|
26
|
+
when :out
|
27
|
+
$stdout = ruby_buf
|
28
|
+
System.setOut(PrintStream.new(java_buf))
|
29
|
+
when :err
|
30
|
+
$stderr = ruby_buf
|
31
|
+
System.setErr(PrintStream.new(java_buf))
|
32
|
+
end
|
33
|
+
|
34
|
+
[block.call, ruby_buf.string + java_buf.toString]
|
35
|
+
ensure
|
36
|
+
case output
|
37
|
+
when :out
|
38
|
+
$stdout = ruby_original_stream
|
39
|
+
System.setOut(java_original_stream)
|
40
|
+
when :err
|
41
|
+
$stderr = ruby_original_stream
|
42
|
+
System.setErr(java_original_stream)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/test/embulk/parser/test_unpack.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "prepare_embulk"
|
2
|
+
require "embulk/parser/unpack"
|
3
|
+
require "embulk/data_source"
|
4
|
+
|
5
|
+
|
6
|
+
module Embulk
|
7
|
+
module Parser
|
8
|
+
class QueryStringTest < Test::Unit::TestCase
|
9
|
+
|
10
|
+
class TestProcessLine < self
|
11
|
+
|
12
|
+
def test_foo
|
13
|
+
mock(page_builder).add(["eka", "Toka", "Kolmas", "Neljas"])
|
14
|
+
line = "ekaTokaKolmas Neljas"
|
15
|
+
plugin.send(:process_line, line)
|
16
|
+
end
|
17
|
+
|
18
|
+
def plugin
|
19
|
+
@plugin ||= Unpack.new(DataSource[task], schema, page_builder)
|
20
|
+
end
|
21
|
+
|
22
|
+
def page_builder
|
23
|
+
@page_builder ||= Object.new
|
24
|
+
end
|
25
|
+
|
26
|
+
def task
|
27
|
+
{
|
28
|
+
"decoder" => {"Charset" => "UTF-8", "Newline" => "CRLF"},
|
29
|
+
"format" => "a3a4a6@14a6",
|
30
|
+
"columns" => columns,
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
def columns
|
35
|
+
[
|
36
|
+
{"name" => "foo", "type" => :string},
|
37
|
+
{"name" => "bar", "type" => :string},
|
38
|
+
{"name" => "baz", "type" => :string},
|
39
|
+
{"name" => "qux", "type" => :string},
|
40
|
+
]
|
41
|
+
end
|
42
|
+
|
43
|
+
def schema
|
44
|
+
columns.map do |column|
|
45
|
+
Column.new(nil, column["name"], column["type"].to_sym)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/test/embulk_run_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require "capture_io"
|
2
|
+
|
3
|
+
module EmbulkRunHelper
|
4
|
+
include CaptureIo
|
5
|
+
|
6
|
+
def embulk_guess(seed_path, dest_path)
|
7
|
+
silence do
|
8
|
+
embulk_exec(%W(guess -g query_string #{seed_path} -o #{dest_path}))
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def embulk_run(yaml_path)
|
13
|
+
embulk_exec(%W(run #{yaml_path}))
|
14
|
+
end
|
15
|
+
|
16
|
+
def embulk_exec(cli_options = [])
|
17
|
+
Embulk.run(cli_options)
|
18
|
+
end
|
19
|
+
end
|
data/test/run-test.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
4
|
+
lib_dir = File.join(base_dir, "lib")
|
5
|
+
test_dir = File.join(base_dir, "test")
|
6
|
+
|
7
|
+
require "test-unit"
|
8
|
+
require "test/unit/rr"
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(lib_dir)
|
11
|
+
$LOAD_PATH.unshift(test_dir)
|
12
|
+
|
13
|
+
ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"
|
14
|
+
|
15
|
+
exit Test::Unit::AutoRunner.run(true, test_dir, ARGV + %w(--collector=dir))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-unpack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Karri Niemelä
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,21 +52,58 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '10.0'
|
55
|
-
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
name: test-unit
|
62
|
+
prerelease: false
|
63
|
+
type: :development
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
name: test-unit-rr
|
76
|
+
prerelease: false
|
77
|
+
type: :development
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Parses fixed width files read by other file input plugins.
|
56
84
|
email:
|
57
85
|
- kakoni@gmail.com
|
58
86
|
executables: []
|
59
87
|
extensions: []
|
60
88
|
extra_rdoc_files: []
|
61
89
|
files:
|
90
|
+
- ".gitignore"
|
91
|
+
- ".ruby-version"
|
92
|
+
- ".travis.yml"
|
62
93
|
- Gemfile
|
63
94
|
- LICENSE
|
64
95
|
- LICENSE.txt
|
65
96
|
- README.md
|
66
97
|
- Rakefile
|
67
98
|
- embulk-parser-unpack.gemspec
|
99
|
+
- gemfiles/embulk-latest
|
68
100
|
- lib/embulk/guess/unpack.rb
|
69
101
|
- lib/embulk/parser/unpack.rb
|
102
|
+
- test/capture_io.rb
|
103
|
+
- test/embulk/parser/test_unpack.rb
|
104
|
+
- test/embulk_run_helper.rb
|
105
|
+
- test/prepare_embulk.rb
|
106
|
+
- test/run-test.rb
|
70
107
|
homepage: https://github.com/kakoni/embulk-parser-unpack
|
71
108
|
licenses:
|
72
109
|
- MIT
|
@@ -91,4 +128,9 @@ rubygems_version: 2.4.8
|
|
91
128
|
signing_key:
|
92
129
|
specification_version: 4
|
93
130
|
summary: Unpack parser plugin for Embulk
|
94
|
-
test_files:
|
131
|
+
test_files:
|
132
|
+
- test/capture_io.rb
|
133
|
+
- test/embulk/parser/test_unpack.rb
|
134
|
+
- test/embulk_run_helper.rb
|
135
|
+
- test/prepare_embulk.rb
|
136
|
+
- test/run-test.rb
|