embulk-filter-expand_json_array 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 39016765cabb31388f886776716e7c19dcedf13f
4
+ data.tar.gz: 0ff2be418ab5199de563afa1169f57695c775d1c
5
+ SHA512:
6
+ metadata.gz: fad74fff4af56f6ee404098ec16f2ed7c75e3b1b8b8aa1b8f4946b2201e732d414eca856649ff8dfaa2de20c610456ad64246c65aefe2d51e85d722fbfcca804
7
+ data.tar.gz: cec5c1c5f58feac4ec725dcf6057766b9573d9af563d548d4ae0c64845a0647d5be904492d80246ae14e0a22c945c8da00646b0809fb6844212518649b0ba761
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.2.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Expand Json Array filter plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: filter
6
+
7
+ This is an Embulk filter plugin that expands a record whose JSON column contains an array of size `N` into `N` records.
8
+
9
+ For example, the plugin expands the record below, whose JSON column `json_payload` contains a size-3 array under the key `latest_receipt_info`, into 3 records.
10
+
11
+ Before:
12
+
13
+ ```
14
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
15
+ | time:timestamp | id:long | name:string | score:double | json_payload:string |
16
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
17
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} |
18
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
19
+ ```
20
+
21
+ Filtered:
22
+
23
+ ```
24
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
25
+ | time:timestamp | id:long | name:string | score:double | json_payload:string | latest_receipt_info:string |
26
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
27
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 1 |
28
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 2 |
29
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 3 |
30
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
31
+ ```
32
+
33
+ ## Configuration
34
+
35
+ - **json_column_name**: name of the column that contains the JSON value (string, required)
36
+ - **root**: location of the array value within the JSON, specified as a [jsonpath](https://github.com/joshbuddy/jsonpath) expression (string, required)
37
+ - **expanded_column_name**: name of the column added to the output to hold each element of the expanded array (string, required)
38
+
39
+ ## Example
40
+
41
+ ```yaml
42
+ filters:
43
+ - type: expand_json_array
44
+ json_column_name: json_payload
45
+ root: "$.latest_receipt_info"
46
+ expanded_column_name: latest_receipt_info
47
+ ```
48
+
49
+
50
+ ## Build
51
+
52
+ ```
53
+ $ rake
54
+ ```
55
+
56
+ ## Reference
57
+
58
+ - [civitaspo/embulk-filter-expand_json](https://github.com/civitaspo/embulk-filter-expand_json)
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,21 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-filter-expand_json_array"
4
+ spec.version = "0.1.0"
5
+ spec.authors = ["Naoki AINOYA"]
6
+ spec.summary = "Expand Json Array filter plugin for Embulk"
7
+ spec.description = "Expand Json Array"
8
+ spec.email = ["ainonic@gmail.com"]
9
+ spec.licenses = ["MIT"]
10
+ # TODO set this: spec.homepage = "https://github.com/ainonic/embulk-filter-expand_json_array"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ spec.add_dependency 'jsonpath', ['>= 0.5.8']
17
+
18
+ spec.add_development_dependency 'embulk', ['>= 0.8.13']
19
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
20
+ spec.add_development_dependency 'rake', ['>= 10.0']
21
+ end
data/example/data.tsv ADDED
@@ -0,0 +1,2 @@
1
+ time id name score json_payload
2
+ 2015-10-11 08:06:23 +0900 0 Avis Lind -3256.869635206057 {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]}
@@ -0,0 +1,24 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ charset: UTF-8
8
+ newline: CRLF
9
+ null_string: 'NULL'
10
+ skip_header_lines: 1
11
+ comment_line_marker: '#'
12
+ columns:
13
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
14
+ - {name: id, type: long}
15
+ - {name: name, type: string}
16
+ - {name: score, type: double}
17
+ - {name: json_payload, type: string}
18
+ filters:
19
+ - type: expand_json_array
20
+ json_column_name: json_payload
21
+ root: "$.latest_receipt_info"
22
+ expanded_column_name: latest_receipt_info
23
+ out:
24
+ type: stdout
@@ -0,0 +1,59 @@
1
+ require 'jsonpath'
2
+
3
+ module Embulk
4
+ module Filter
5
+
6
+ class ExpandJsonArray < FilterPlugin
7
+ Plugin.register_filter("expand_json_array", self)
8
+
9
+ def self.transaction(config, in_schema, &control)
10
+ # configuration code:
11
+ task = {
12
+ "json_column_name" => config.param("json_column_name", :string),
13
+ "root" => config.param("root", :string),
14
+ "expanded_column_name" => config.param("expanded_column_name", :string)
15
+ }
16
+
17
+ task['parse_target_column'] = in_schema.find{|c| c.name == task['json_column_name']}
18
+
19
+ columns = [
20
+ Column.new(nil, task["expanded_column_name"], :string),
21
+ ]
22
+
23
+ out_columns = in_schema + columns
24
+
25
+ yield(task, out_columns)
26
+ end
27
+
28
+ def init
29
+ # initialization code:
30
+ @json_column_name = task["json_column_name"]
31
+ @root = task["root"]
32
+ @expanded_column_name = task["expanded_column_name"]
33
+
34
+ @parse_target_column = task['parse_target_column']
35
+
36
+ @json_path = JsonPath.new(@root)
37
+ end
38
+
39
+ def close
40
+ end
41
+
42
+ def add(page)
43
+ # filtering code:
44
+ page.each do |record|
45
+ expanded_columns = @json_path.on(record[@parse_target_column['index']]).flatten
46
+
47
+ expanded_columns.each do |ec|
48
+ page_builder.add(record + [ec])
49
+ end
50
+ end
51
+ end
52
+
53
+ def finish
54
+ page_builder.finish
55
+ end
56
+ end
57
+
58
+ end
59
+ end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-expand_json_array
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Naoki AINOYA
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-10-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 0.5.8
19
+ name: jsonpath
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.8
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.13
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.13
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Expand Json Array
70
+ email:
71
+ - ainonic@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-filter-expand_json_array.gemspec
83
+ - example/data.tsv
84
+ - example/embulk.yml
85
+ - lib/embulk/filter/expand_json_array.rb
86
+ homepage:
87
+ licenses:
88
+ - MIT
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.6.4
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Expand Json Array filter plugin for Embulk
110
+ test_files: []