embulk-filter-expand_json_array 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 39016765cabb31388f886776716e7c19dcedf13f
4
+ data.tar.gz: 0ff2be418ab5199de563afa1169f57695c775d1c
5
+ SHA512:
6
+ metadata.gz: fad74fff4af56f6ee404098ec16f2ed7c75e3b1b8b8aa1b8f4946b2201e732d414eca856649ff8dfaa2de20c610456ad64246c65aefe2d51e85d722fbfcca804
7
+ data.tar.gz: cec5c1c5f58feac4ec725dcf6057766b9573d9af563d548d4ae0c64845a0647d5be904492d80246ae14e0a22c945c8da00646b0809fb6844212518649b0ba761
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.2.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Expand Json Array filter plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: filter
6
+
7
+ This is an Embulk filter plugin that expands a record whose JSON column contains an array of size `N` into `N` records, one per array element.
8
+
9
+ For example, the plugin expands the record below, whose JSON column `json_payload` contains a size-3 array under `latest_receipt_info`, into 3 records.
10
+
11
+ Before:
12
+
13
+ ```
14
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
15
+ | time:timestamp | id:long | name:string | score:double | json_payload:string |
16
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
17
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} |
18
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
19
+ ```
20
+
21
+ Filtered:
22
+
23
+ ```
24
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
25
+ | time:timestamp | id:long | name:string | score:double | json_payload:string | latest_receipt_info:string |
26
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
27
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 1 |
28
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 2 |
29
+ | 2015-10-11 00:00:00 UTC | 0 | Avis Lind | -3256.869635206057 | {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]} | 3 |
30
+ +-------------------------+---------+-------------+--------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------+
31
+ ```
32
+
33
+ ## Configuration
34
+
35
+ - **json_column_name**: name of the column that holds the JSON value in each record (string, required)
36
+ - **root**: pointer to the array value inside the JSON, specified as a [jsonpath](https://github.com/joshbuddy/jsonpath) expression (string, required)
37
+ - **expanded_column_name**: name of the output column that receives each element of the target array (string, required)
38
+
39
+ ## Example
40
+
41
+ ```yaml
42
+ filters:
43
+ - type: expand_json_array
44
+ json_column_name: json_payload
45
+ root: "$.latest_receipt_info"
46
+ expanded_column_name: latest_receipt_info
47
+ ```
48
+
49
+
50
+ ## Build
51
+
52
+ ```
53
+ $ rake
54
+ ```
55
+
56
+ ## Reference
57
+
58
+ - [civitaspo/embulk-filter-expand_json](https://github.com/civitaspo/embulk-filter-expand_json)
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,21 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-filter-expand_json_array"
4
+ spec.version = "0.1.0"
5
+ spec.authors = ["Naoki AINOYA"]
6
+ spec.summary = "Expand Json Array filter plugin for Embulk"
7
+ spec.description = "Expand Json Array"
8
+ spec.email = ["ainonic@gmail.com"]
9
+ spec.licenses = ["MIT"]
10
+ # TODO set this: spec.homepage = "https://github.com/ainonic/embulk-filter-expand_json_array"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ spec.add_dependency 'jsonpath', ['>= 0.5.8']
17
+
18
+ spec.add_development_dependency 'embulk', ['>= 0.8.13']
19
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
20
+ spec.add_development_dependency 'rake', ['>= 10.0']
21
+ end
data/example/data.tsv ADDED
@@ -0,0 +1,2 @@
1
+ time id name score json_payload
2
+ 2015-10-11 08:06:23 +0900 0 Avis Lind -3256.869635206057 {"phone_numbers":"1-276-220-7263","app_id":0,"point":-1601.6890336884562,"created_at":"2015-10-07 20:23:57 +0900","profile":{"like_words":["maiores","eum","aut"],"anniversary":{"voluptatem":"dolor","et":"ullam"}},"latest_receipt_info":[1,2,3]}
@@ -0,0 +1,24 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./data.tsv
4
+ parser:
5
+ type: csv
6
+ delimiter: "\t"
7
+ charset: UTF-8
8
+ newline: CRLF
9
+ null_string: 'NULL'
10
+ skip_header_lines: 1
11
+ comment_line_marker: '#'
12
+ columns:
13
+ - {name: time, type: timestamp, format: "%Y-%m-%d"}
14
+ - {name: id, type: long}
15
+ - {name: name, type: string}
16
+ - {name: score, type: double}
17
+ - {name: json_payload, type: string}
18
+ filters:
19
+ - type: expand_json_array
20
+ json_column_name: json_payload
21
+ root: "$.latest_receipt_info"
22
+ expanded_column_name: latest_receipt_info
23
+ out:
24
+ type: stdout
@@ -0,0 +1,59 @@
1
+ require 'jsonpath'
2
+
3
+ module Embulk
4
+ module Filter
5
+
6
+ class ExpandJsonArray < FilterPlugin
7
+ Plugin.register_filter("expand_json_array", self)
8
+
9
+ def self.transaction(config, in_schema, &control)
10
+ # configuration code:
11
+ task = {
12
+ "json_column_name" => config.param("json_column_name", :string),
13
+ "root" => config.param("root", :string),
14
+ "expanded_column_name" => config.param("expanded_column_name", :string)
15
+ }
16
+
17
+ task['parse_target_column'] = in_schema.find{|c| c.name == task['json_column_name']}
18
+
19
+ columns = [
20
+ Column.new(nil, task["expanded_column_name"], :string),
21
+ ]
22
+
23
+ out_columns = in_schema + columns
24
+
25
+ yield(task, out_columns)
26
+ end
27
+
28
+ def init
29
+ # initialization code:
30
+ @json_column_name = task["json_column_name"]
31
+ @root = task["root"]
32
+ @expanded_column_name = task["expanded_column_name"]
33
+
34
+ @parse_target_column = task['parse_target_column']
35
+
36
+ @json_path = JsonPath.new(@root)
37
+ end
38
+
39
+ def close
40
+ end
41
+
42
+ def add(page)
43
+ # filtering code:
44
+ page.each do |record|
45
+ expanded_columns = @json_path.on(record[@parse_target_column['index']]).flatten
46
+
47
+ expanded_columns.each do |ec|
48
+ page_builder.add(record + [ec])
49
+ end
50
+ end
51
+ end
52
+
53
+ def finish
54
+ page_builder.finish
55
+ end
56
+ end
57
+
58
+ end
59
+ end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-expand_json_array
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Naoki AINOYA
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-10-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 0.5.8
19
+ name: jsonpath
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.5.8
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.13
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.13
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Expand Json Array
70
+ email:
71
+ - ainonic@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-filter-expand_json_array.gemspec
83
+ - example/data.tsv
84
+ - example/embulk.yml
85
+ - lib/embulk/filter/expand_json_array.rb
86
+ homepage:
87
+ licenses:
88
+ - MIT
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.6.4
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Expand Json Array filter plugin for Embulk
110
+ test_files: []