embulk-filter-ruby_proc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 911204f43abd2acd2d9ac90317e474994fbdf221
4
+ data.tar.gz: 4521e06844679d721b5e9a21e7d01a368b600278
5
+ SHA512:
6
+ metadata.gz: 8da351fbf8fe0f39f9e29289bd0365bce6ecc03af0f833c2088fda6fdc9a7b0789781f99a70b44e452adda9824804e192e8256e1290e5f61154c5d497084f3d9
7
+ data.tar.gz: 2f1ad9b592fb2210ada815239e5bed14f1f4265c2fc43ee715102542e87f97dacef2998031ad4b44b693ecf0fc3aef1b6afdd0b3ea2ce4e3e9c6da32d2fbd600
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /example/out*
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,80 @@
1
+ # Ruby Proc filter plugin for Embulk
2
+
3
+ This plugin is inspired by [mgi166/embulk-filter-eval: Eval ruby code on filtering](https://github.com/mgi166/embulk-filter-eval "mgi166/embulk-filter-eval: Eval ruby code on filtering")
4
+
5
+ This plugin applies a Ruby proc to each configured column of every record.
6
+
7
+ ## Overview
8
+
9
+ * **Plugin type**: filter
10
+
11
+ ## Configuration
12
+
13
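+ - **columns**: list of per-column filter definitions; each entry takes `name` and `proc`, and optionally `type`, `format` and `skip_nil` (array, required)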
14
+ - **requires**: libraries to `require` before the procs are evaluated (array, default: `[]`)
15
+
16
+ ## Example
17
+
18
+ ### input
19
+ ```csv
20
+ id,account,time,purchase,comment,data
21
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}"
22
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,NULL
23
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL
24
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL
25
+ ```
26
+
27
+ ### config
28
+ ```yaml
29
+ # ...
30
+
31
+ filters:
32
+ - type: ruby_proc
33
+ requires:
34
+ - cgi
35
+ columns:
36
+ - name: data
37
+ proc: |
38
+ ->(data) do
39
+ data["events"] = data["events"].map.with_index do |e, idx|
40
+ e.tap { |e_| e_["idx"] = idx }
41
+ end
42
+ data.to_json
43
+ end
44
+ - name: id
45
+ proc: |
46
+ ->(id) do
47
+ id * 2
48
+ end
49
+ type: string
50
+ - name: comment
51
+ proc: |
52
+ ->(comment, record) do
53
+ return [record["account"].to_s].to_json unless comment
54
+ comment.upcase.split(" ").map { |s| CGI.escape(s) }.to_json
55
+ end
56
+ skip_nil: false
57
+ type: json
58
+ target: events
59
+
60
+ # ...
61
+
62
+ ```
63
+
64
+ ### preview
65
+ ```
66
+ +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
67
+ | id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
68
+ +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
69
+ | 2 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
70
+ | 4 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
71
+ | 6 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
72
+ | 8 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
73
+ +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
74
+ ```
75
+
76
+ ## Build
77
+
78
+ ```
79
+ $ rake
80
+ ```
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,19 @@
1
+
2
# Gem specification for the embulk-filter-ruby_proc plugin.
Gem::Specification.new do |s|
  # Identity and descriptive metadata.
  s.name        = "embulk-filter-ruby_proc"
  s.version     = "0.1.0"
  s.authors     = ["joker1007"]
  s.summary     = "Ruby Proc filter plugin for Embulk"
  s.description = "Filter each record by ruby proc"
  s.email       = ["kakyoin.hierophant@gmail.com"]
  s.licenses    = ["MIT"]
  s.homepage    = "https://github.com/kakyoin.hierophant/embulk-filter-ruby_proc"

  # Packaged files: everything tracked by git plus any jars under classpath/.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir["classpath/*.jar"]
  s.files         = tracked_files + bundled_jars
  s.test_files    = s.files.grep(%r{^(test|spec)/})
  s.require_paths = ["lib"]

  # Development-only dependencies, declared in the original order.
  [
    ['embulk', '>= 0.8.1'],
    ['bundler', '>= 1.10.6'],
    ['rake', '>= 10.0'],
  ].each do |gem_name, constraint|
    s.add_development_dependency gem_name, [constraint]
  end
end
@@ -0,0 +1,67 @@
1
+ in:
2
+ type: file
3
+ path_prefix: ./sample_
4
+ parser:
5
+ charset: UTF-8
6
+ newline: CRLF
7
+ type: csv
8
+ delimiter: ','
9
+ quote: '"'
10
+ escape: '\'
11
+ null_string: 'NULL'
12
+ trim_if_not_quoted: false
13
+ skip_header_lines: 1
14
+ allow_extra_columns: false
15
+ allow_optional_columns: false
16
+ columns:
17
+ - {name: id, type: long}
18
+ - {name: account, type: long}
19
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
20
+ - {name: purchase, type: timestamp, format: '%Y%m%d'}
21
+ - {name: comment, type: string}
22
+ - {name: data, type: json}
23
+
24
+ filters:
25
+ - type: ruby_proc
26
+ requires:
27
+ - cgi
28
+ columns:
29
+ - name: data
30
+ proc: |
31
+ ->(data) do
32
+ data["events"] = data["events"].map.with_index do |e, idx|
33
+ e.tap { |e_| e_["idx"] = idx }
34
+ end
35
+ data.to_json
36
+ end
37
+ - name: id
38
+ proc: |
39
+ ->(id) do
40
+ id * 2
41
+ end
42
+ type: string
43
+ - name: comment
44
+ proc: |
45
+ ->(comment, record) do
46
+ return [record["account"].to_s].to_json unless comment
47
+ comment.upcase.split(" ").map { |s| CGI.escape(s) }.to_json
48
+ end
49
+ skip_nil: false
50
+ type: json
51
+ target: events
52
+
53
+ out:
54
+ type: file
55
+ path_prefix: ./out_
56
+ file_ext: tsv
57
+ formatter:
58
+ type: csv
59
+ delimiter: "\t"
60
+ newline: CRLF
61
+ newline_in_field: LF
62
+ charset: UTF-8
63
+ quote_policy: MINIMAL
64
+ quote: '"'
65
+ escape: "\\"
66
+ null_string: 'NULL'
67
+ default_timezone: 'UTC'
@@ -0,0 +1,5 @@
1
+ id,account,time,purchase,comment,data
2
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}"
3
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,NULL
4
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL
5
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL
@@ -0,0 +1,70 @@
1
module Embulk
  module Filter

    # Embulk filter plugin, registered under the type name "ruby_proc", that
    # rewrites column values by calling Ruby procs supplied in the config.
    class RubyProc < FilterPlugin
      Plugin.register_filter("ruby_proc", self)

      # Reads the plugin config, derives the output schema and yields both.
      #
      # An input column that has a same-named entry in "columns" is re-declared
      # with that entry's "type" / "format" when given (falling back to the
      # input column's own values); every other column is passed through as is.
      def self.transaction(config, in_schema, &control)
        task = {
          "columns" => config.param("columns", :array),
          "requires" => config.param("requires", :array, default: []),
        }

        out_columns = in_schema.map do |col|
          filter = task["columns"].find { |entry| entry["name"] == col.name }
          next col unless filter

          declared_type = filter["type"]
          Embulk::Column.new(
            index: col.index,
            name: col.name,
            type: declared_type ? declared_type.to_sym : col.type,
            format: filter["format"] || col.format
          )
        end

        yield(task, out_columns)
      end

      # Loads the configured libraries, then builds two lookup tables keyed by
      # column name: the evaluated procs and the per-column skip_nil flags.
      def init
        task["requires"].each { |lib| require lib }

        # NOTE(review): `eval` runs the "proc" source from the config verbatim,
        # so the config must come from a trusted author. The evaluated code
        # captures this block's binding, hence the unchanged parameter name and
        # the absence of extra method-level locals here.
        @procs = task["columns"].map { |col| [col["name"], eval(col["proc"])] }.to_h

        # skip_nil defaults to true when the key is absent or nil.
        @skip_nils = task["columns"].map { |col|
          setting = col["skip_nil"]
          [col["name"], setting.nil? || !!setting]
        }.to_h
      end

      def close
      end

      # Runs every configured proc over each record of the page and forwards
      # the resulting values to the page builder.
      def add(page)
        page.each do |record|
          row = hashrize(record)

          @procs.each do |name, fn|
            next unless row.key?(name)

            current = row[name]
            next if current.nil? && @skip_nils[name]

            # Procs of arity 1 get only the value; all others also get the row.
            row[name] = fn.arity == 1 ? fn.call(current) : fn.call(current, row)
          end

          page_builder.add(row.values)
        end
      end

      def finish
        page_builder.finish
      end

      private

      # Pairs the input schema's column names with the record's values.
      def hashrize(record)
        in_schema.names.zip(record).to_h
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-ruby_proc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - joker1007
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.1
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: 0.8.1
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.10.6
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: 1.10.6
39
+ prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '10.0'
53
+ prerelease: false
54
+ type: :development
55
+ description: Filter each record by ruby proc
56
+ email:
57
+ - kakyoin.hierophant@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".ruby-version"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - embulk-filter-ruby_proc.gemspec
69
+ - example/config.yml
70
+ - example/sample_01.csv
71
+ - lib/embulk/filter/ruby_proc.rb
72
+ homepage: https://github.com/kakyoin.hierophant/embulk-filter-ruby_proc
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.4.8
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Ruby Proc filter plugin for Embulk
96
+ test_files: []