embulk-filter-ruby_proc 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.ruby-version +1 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +80 -0
- data/Rakefile +3 -0
- data/embulk-filter-ruby_proc.gemspec +19 -0
- data/example/config.yml +67 -0
- data/example/sample_01.csv +5 -0
- data/lib/embulk/filter/ruby_proc.rb +70 -0
- metadata +96 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 911204f43abd2acd2d9ac90317e474994fbdf221
|
4
|
+
data.tar.gz: 4521e06844679d721b5e9a21e7d01a368b600278
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8da351fbf8fe0f39f9e29289bd0365bce6ecc03af0f833c2088fda6fdc9a7b0789781f99a70b44e452adda9824804e192e8256e1290e5f61154c5d497084f3d9
|
7
|
+
data.tar.gz: 2f1ad9b592fb2210ada815239e5bed14f1f4265c2fc43ee715102542e87f97dacef2998031ad4b44b693ecf0fc3aef1b6afdd0b3ea2ce4e3e9c6da32d2fbd600
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
jruby-9.0.4.0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# Ruby Proc filter plugin for Embulk
|
2
|
+
|
3
|
+
This plugin is inspired by [mgi166/embulk-filter-eval: Eval ruby code on filtering](https://github.com/mgi166/embulk-filter-eval "mgi166/embulk-filter-eval: Eval ruby code on filtering")
|
4
|
+
|
5
|
+
This plugin applies a Ruby proc to each record.
|
6
|
+
|
7
|
+
## Overview
|
8
|
+
|
9
|
+
* **Plugin type**: filter
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
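- **columns**: filter definitions, one per target column (array of hashes, required). Each entry takes `name` and `proc`, plus optional `type`, `format`, and `skip_nil` (default: `true`).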
|
14
|
+
- **requires**: libraries to `require` before the procs are evaluated (array, default: `[]`)
|
15
|
+
|
16
|
+
## Example
|
17
|
+
|
18
|
+
### input
|
19
|
+
```csv
|
20
|
+
id,account,time,purchase,comment,data
|
21
|
+
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}"
|
22
|
+
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,NULL
|
23
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL
|
24
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL
|
25
|
+
```
|
26
|
+
|
27
|
+
### config
|
28
|
+
```yaml
|
29
|
+
# ...
|
30
|
+
|
31
|
+
filters:
|
32
|
+
- type: ruby_proc
|
33
|
+
requires:
|
34
|
+
- cgi
|
35
|
+
columns:
|
36
|
+
- name: data
|
37
|
+
proc: |
|
38
|
+
->(data) do
|
39
|
+
data["events"] = data["events"].map.with_index do |e, idx|
|
40
|
+
e.tap { |e_| e_["idx"] = idx }
|
41
|
+
end
|
42
|
+
data.to_json
|
43
|
+
end
|
44
|
+
- name: id
|
45
|
+
proc: |
|
46
|
+
->(id) do
|
47
|
+
id * 2
|
48
|
+
end
|
49
|
+
type: string
|
50
|
+
- name: comment
|
51
|
+
proc: |
|
52
|
+
->(comment, record) do
|
53
|
+
return [record["account"].to_s].to_json unless comment
|
54
|
+
comment.upcase.split(" ").map { |s| CGI.escape(s) }.to_json
|
55
|
+
end
|
56
|
+
skip_nil: false
|
57
|
+
type: json
|
58
|
+
target: events
|
59
|
+
|
60
|
+
# ...
|
61
|
+
|
62
|
+
```
|
63
|
+
|
64
|
+
### preview
|
65
|
+
```
|
66
|
+
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
67
|
+
| id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
|
68
|
+
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
69
|
+
| 2 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
70
|
+
| 4 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
71
|
+
| 6 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
72
|
+
| 8 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
73
|
+
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
74
|
+
```
|
75
|
+
|
76
|
+
## Build
|
77
|
+
|
78
|
+
```
|
79
|
+
$ rake
|
80
|
+
```
|
data/Rakefile
ADDED
data/embulk-filter-ruby_proc.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
# Gem specification for the ruby_proc Embulk filter plugin.
Gem::Specification.new do |spec|
  spec.name          = "embulk-filter-ruby_proc"
  spec.version       = "0.1.0"
  spec.authors       = ["joker1007"]
  spec.summary       = "Ruby Proc filter plugin for Embulk"
  spec.description   = "Filter each record by ruby proc"
  spec.email         = ["kakyoin.hierophant@gmail.com"]
  spec.licenses      = ["MIT"]
  # The previous value used the e-mail local part ("kakyoin.hierophant") as the
  # GitHub account; account names cannot contain ".", so that URL could never
  # resolve. NOTE(review): confirm the repository lives under the author's
  # account name.
  spec.homepage      = "https://github.com/joker1007/embulk-filter-ruby_proc"

  # Package every git-tracked file plus any jars found under classpath/.
  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  # All declared dependencies are development-only.
  spec.add_development_dependency 'embulk', ['>= 0.8.1']
  spec.add_development_dependency 'bundler', ['>= 1.10.6']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
|
data/example/config.yml
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: ./sample_
|
4
|
+
parser:
|
5
|
+
charset: UTF-8
|
6
|
+
newline: CRLF
|
7
|
+
type: csv
|
8
|
+
delimiter: ','
|
9
|
+
quote: '"'
|
10
|
+
escape: '\'
|
11
|
+
null_string: 'NULL'
|
12
|
+
trim_if_not_quoted: false
|
13
|
+
skip_header_lines: 1
|
14
|
+
allow_extra_columns: false
|
15
|
+
allow_optional_columns: false
|
16
|
+
columns:
|
17
|
+
- {name: id, type: long}
|
18
|
+
- {name: account, type: long}
|
19
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
20
|
+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
|
21
|
+
- {name: comment, type: string}
|
22
|
+
- {name: data, type: json}
|
23
|
+
|
24
|
+
filters:
|
25
|
+
- type: ruby_proc
|
26
|
+
requires:
|
27
|
+
- cgi
|
28
|
+
columns:
|
29
|
+
- name: data
|
30
|
+
proc: |
|
31
|
+
->(data) do
|
32
|
+
data["events"] = data["events"].map.with_index do |e, idx|
|
33
|
+
e.tap { |e_| e_["idx"] = idx }
|
34
|
+
end
|
35
|
+
data.to_json
|
36
|
+
end
|
37
|
+
- name: id
|
38
|
+
proc: |
|
39
|
+
->(id) do
|
40
|
+
id * 2
|
41
|
+
end
|
42
|
+
type: string
|
43
|
+
- name: comment
|
44
|
+
proc: |
|
45
|
+
->(comment, record) do
|
46
|
+
return [record["account"].to_s].to_json unless comment
|
47
|
+
comment.upcase.split(" ").map { |s| CGI.escape(s) }.to_json
|
48
|
+
end
|
49
|
+
skip_nil: false
|
50
|
+
type: json
|
51
|
+
target: events
|
52
|
+
|
53
|
+
out:
|
54
|
+
type: file
|
55
|
+
path_prefix: ./out_
|
56
|
+
file_ext: tsv
|
57
|
+
formatter:
|
58
|
+
type: csv
|
59
|
+
delimiter: "\t"
|
60
|
+
newline: CRLF
|
61
|
+
newline_in_field: LF
|
62
|
+
charset: UTF-8
|
63
|
+
quote_policy: MINIMAL
|
64
|
+
quote: '"'
|
65
|
+
escape: "\\"
|
66
|
+
null_string: 'NULL'
|
67
|
+
default_timezone: 'UTC'
|
data/example/sample_01.csv
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
id,account,time,purchase,comment,data
|
2
|
+
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}"
|
3
|
+
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,NULL
|
4
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL
|
5
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL
|
data/lib/embulk/filter/ruby_proc.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module Embulk
  module Filter

    # Filter plugin that rewrites column values by running Ruby procs taken
    # from the plugin configuration against every record.
    class RubyProc < FilterPlugin
      Plugin.register_filter("ruby_proc", self)

      # Reads the "columns" and "requires" settings into the task and derives
      # the output schema from +in_schema+.
      #
      # A schema column whose name matches a configured entry keeps its index
      # and name; its type becomes the entry's "type" (as a symbol) and its
      # format the entry's "format" when those are given. Every other column is
      # passed through unchanged. The task and the resulting columns are then
      # yielded to the caller's block.
      def self.transaction(config, in_schema, &control)
        task = {
          "columns" => config.param("columns", :array),
          "requires" => config.param("requires", :array, default: []),
        }

        out_columns = in_schema.map do |col|
          definition = task["columns"].find { |entry| entry["name"] == col.name }
          next col unless definition

          declared_type = definition["type"]
          Embulk::Column.new(
            index: col.index,
            name: col.name,
            type: declared_type ? declared_type.to_sym : col.type,
            format: definition["format"] || col.format
          )
        end

        yield(task, out_columns)
      end

      # Loads the configured libraries, then builds two lookup tables keyed by
      # column name: the evaluated procs and the per-column skip_nil flags.
      def init
        task["requires"].each { |lib| require lib }

        # The "proc" source is evaluated as arbitrary Ruby in this method's
        # binding; the block variable stays named +col+ so the evaluated code
        # sees the same locals as before.
        @procs = task["columns"].map { |col| [col["name"], eval(col["proc"])] }.to_h

        # skip_nil is on unless the entry sets it explicitly; any explicit
        # value is coerced to a strict boolean.
        @skip_nils = task["columns"].map { |col|
          flag = col["skip_nil"]
          [col["name"], flag.nil? || !!flag]
        }.to_h
      end

      def close
      end

      # Applies the configured procs to each record of +page+ and appends the
      # resulting values to the page builder.
      #
      # A proc is skipped when its column is not part of the record, or when
      # the value is nil and skip_nil is in effect for that column. Procs of
      # arity 1 receive only the column value; all others receive the value
      # and the whole record hash.
      def add(page)
        page.each do |record|
          row = hashrize(record)

          @procs.each do |name, callable|
            next unless row.key?(name)

            value = row[name]
            next if value.nil? && @skip_nils[name]

            arguments = callable.arity == 1 ? [value] : [value, row]
            row[name] = callable.call(*arguments)
          end

          page_builder.add(row.values)
        end
      end

      def finish
        page_builder.finish
      end

      private

      # Pairs the input schema's column names with the record's values, in
      # order, and returns them as a Hash.
      def hashrize(record)
        in_schema.names.zip(record).to_h
      end
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-filter-ruby_proc
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- joker1007
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-02-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: embulk
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.8.1
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.8.1
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.10.6
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: 1.10.6
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '10.0'
|
53
|
+
prerelease: false
|
54
|
+
type: :development
|
55
|
+
description: Filter each record by ruby proc
|
56
|
+
email:
|
57
|
+
- kakyoin.hierophant@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".ruby-version"
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- embulk-filter-ruby_proc.gemspec
|
69
|
+
- example/config.yml
|
70
|
+
- example/sample_01.csv
|
71
|
+
- lib/embulk/filter/ruby_proc.rb
|
72
|
+
homepage: https://github.com/kakyoin.hierophant/embulk-filter-ruby_proc
|
73
|
+
licenses:
|
74
|
+
- MIT
|
75
|
+
metadata: {}
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirements: []
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 2.4.8
|
93
|
+
signing_key:
|
94
|
+
specification_version: 4
|
95
|
+
summary: Ruby Proc filter plugin for Embulk
|
96
|
+
test_files: []
|