embulk-filter-ruby_proc 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +17 -0
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/lib/embulk/filter/ruby_proc.rb +47 -20
- metadata +19 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
|
4
|
+
data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
|
7
|
+
data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
|
data/README.md
CHANGED
@@ -51,6 +51,11 @@ filters:
|
|
51
51
|
->(record) do
|
52
52
|
[record.dup, record.dup.tap { |r| r["id"] += 10 }]
|
53
53
|
end
|
54
|
+
skip_rows:
|
55
|
+
- proc: |
|
56
|
+
->(record) do
|
57
|
+
record["id"].odd?
|
58
|
+
end
|
54
59
|
columns:
|
55
60
|
- name: data
|
56
61
|
proc: |
|
@@ -75,6 +80,8 @@ filters:
|
|
75
80
|
|
76
81
|
```
|
77
82
|
|
83
|
+
To skip a record from within a "rows proc" or a "columns proc", call `throw :skip_record`.
|
84
|
+
|
78
85
|
```rb
|
79
86
|
# comment_upcase.rb
|
80
87
|
|
@@ -90,6 +97,16 @@ end
|
|
90
97
|
- A rows proc must return a record hash or an array of record hashes.
|
91
98
|
- The user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
|
92
99
|
|
100
|
+
### proc execution order
|
101
|
+
|
102
|
+
1. before procs
|
103
|
+
1. per record
|
104
|
+
1. all row procs
|
105
|
+
1. per record returned by the row procs
|
106
|
+
1. all skip\_row procs
|
107
|
+
1. column procs
|
108
|
+
1. after procs
|
109
|
+
|
93
110
|
### preview
|
94
111
|
```
|
95
112
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
@@ -33,6 +33,7 @@ module Embulk
|
|
33
33
|
task = {
|
34
34
|
"columns" => config.param("columns", :array, default: []),
|
35
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"skip_rows" => config.param("skip_rows", :array, default: []),
|
36
37
|
"before" => config.param("before", :array, default: []),
|
37
38
|
"after" => config.param("after", :array, default: []),
|
38
39
|
"requires" => config.param("requires", :array, default: []),
|
@@ -55,6 +56,7 @@ module Embulk
|
|
55
56
|
|
56
57
|
@proc_store ||= {}
|
57
58
|
@row_proc_store ||= {}
|
59
|
+
@skip_row_proc_store ||= {}
|
58
60
|
transaction_id = rand(100000000)
|
59
61
|
until !@proc_store.has_key?(transaction_id)
|
60
62
|
transaction_id = rand(100000000)
|
@@ -83,8 +85,15 @@ module Embulk
|
|
83
85
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
84
86
|
end
|
85
87
|
}.compact
|
88
|
+
@skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
|
89
|
+
if rowdef["proc"]
|
90
|
+
eval(rowdef["proc"], evaluator_binding)
|
91
|
+
else
|
92
|
+
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
93
|
+
end
|
94
|
+
}.compact
|
86
95
|
task["transaction_id"] = transaction_id
|
87
|
-
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
|
96
|
+
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
|
88
97
|
|
89
98
|
before_procs.each do |pr|
|
90
99
|
pr.call
|
@@ -113,6 +122,10 @@ module Embulk
|
|
113
122
|
@row_proc_store
|
114
123
|
end
|
115
124
|
|
125
|
+
def self.skip_row_proc_store
|
126
|
+
@skip_row_proc_store
|
127
|
+
end
|
128
|
+
|
116
129
|
def self.parse_col_procs(columns, evaluator_binding)
|
117
130
|
Hash[columns.map {|col|
|
118
131
|
if col["proc"]
|
@@ -138,13 +151,15 @@ module Embulk
|
|
138
151
|
require lib
|
139
152
|
end
|
140
153
|
|
141
|
-
if self.class.proc_store.nil? || self.class.row_proc_store.nil?
|
154
|
+
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
142
155
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
143
156
|
@procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
|
144
157
|
@row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
|
158
|
+
@skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
|
145
159
|
else
|
146
160
|
@procs = self.class.proc_store[task["transaction_id"]]
|
147
161
|
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
162
|
+
@skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
|
148
163
|
end
|
149
164
|
@skip_nils = Hash[task["columns"].map {|col|
|
150
165
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
@@ -160,32 +175,40 @@ module Embulk
|
|
160
175
|
record_hashes = [hashrize(record)]
|
161
176
|
else
|
162
177
|
record_hashes = row_procs.each_with_object([]) do |pr, arr|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
178
|
+
catch :skip_record do
|
179
|
+
result = pr.call(hashrize(record))
|
180
|
+
case result
|
181
|
+
when Array
|
182
|
+
result.each do |r|
|
183
|
+
arr << r
|
184
|
+
end
|
185
|
+
when Hash
|
186
|
+
arr << result
|
187
|
+
else
|
188
|
+
raise "row proc return value must be a Array or Hash"
|
168
189
|
end
|
169
|
-
when Hash
|
170
|
-
arr << result
|
171
|
-
else
|
172
|
-
raise "row proc return value must be a Array or Hash"
|
173
190
|
end
|
174
191
|
end
|
175
192
|
end
|
176
193
|
|
177
194
|
record_hashes.each do |record_hash|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
if pr.arity == 1
|
183
|
-
record_hash[col] = pr.call(record_hash[col])
|
184
|
-
else
|
185
|
-
record_hash[col] = pr.call(record_hash[col], record_hash)
|
195
|
+
catch :skip_record do
|
196
|
+
skip_row_procs.each do |pr|
|
197
|
+
throw :skip_record if pr.call(record_hash)
|
186
198
|
end
|
199
|
+
|
200
|
+
procs.each do |col, pr|
|
201
|
+
next unless record_hash.has_key?(col)
|
202
|
+
next if record_hash[col].nil? && skip_nils[col]
|
203
|
+
|
204
|
+
if pr.arity == 1
|
205
|
+
record_hash[col] = pr.call(record_hash[col])
|
206
|
+
else
|
207
|
+
record_hash[col] = pr.call(record_hash[col], record_hash)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
page_builder.add(record_hash.values)
|
187
211
|
end
|
188
|
-
page_builder.add(record_hash.values)
|
189
212
|
end
|
190
213
|
end
|
191
214
|
end
|
@@ -208,6 +231,10 @@ module Embulk
|
|
208
231
|
@row_procs
|
209
232
|
end
|
210
233
|
|
234
|
+
def skip_row_procs
|
235
|
+
@skip_row_procs
|
236
|
+
end
|
237
|
+
|
211
238
|
def skip_nils
|
212
239
|
@skip_nils
|
213
240
|
end
|
metadata
CHANGED
@@ -1,57 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|
15
|
-
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.8.1
|
20
|
-
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
23
|
requirements:
|
22
24
|
- - ">="
|
23
25
|
- !ruby/object:Gem::Version
|
24
26
|
version: 0.8.1
|
25
|
-
prerelease: false
|
26
|
-
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
|
-
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.10.6
|
34
|
-
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
35
37
|
requirements:
|
36
38
|
- - ">="
|
37
39
|
- !ruby/object:Gem::Version
|
38
40
|
version: 1.10.6
|
39
|
-
prerelease: false
|
40
|
-
type: :development
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
|
-
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '10.0'
|
48
|
-
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
49
51
|
requirements:
|
50
52
|
- - ">="
|
51
53
|
- !ruby/object:Gem::Version
|
52
54
|
version: '10.0'
|
53
|
-
prerelease: false
|
54
|
-
type: :development
|
55
55
|
description: Filter each record by ruby proc
|
56
56
|
email:
|
57
57
|
- kakyoin.hierophant@gmail.com
|
@@ -74,7 +74,7 @@ homepage: https://github.com/joker1007/embulk-filter-ruby_proc
|
|
74
74
|
licenses:
|
75
75
|
- MIT
|
76
76
|
metadata: {}
|
77
|
-
post_install_message:
|
77
|
+
post_install_message:
|
78
78
|
rdoc_options: []
|
79
79
|
require_paths:
|
80
80
|
- lib
|
@@ -89,9 +89,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
89
|
- !ruby/object:Gem::Version
|
90
90
|
version: '0'
|
91
91
|
requirements: []
|
92
|
-
rubyforge_project:
|
93
|
-
rubygems_version: 2.4
|
94
|
-
signing_key:
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 2.7.4
|
94
|
+
signing_key:
|
95
95
|
specification_version: 4
|
96
96
|
summary: Ruby Proc filter plugin for Embulk
|
97
97
|
test_files: []
|