embulk-filter-ruby_proc 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +17 -0
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/lib/embulk/filter/ruby_proc.rb +47 -20
- metadata +19 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
|
4
|
+
data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
|
7
|
+
data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
|
data/README.md
CHANGED
@@ -51,6 +51,11 @@ filters:
|
|
51
51
|
->(record) do
|
52
52
|
[record.dup, record.dup.tap { |r| r["id"] += 10 }]
|
53
53
|
end
|
54
|
+
skip_rows:
|
55
|
+
- proc: |
|
56
|
+
->(record) do
|
57
|
+
record["id"].odd?
|
58
|
+
end
|
54
59
|
columns:
|
55
60
|
- name: data
|
56
61
|
proc: |
|
@@ -75,6 +80,8 @@ filters:
|
|
75
80
|
|
76
81
|
```
|
77
82
|
|
83
|
+
If you want to skip a record from within a "rows proc" or "columns proc", use `throw :skip_record`.
|
84
|
+
|
78
85
|
```rb
|
79
86
|
# comment_upcase.rb
|
80
87
|
|
@@ -90,6 +97,16 @@ end
|
|
90
97
|
- A rows proc must return a record hash or an array of record hashes.
|
91
98
|
- The user must take care of object identity. Otherwise, errors may occur when the plugin applies column procs.
|
92
99
|
|
100
|
+
### proc execution order
|
101
|
+
|
102
|
+
1. before procs
|
103
|
+
1. per record
|
104
|
+
1. all row procs
|
105
|
+
1. per record produced by the row procs
|
106
|
+
1. all skip\_row procs
|
107
|
+
1. column procs
|
108
|
+
1. after procs
|
109
|
+
|
93
110
|
### preview
|
94
111
|
```
|
95
112
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
@@ -33,6 +33,7 @@ module Embulk
|
|
33
33
|
task = {
|
34
34
|
"columns" => config.param("columns", :array, default: []),
|
35
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"skip_rows" => config.param("skip_rows", :array, default: []),
|
36
37
|
"before" => config.param("before", :array, default: []),
|
37
38
|
"after" => config.param("after", :array, default: []),
|
38
39
|
"requires" => config.param("requires", :array, default: []),
|
@@ -55,6 +56,7 @@ module Embulk
|
|
55
56
|
|
56
57
|
@proc_store ||= {}
|
57
58
|
@row_proc_store ||= {}
|
59
|
+
@skip_row_proc_store ||= {}
|
58
60
|
transaction_id = rand(100000000)
|
59
61
|
until !@proc_store.has_key?(transaction_id)
|
60
62
|
transaction_id = rand(100000000)
|
@@ -83,8 +85,15 @@ module Embulk
|
|
83
85
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
84
86
|
end
|
85
87
|
}.compact
|
88
|
+
@skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
|
89
|
+
if rowdef["proc"]
|
90
|
+
eval(rowdef["proc"], evaluator_binding)
|
91
|
+
else
|
92
|
+
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
93
|
+
end
|
94
|
+
}.compact
|
86
95
|
task["transaction_id"] = transaction_id
|
87
|
-
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
|
96
|
+
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
|
88
97
|
|
89
98
|
before_procs.each do |pr|
|
90
99
|
pr.call
|
@@ -113,6 +122,10 @@ module Embulk
|
|
113
122
|
@row_proc_store
|
114
123
|
end
|
115
124
|
|
125
|
+
def self.skip_row_proc_store
|
126
|
+
@skip_row_proc_store
|
127
|
+
end
|
128
|
+
|
116
129
|
def self.parse_col_procs(columns, evaluator_binding)
|
117
130
|
Hash[columns.map {|col|
|
118
131
|
if col["proc"]
|
@@ -138,13 +151,15 @@ module Embulk
|
|
138
151
|
require lib
|
139
152
|
end
|
140
153
|
|
141
|
-
if self.class.proc_store.nil? || self.class.row_proc_store.nil?
|
154
|
+
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
142
155
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
143
156
|
@procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
|
144
157
|
@row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
|
158
|
+
@skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
|
145
159
|
else
|
146
160
|
@procs = self.class.proc_store[task["transaction_id"]]
|
147
161
|
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
162
|
+
@skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
|
148
163
|
end
|
149
164
|
@skip_nils = Hash[task["columns"].map {|col|
|
150
165
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
@@ -160,32 +175,40 @@ module Embulk
|
|
160
175
|
record_hashes = [hashrize(record)]
|
161
176
|
else
|
162
177
|
record_hashes = row_procs.each_with_object([]) do |pr, arr|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
178
|
+
catch :skip_record do
|
179
|
+
result = pr.call(hashrize(record))
|
180
|
+
case result
|
181
|
+
when Array
|
182
|
+
result.each do |r|
|
183
|
+
arr << r
|
184
|
+
end
|
185
|
+
when Hash
|
186
|
+
arr << result
|
187
|
+
else
|
188
|
+
raise "row proc return value must be a Array or Hash"
|
168
189
|
end
|
169
|
-
when Hash
|
170
|
-
arr << result
|
171
|
-
else
|
172
|
-
raise "row proc return value must be a Array or Hash"
|
173
190
|
end
|
174
191
|
end
|
175
192
|
end
|
176
193
|
|
177
194
|
record_hashes.each do |record_hash|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
if pr.arity == 1
|
183
|
-
record_hash[col] = pr.call(record_hash[col])
|
184
|
-
else
|
185
|
-
record_hash[col] = pr.call(record_hash[col], record_hash)
|
195
|
+
catch :skip_record do
|
196
|
+
skip_row_procs.each do |pr|
|
197
|
+
throw :skip_record if pr.call(record_hash)
|
186
198
|
end
|
199
|
+
|
200
|
+
procs.each do |col, pr|
|
201
|
+
next unless record_hash.has_key?(col)
|
202
|
+
next if record_hash[col].nil? && skip_nils[col]
|
203
|
+
|
204
|
+
if pr.arity == 1
|
205
|
+
record_hash[col] = pr.call(record_hash[col])
|
206
|
+
else
|
207
|
+
record_hash[col] = pr.call(record_hash[col], record_hash)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
page_builder.add(record_hash.values)
|
187
211
|
end
|
188
|
-
page_builder.add(record_hash.values)
|
189
212
|
end
|
190
213
|
end
|
191
214
|
end
|
@@ -208,6 +231,10 @@ module Embulk
|
|
208
231
|
@row_procs
|
209
232
|
end
|
210
233
|
|
234
|
+
def skip_row_procs
|
235
|
+
@skip_row_procs
|
236
|
+
end
|
237
|
+
|
211
238
|
def skip_nils
|
212
239
|
@skip_nils
|
213
240
|
end
|
metadata
CHANGED
@@ -1,57 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|
15
|
-
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.8.1
|
20
|
-
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
23
|
requirements:
|
22
24
|
- - ">="
|
23
25
|
- !ruby/object:Gem::Version
|
24
26
|
version: 0.8.1
|
25
|
-
prerelease: false
|
26
|
-
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
|
-
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.10.6
|
34
|
-
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
35
37
|
requirements:
|
36
38
|
- - ">="
|
37
39
|
- !ruby/object:Gem::Version
|
38
40
|
version: 1.10.6
|
39
|
-
prerelease: false
|
40
|
-
type: :development
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
|
-
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '10.0'
|
48
|
-
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
49
51
|
requirements:
|
50
52
|
- - ">="
|
51
53
|
- !ruby/object:Gem::Version
|
52
54
|
version: '10.0'
|
53
|
-
prerelease: false
|
54
|
-
type: :development
|
55
55
|
description: Filter each record by ruby proc
|
56
56
|
email:
|
57
57
|
- kakyoin.hierophant@gmail.com
|
@@ -74,7 +74,7 @@ homepage: https://github.com/joker1007/embulk-filter-ruby_proc
|
|
74
74
|
licenses:
|
75
75
|
- MIT
|
76
76
|
metadata: {}
|
77
|
-
post_install_message:
|
77
|
+
post_install_message:
|
78
78
|
rdoc_options: []
|
79
79
|
require_paths:
|
80
80
|
- lib
|
@@ -89,9 +89,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
89
|
- !ruby/object:Gem::Version
|
90
90
|
version: '0'
|
91
91
|
requirements: []
|
92
|
-
rubyforge_project:
|
93
|
-
rubygems_version: 2.4
|
94
|
-
signing_key:
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 2.7.4
|
94
|
+
signing_key:
|
95
95
|
specification_version: 4
|
96
96
|
summary: Ruby Proc filter plugin for Embulk
|
97
97
|
test_files: []
|