embulk-filter-ruby_proc 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 754bf37f61d14a24054997630406b8c9b02e7aff
4
- data.tar.gz: c47c596b3745548ac8919e7be92727e6157bfd70
2
+ SHA256:
3
+ metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
4
+ data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
5
5
  SHA512:
6
- metadata.gz: acf1ee4540152e40b7e706466b67f1f4f48eb23d200c988a3d46fe21dce669c19897c8f000cfbcf55f600c7c908a9accd7b51b268ecc7c9ad92f1cf1c5a733df
7
- data.tar.gz: d3757d66ef78d46463a533b436fb4ae58c2e8a7fe09c9baad6c238eaca5204a5d6b76de095c7733ffa6e9f289056d7879e6e2e6fe534a82c7c83cbe55bde5fa6
6
+ metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
7
+ data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
data/README.md CHANGED
@@ -51,6 +51,11 @@ filters:
51
51
  ->(record) do
52
52
  [record.dup, record.dup.tap { |r| r["id"] += 10 }]
53
53
  end
54
+ skip_rows:
55
+ - proc: |
56
+ ->(record) do
57
+ record["id"].odd?
58
+ end
54
59
  columns:
55
60
  - name: data
56
61
  proc: |
@@ -75,6 +80,8 @@ filters:
75
80
 
76
81
  ```
77
82
 
83
+ If you want to skip a record in a "rows proc" or a "columns proc", use `throw :skip_record`.
84
+
78
85
  ```rb
79
86
  # comment_upcase.rb
80
87
 
@@ -90,6 +97,16 @@ end
90
97
  - rows proc must return a record hash or an array of record hashes.
91
98
  - user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
92
99
 
100
+ ### proc execution order
101
+
102
+ 1. before procs
103
+ 1. per record
104
+ 1. all row procs
105
+ 1. per record returned by the row procs
106
+ 1. all skip\_row procs
107
+ 1. column procs
108
+ 1. after procs
109
+
93
110
  ### preview
94
111
  ```
95
112
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.7.0"
4
+ spec.version = "0.8.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
@@ -33,6 +33,7 @@ module Embulk
33
33
  task = {
34
34
  "columns" => config.param("columns", :array, default: []),
35
35
  "rows" => config.param("rows", :array, default: []),
36
+ "skip_rows" => config.param("skip_rows", :array, default: []),
36
37
  "before" => config.param("before", :array, default: []),
37
38
  "after" => config.param("after", :array, default: []),
38
39
  "requires" => config.param("requires", :array, default: []),
@@ -55,6 +56,7 @@ module Embulk
55
56
 
56
57
  @proc_store ||= {}
57
58
  @row_proc_store ||= {}
59
+ @skip_row_proc_store ||= {}
58
60
  transaction_id = rand(100000000)
59
61
  until !@proc_store.has_key?(transaction_id)
60
62
  transaction_id = rand(100000000)
@@ -83,8 +85,15 @@ module Embulk
83
85
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
84
86
  end
85
87
  }.compact
88
+ @skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
89
+ if rowdef["proc"]
90
+ eval(rowdef["proc"], evaluator_binding)
91
+ else
92
+ eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
93
+ end
94
+ }.compact
86
95
  task["transaction_id"] = transaction_id
87
- raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
96
+ raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
88
97
 
89
98
  before_procs.each do |pr|
90
99
  pr.call
@@ -113,6 +122,10 @@ module Embulk
113
122
  @row_proc_store
114
123
  end
115
124
 
125
+ def self.skip_row_proc_store
126
+ @skip_row_proc_store
127
+ end
128
+
116
129
  def self.parse_col_procs(columns, evaluator_binding)
117
130
  Hash[columns.map {|col|
118
131
  if col["proc"]
@@ -138,13 +151,15 @@ module Embulk
138
151
  require lib
139
152
  end
140
153
 
141
- if self.class.proc_store.nil? || self.class.row_proc_store.nil?
154
+ if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
142
155
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
143
156
  @procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
144
157
  @row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
158
+ @skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
145
159
  else
146
160
  @procs = self.class.proc_store[task["transaction_id"]]
147
161
  @row_procs = self.class.row_proc_store[task["transaction_id"]]
162
+ @skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
148
163
  end
149
164
  @skip_nils = Hash[task["columns"].map {|col|
150
165
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
@@ -160,32 +175,40 @@ module Embulk
160
175
  record_hashes = [hashrize(record)]
161
176
  else
162
177
  record_hashes = row_procs.each_with_object([]) do |pr, arr|
163
- result = pr.call(hashrize(record))
164
- case result
165
- when Array
166
- result.each do |r|
167
- arr << r
178
+ catch :skip_record do
179
+ result = pr.call(hashrize(record))
180
+ case result
181
+ when Array
182
+ result.each do |r|
183
+ arr << r
184
+ end
185
+ when Hash
186
+ arr << result
187
+ else
188
+ raise "row proc return value must be a Array or Hash"
168
189
  end
169
- when Hash
170
- arr << result
171
- else
172
- raise "row proc return value must be a Array or Hash"
173
190
  end
174
191
  end
175
192
  end
176
193
 
177
194
  record_hashes.each do |record_hash|
178
- procs.each do |col, pr|
179
- next unless record_hash.has_key?(col)
180
- next if record_hash[col].nil? && skip_nils[col]
181
-
182
- if pr.arity == 1
183
- record_hash[col] = pr.call(record_hash[col])
184
- else
185
- record_hash[col] = pr.call(record_hash[col], record_hash)
195
+ catch :skip_record do
196
+ skip_row_procs.each do |pr|
197
+ throw :skip_record if pr.call(record_hash)
186
198
  end
199
+
200
+ procs.each do |col, pr|
201
+ next unless record_hash.has_key?(col)
202
+ next if record_hash[col].nil? && skip_nils[col]
203
+
204
+ if pr.arity == 1
205
+ record_hash[col] = pr.call(record_hash[col])
206
+ else
207
+ record_hash[col] = pr.call(record_hash[col], record_hash)
208
+ end
209
+ end
210
+ page_builder.add(record_hash.values)
187
211
  end
188
- page_builder.add(record_hash.values)
189
212
  end
190
213
  end
191
214
  end
@@ -208,6 +231,10 @@ module Embulk
208
231
  @row_procs
209
232
  end
210
233
 
234
+ def skip_row_procs
235
+ @skip_row_procs
236
+ end
237
+
211
238
  def skip_nils
212
239
  @skip_nils
213
240
  end
metadata CHANGED
@@ -1,57 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-14 00:00:00.000000000 Z
11
+ date: 2018-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk
15
- version_requirements: !ruby/object:Gem::Requirement
15
+ requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.8.1
20
- requirement: !ruby/object:Gem::Requirement
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
21
23
  requirements:
22
24
  - - ">="
23
25
  - !ruby/object:Gem::Version
24
26
  version: 0.8.1
25
- prerelease: false
26
- type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
- version_requirements: !ruby/object:Gem::Requirement
29
+ requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.10.6
34
- requirement: !ruby/object:Gem::Requirement
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
35
37
  requirements:
36
38
  - - ">="
37
39
  - !ruby/object:Gem::Version
38
40
  version: 1.10.6
39
- prerelease: false
40
- type: :development
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake
43
- version_requirements: !ruby/object:Gem::Requirement
43
+ requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '10.0'
48
- requirement: !ruby/object:Gem::Requirement
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
49
51
  requirements:
50
52
  - - ">="
51
53
  - !ruby/object:Gem::Version
52
54
  version: '10.0'
53
- prerelease: false
54
- type: :development
55
55
  description: Filter each record by ruby proc
56
56
  email:
57
57
  - kakyoin.hierophant@gmail.com
@@ -74,7 +74,7 @@ homepage: https://github.com/joker1007/embulk-filter-ruby_proc
74
74
  licenses:
75
75
  - MIT
76
76
  metadata: {}
77
- post_install_message:
77
+ post_install_message:
78
78
  rdoc_options: []
79
79
  require_paths:
80
80
  - lib
@@ -89,9 +89,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
89
  - !ruby/object:Gem::Version
90
90
  version: '0'
91
91
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.4.8
94
- signing_key:
92
+ rubyforge_project:
93
+ rubygems_version: 2.7.4
94
+ signing_key:
95
95
  specification_version: 4
96
96
  summary: Ruby Proc filter plugin for Embulk
97
97
  test_files: []