embulk-filter-ruby_proc 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 754bf37f61d14a24054997630406b8c9b02e7aff
4
- data.tar.gz: c47c596b3745548ac8919e7be92727e6157bfd70
2
+ SHA256:
3
+ metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
4
+ data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
5
5
  SHA512:
6
- metadata.gz: acf1ee4540152e40b7e706466b67f1f4f48eb23d200c988a3d46fe21dce669c19897c8f000cfbcf55f600c7c908a9accd7b51b268ecc7c9ad92f1cf1c5a733df
7
- data.tar.gz: d3757d66ef78d46463a533b436fb4ae58c2e8a7fe09c9baad6c238eaca5204a5d6b76de095c7733ffa6e9f289056d7879e6e2e6fe534a82c7c83cbe55bde5fa6
6
+ metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
7
+ data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
data/README.md CHANGED
@@ -51,6 +51,11 @@ filters:
51
51
  ->(record) do
52
52
  [record.dup, record.dup.tap { |r| r["id"] += 10 }]
53
53
  end
54
+ skip_rows:
55
+ - proc: |
56
+ ->(record) do
57
+ record["id"].odd?
58
+ end
54
59
  columns:
55
60
  - name: data
56
61
  proc: |
@@ -75,6 +80,8 @@ filters:
75
80
 
76
81
  ```
77
82
 
83
+ If you want to skip a record inside a "rows proc" or "columns proc", use `throw :skip_record`.
84
+
78
85
  ```rb
79
86
  # comment_upcase.rb
80
87
 
@@ -90,6 +97,16 @@ end
90
97
  - rows proc must return a record hash or an array of record hashes.
91
98
  - user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
92
99
 
100
+ ### proc execution order
101
+
102
+ 1. before procs
103
+ 1. per record
104
+ 1. all row procs
105
+ 1. per record applied row procs
106
+ 1. all skip\_row procs
107
+ 1. column procs
108
+ 1. after procs
109
+
93
110
  ### preview
94
111
  ```
95
112
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.7.0"
4
+ spec.version = "0.8.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
@@ -33,6 +33,7 @@ module Embulk
33
33
  task = {
34
34
  "columns" => config.param("columns", :array, default: []),
35
35
  "rows" => config.param("rows", :array, default: []),
36
+ "skip_rows" => config.param("skip_rows", :array, default: []),
36
37
  "before" => config.param("before", :array, default: []),
37
38
  "after" => config.param("after", :array, default: []),
38
39
  "requires" => config.param("requires", :array, default: []),
@@ -55,6 +56,7 @@ module Embulk
55
56
 
56
57
  @proc_store ||= {}
57
58
  @row_proc_store ||= {}
59
+ @skip_row_proc_store ||= {}
58
60
  transaction_id = rand(100000000)
59
61
  until !@proc_store.has_key?(transaction_id)
60
62
  transaction_id = rand(100000000)
@@ -83,8 +85,15 @@ module Embulk
83
85
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
84
86
  end
85
87
  }.compact
88
+ @skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
89
+ if rowdef["proc"]
90
+ eval(rowdef["proc"], evaluator_binding)
91
+ else
92
+ eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
93
+ end
94
+ }.compact
86
95
  task["transaction_id"] = transaction_id
87
- raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
96
+ raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
88
97
 
89
98
  before_procs.each do |pr|
90
99
  pr.call
@@ -113,6 +122,10 @@ module Embulk
113
122
  @row_proc_store
114
123
  end
115
124
 
125
+ def self.skip_row_proc_store
126
+ @skip_row_proc_store
127
+ end
128
+
116
129
  def self.parse_col_procs(columns, evaluator_binding)
117
130
  Hash[columns.map {|col|
118
131
  if col["proc"]
@@ -138,13 +151,15 @@ module Embulk
138
151
  require lib
139
152
  end
140
153
 
141
- if self.class.proc_store.nil? || self.class.row_proc_store.nil?
154
+ if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
142
155
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
143
156
  @procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
144
157
  @row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
158
+ @skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
145
159
  else
146
160
  @procs = self.class.proc_store[task["transaction_id"]]
147
161
  @row_procs = self.class.row_proc_store[task["transaction_id"]]
162
+ @skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
148
163
  end
149
164
  @skip_nils = Hash[task["columns"].map {|col|
150
165
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
@@ -160,32 +175,40 @@ module Embulk
160
175
  record_hashes = [hashrize(record)]
161
176
  else
162
177
  record_hashes = row_procs.each_with_object([]) do |pr, arr|
163
- result = pr.call(hashrize(record))
164
- case result
165
- when Array
166
- result.each do |r|
167
- arr << r
178
+ catch :skip_record do
179
+ result = pr.call(hashrize(record))
180
+ case result
181
+ when Array
182
+ result.each do |r|
183
+ arr << r
184
+ end
185
+ when Hash
186
+ arr << result
187
+ else
188
+ raise "row proc return value must be a Array or Hash"
168
189
  end
169
- when Hash
170
- arr << result
171
- else
172
- raise "row proc return value must be a Array or Hash"
173
190
  end
174
191
  end
175
192
  end
176
193
 
177
194
  record_hashes.each do |record_hash|
178
- procs.each do |col, pr|
179
- next unless record_hash.has_key?(col)
180
- next if record_hash[col].nil? && skip_nils[col]
181
-
182
- if pr.arity == 1
183
- record_hash[col] = pr.call(record_hash[col])
184
- else
185
- record_hash[col] = pr.call(record_hash[col], record_hash)
195
+ catch :skip_record do
196
+ skip_row_procs.each do |pr|
197
+ throw :skip_record if pr.call(record_hash)
186
198
  end
199
+
200
+ procs.each do |col, pr|
201
+ next unless record_hash.has_key?(col)
202
+ next if record_hash[col].nil? && skip_nils[col]
203
+
204
+ if pr.arity == 1
205
+ record_hash[col] = pr.call(record_hash[col])
206
+ else
207
+ record_hash[col] = pr.call(record_hash[col], record_hash)
208
+ end
209
+ end
210
+ page_builder.add(record_hash.values)
187
211
  end
188
- page_builder.add(record_hash.values)
189
212
  end
190
213
  end
191
214
  end
@@ -208,6 +231,10 @@ module Embulk
208
231
  @row_procs
209
232
  end
210
233
 
234
+ def skip_row_procs
235
+ @skip_row_procs
236
+ end
237
+
211
238
  def skip_nils
212
239
  @skip_nils
213
240
  end
metadata CHANGED
@@ -1,57 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-14 00:00:00.000000000 Z
11
+ date: 2018-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk
15
- version_requirements: !ruby/object:Gem::Requirement
15
+ requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.8.1
20
- requirement: !ruby/object:Gem::Requirement
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
21
23
  requirements:
22
24
  - - ">="
23
25
  - !ruby/object:Gem::Version
24
26
  version: 0.8.1
25
- prerelease: false
26
- type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
- version_requirements: !ruby/object:Gem::Requirement
29
+ requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.10.6
34
- requirement: !ruby/object:Gem::Requirement
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
35
37
  requirements:
36
38
  - - ">="
37
39
  - !ruby/object:Gem::Version
38
40
  version: 1.10.6
39
- prerelease: false
40
- type: :development
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake
43
- version_requirements: !ruby/object:Gem::Requirement
43
+ requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '10.0'
48
- requirement: !ruby/object:Gem::Requirement
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
49
51
  requirements:
50
52
  - - ">="
51
53
  - !ruby/object:Gem::Version
52
54
  version: '10.0'
53
- prerelease: false
54
- type: :development
55
55
  description: Filter each record by ruby proc
56
56
  email:
57
57
  - kakyoin.hierophant@gmail.com
@@ -74,7 +74,7 @@ homepage: https://github.com/joker1007/embulk-filter-ruby_proc
74
74
  licenses:
75
75
  - MIT
76
76
  metadata: {}
77
- post_install_message:
77
+ post_install_message:
78
78
  rdoc_options: []
79
79
  require_paths:
80
80
  - lib
@@ -89,9 +89,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
89
  - !ruby/object:Gem::Version
90
90
  version: '0'
91
91
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.4.8
94
- signing_key:
92
+ rubyforge_project:
93
+ rubygems_version: 2.7.4
94
+ signing_key:
95
95
  specification_version: 4
96
96
  summary: Ruby Proc filter plugin for Embulk
97
97
  test_files: []