embulk-filter-ruby_proc 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
4
- data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
3
+ metadata.gz: 3ac1518db663e3846227546d746213c9c2f3f7a507ebce549c4137a75294ed54
4
+ data.tar.gz: a8cb61ed6b043444138bbe7e84fe0185f44377c27fc758b47b8227195f536740
5
5
  SHA512:
6
- metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
7
- data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
6
+ metadata.gz: f903a1672abada9756fb495e681ac517fdf0fbc1632b03b3c678ff0d5b4dc0ac02d776757c02e2fa035a2b1f322cd45b6a873d68ccfccbf0858d7b585e6fbbc8
7
+ data.tar.gz: e23dcfd2b09c1e88cc3cd73c12b65b2c362f96f547af245670640db8ced487255649b2e3fad93e98e697f5d729c3db6ff56ec1d6b29a8c4a87b501d00fb57813
data/.gitignore CHANGED
@@ -4,3 +4,4 @@
4
4
  /.bundle/
5
5
  /Gemfile.lock
6
6
  /example/out*
7
+ .ruby-version
data/README.md CHANGED
@@ -75,6 +75,13 @@ filters:
75
75
  proc_file: comment_upcase.rb
76
76
  skip_nil: false
77
77
  type: json
78
+ pages:
79
+ - proc: |
80
+ ->(records) do
81
+ records.map do |record|
82
+ record.tap { |r| r["id"] += 1 }
83
+ end
84
+ end
78
85
 
79
86
  # ...
80
87
 
@@ -96,6 +103,8 @@ end
96
103
  - instance variable is shared
97
104
  - rows proc must return a record hash or an array of record hashes.
98
105
  - user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
106
+ - pages proc must return an array of record hashes.
107
+ - use the `page_size` option to increase the page size, and thus the number of records passed to each pages proc call (e.g. `-X page_size=64KB`)
99
108
 
100
109
  ### proc execution order
101
110
 
@@ -105,6 +114,7 @@ end
105
114
  1. per record applied row procs
106
115
  1. all skip\_row procs
107
116
  1. column procs
117
+ 1. per page procs
108
118
  1. after procs
109
119
 
110
120
  ### preview
@@ -113,13 +123,13 @@ end
113
123
  | id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
114
124
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
115
125
  | 3 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
116
- | 33 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
126
+ | 34 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
117
127
  | 6 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
118
- | 36 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
128
+ | 37 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
119
129
  | 9 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
120
- | 39 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
130
+ | 40 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
121
131
  | 12 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
122
- | 42 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
132
+ | 43 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
123
133
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
124
134
  ```
125
135
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.8.0"
4
+ spec.version = "0.8.1"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
@@ -57,6 +57,13 @@ filters:
57
57
  id * variables["multiply"]
58
58
  end
59
59
  type: string
60
+ pages:
61
+ - proc: |
62
+ ->(records) do
63
+ records.map do |record|
64
+ record.tap { |r| r["id"] += 1 }
65
+ end
66
+ end
60
67
 
61
68
  - type: ruby_proc
62
69
  requires:
@@ -33,6 +33,7 @@ module Embulk
33
33
  task = {
34
34
  "columns" => config.param("columns", :array, default: []),
35
35
  "rows" => config.param("rows", :array, default: []),
36
+ "pages" => config.param("pages", :array, default: []),
36
37
  "skip_rows" => config.param("skip_rows", :array, default: []),
37
38
  "before" => config.param("before", :array, default: []),
38
39
  "after" => config.param("after", :array, default: []),
@@ -56,6 +57,7 @@ module Embulk
56
57
 
57
58
  @proc_store ||= {}
58
59
  @row_proc_store ||= {}
60
+ @page_proc_store ||= {}
59
61
  @skip_row_proc_store ||= {}
60
62
  transaction_id = rand(100000000)
61
63
  until !@proc_store.has_key?(transaction_id)
@@ -85,6 +87,13 @@ module Embulk
85
87
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
86
88
  end
87
89
  }.compact
90
+ @page_proc_store[transaction_id] = page_procs = task["pages"].map {|page|
91
+ if page["proc"]
92
+ eval(page["proc"], evaluator_binding)
93
+ else
94
+ eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
95
+ end
96
+ }.compact
88
97
  @skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
89
98
  if rowdef["proc"]
90
99
  eval(rowdef["proc"], evaluator_binding)
@@ -93,7 +102,9 @@ module Embulk
93
102
  end
94
103
  }.compact
95
104
  task["transaction_id"] = transaction_id
96
- raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
105
+ if procs.empty? && row_procs.empty? && page_procs.empty? && skip_row_procs.empty?
106
+ raise "Need columns or rows or pages parameter"
107
+ end
97
108
 
98
109
  before_procs.each do |pr|
99
110
  pr.call
@@ -122,6 +133,10 @@ module Embulk
122
133
  @row_proc_store
123
134
  end
124
135
 
136
+ def self.page_proc_store
137
+ @page_proc_store
138
+ end
139
+
125
140
  def self.skip_row_proc_store
126
141
  @skip_row_proc_store
127
142
  end
@@ -146,19 +161,31 @@ module Embulk
146
161
  }.compact
147
162
  end
148
163
 
164
+ def self.parse_page_procs(pages, evaluator_binding)
165
+ pages.map {|page|
166
+ if page["proc"]
167
+ eval(page["proc"], evaluator_binding)
168
+ else
169
+ eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
170
+ end
171
+ }.compact
172
+ end
173
+
149
174
  def init
150
175
  task["requires"].each do |lib|
151
176
  require lib
152
177
  end
153
178
 
154
- if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
179
+ if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.page_proc_store.nil? || self.class.skip_row_proc_store.nil?
155
180
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
156
181
  @procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
157
182
  @row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
183
+ @page_procs = self.class.parse_page_procs(task["pages"], evaluator_binding)
158
184
  @skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
159
185
  else
160
186
  @procs = self.class.proc_store[task["transaction_id"]]
161
187
  @row_procs = self.class.row_proc_store[task["transaction_id"]]
188
+ @page_procs = self.class.page_proc_store[task["transaction_id"]]
162
189
  @skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
163
190
  end
164
191
  @skip_nils = Hash[task["columns"].map {|col|
@@ -170,6 +197,7 @@ module Embulk
170
197
  end
171
198
 
172
199
  def add(page)
200
+ proc_records = []
173
201
  page.each do |record|
174
202
  if row_procs.empty?
175
203
  record_hashes = [hashrize(record)]
@@ -207,10 +235,21 @@ module Embulk
207
235
  record_hash[col] = pr.call(record_hash[col], record_hash)
208
236
  end
209
237
  end
210
- page_builder.add(record_hash.values)
238
+ if page_procs.empty?
239
+ page_builder.add(record_hash.values)
240
+ else
241
+ proc_records << record_hash
242
+ end
211
243
  end
212
244
  end
213
245
  end
246
+
247
+ unless page_procs.empty?
248
+ page_procs.each do |pr|
249
+ result = pr.call(proc_records)
250
+ result.each { |record| page_builder.add(record.values) }
251
+ end
252
+ end
214
253
  end
215
254
 
216
255
  def finish
@@ -231,6 +270,10 @@ module Embulk
231
270
  @row_procs
232
271
  end
233
272
 
273
+ def page_procs
274
+ @page_procs
275
+ end
276
+
234
277
  def skip_row_procs
235
278
  @skip_row_procs
236
279
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-28 00:00:00.000000000 Z
11
+ date: 2019-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk
@@ -60,7 +60,6 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - ".gitignore"
63
- - ".ruby-version"
64
63
  - Gemfile
65
64
  - LICENSE.txt
66
65
  - README.md
@@ -89,8 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
88
  - !ruby/object:Gem::Version
90
89
  version: '0'
91
90
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.7.4
91
+ rubygems_version: 3.0.3
94
92
  signing_key:
95
93
  specification_version: 4
96
94
  summary: Ruby Proc filter plugin for Embulk
@@ -1 +0,0 @@
1
- jruby-9.0.4.0