embulk-filter-ruby_proc 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 727830dc138bf80017b10197c86129c82b61a1e0dc92a5c76548eaea8c23649f
4
- data.tar.gz: 4cc54640d0b6f5906b4e91428553a1732adda5e8da4dd4bdfb5ee9a0dd712e8c
3
+ metadata.gz: 3ac1518db663e3846227546d746213c9c2f3f7a507ebce549c4137a75294ed54
4
+ data.tar.gz: a8cb61ed6b043444138bbe7e84fe0185f44377c27fc758b47b8227195f536740
5
5
  SHA512:
6
- metadata.gz: 460294e10ed0191f609d06236846c1a0acfcc70626f5b29ad45452ba84e6ddbab777747ded81594e787a63bc5eea9bd9b570d49a9fa906560bb325cd497c3152
7
- data.tar.gz: 1cad7c00adb50981afc402c44ad60665aa41bae3ed0e1b6b2b5b492917b0334cb147aa0564f613666181ede81b49da90aec7efda74fc5de374f507216f36575d
6
+ metadata.gz: f903a1672abada9756fb495e681ac517fdf0fbc1632b03b3c678ff0d5b4dc0ac02d776757c02e2fa035a2b1f322cd45b6a873d68ccfccbf0858d7b585e6fbbc8
7
+ data.tar.gz: e23dcfd2b09c1e88cc3cd73c12b65b2c362f96f547af245670640db8ced487255649b2e3fad93e98e697f5d729c3db6ff56ec1d6b29a8c4a87b501d00fb57813
data/.gitignore CHANGED
@@ -4,3 +4,4 @@
4
4
  /.bundle/
5
5
  /Gemfile.lock
6
6
  /example/out*
7
+ .ruby-version
data/README.md CHANGED
@@ -75,6 +75,13 @@ filters:
75
75
  proc_file: comment_upcase.rb
76
76
  skip_nil: false
77
77
  type: json
78
+ pages:
79
+ - proc: |
80
+ ->(records) do
81
+ records.map do |record|
82
+ record.tap { |r| r["id"] += 1 }
83
+ end
84
+ end
78
85
 
79
86
  # ...
80
87
 
@@ -96,6 +103,8 @@ end
96
103
  - instance variable is shared
97
104
  - rows proc must return a record hash or an array of record hashes.
98
105
  - user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
106
+ - pages proc must return an array of record hashes.
107
+ - use the `page_size` option to increase the size of the batch of records processed at once (e.g. `-X page_size=64KB`)
99
108
 
100
109
  ### proc execution order
101
110
 
@@ -105,6 +114,7 @@ end
105
114
  1. per record applied row procs
106
115
  1. all skip\_row procs
107
116
  1. column procs
117
+ 1. per page procs
108
118
  1. after procs
109
119
 
110
120
  ### preview
@@ -113,13 +123,13 @@ end
113
123
  | id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
114
124
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
115
125
  | 3 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
116
- | 33 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
126
+ | 34 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
117
127
  | 6 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
118
- | 36 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
128
+ | 37 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
119
129
  | 9 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
120
- | 39 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
130
+ | 40 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
121
131
  | 12 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
122
- | 42 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
132
+ | 43 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
123
133
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
124
134
  ```
125
135
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.8.0"
4
+ spec.version = "0.8.1"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
@@ -57,6 +57,13 @@ filters:
57
57
  id * variables["multiply"]
58
58
  end
59
59
  type: string
60
+ pages:
61
+ - proc: |
62
+ ->(records) do
63
+ records.map do |record|
64
+ record.tap { |r| r["id"] += 1 }
65
+ end
66
+ end
60
67
 
61
68
  - type: ruby_proc
62
69
  requires:
@@ -33,6 +33,7 @@ module Embulk
33
33
  task = {
34
34
  "columns" => config.param("columns", :array, default: []),
35
35
  "rows" => config.param("rows", :array, default: []),
36
+ "pages" => config.param("pages", :array, default: []),
36
37
  "skip_rows" => config.param("skip_rows", :array, default: []),
37
38
  "before" => config.param("before", :array, default: []),
38
39
  "after" => config.param("after", :array, default: []),
@@ -56,6 +57,7 @@ module Embulk
56
57
 
57
58
  @proc_store ||= {}
58
59
  @row_proc_store ||= {}
60
+ @page_proc_store ||= {}
59
61
  @skip_row_proc_store ||= {}
60
62
  transaction_id = rand(100000000)
61
63
  until !@proc_store.has_key?(transaction_id)
@@ -85,6 +87,13 @@ module Embulk
85
87
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
86
88
  end
87
89
  }.compact
90
+ @page_proc_store[transaction_id] = page_procs = task["pages"].map {|page|
91
+ if page["proc"]
92
+ eval(page["proc"], evaluator_binding)
93
+ else
94
+ eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
95
+ end
96
+ }.compact
88
97
  @skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
89
98
  if rowdef["proc"]
90
99
  eval(rowdef["proc"], evaluator_binding)
@@ -93,7 +102,9 @@ module Embulk
93
102
  end
94
103
  }.compact
95
104
  task["transaction_id"] = transaction_id
96
- raise "Need columns or rows parameter" if procs.empty? && row_procs.empty? && skip_row_procs.empty?
105
+ if procs.empty? && row_procs.empty? && page_procs.empty? && skip_row_procs.empty?
106
+ raise "Need columns or rows or pages parameter"
107
+ end
97
108
 
98
109
  before_procs.each do |pr|
99
110
  pr.call
@@ -122,6 +133,10 @@ module Embulk
122
133
  @row_proc_store
123
134
  end
124
135
 
136
+ def self.page_proc_store
137
+ @page_proc_store
138
+ end
139
+
125
140
  def self.skip_row_proc_store
126
141
  @skip_row_proc_store
127
142
  end
@@ -146,19 +161,31 @@ module Embulk
146
161
  }.compact
147
162
  end
148
163
 
164
+ def self.parse_page_procs(pages, evaluator_binding)
165
+ pages.map {|page|
166
+ if page["proc"]
167
+ eval(page["proc"], evaluator_binding)
168
+ else
169
+ eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
170
+ end
171
+ }.compact
172
+ end
173
+
149
174
  def init
150
175
  task["requires"].each do |lib|
151
176
  require lib
152
177
  end
153
178
 
154
- if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
179
+ if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.page_proc_store.nil? || self.class.skip_row_proc_store.nil?
155
180
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
156
181
  @procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
157
182
  @row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
183
+ @page_procs = self.class.parse_page_procs(task["pages"], evaluator_binding)
158
184
  @skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
159
185
  else
160
186
  @procs = self.class.proc_store[task["transaction_id"]]
161
187
  @row_procs = self.class.row_proc_store[task["transaction_id"]]
188
+ @page_procs = self.class.page_proc_store[task["transaction_id"]]
162
189
  @skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
163
190
  end
164
191
  @skip_nils = Hash[task["columns"].map {|col|
@@ -170,6 +197,7 @@ module Embulk
170
197
  end
171
198
 
172
199
  def add(page)
200
+ proc_records = []
173
201
  page.each do |record|
174
202
  if row_procs.empty?
175
203
  record_hashes = [hashrize(record)]
@@ -207,10 +235,21 @@ module Embulk
207
235
  record_hash[col] = pr.call(record_hash[col], record_hash)
208
236
  end
209
237
  end
210
- page_builder.add(record_hash.values)
238
+ if page_procs.empty?
239
+ page_builder.add(record_hash.values)
240
+ else
241
+ proc_records << record_hash
242
+ end
211
243
  end
212
244
  end
213
245
  end
246
+
247
+ unless page_procs.empty?
248
+ page_procs.each do |pr|
249
+ result = pr.call(proc_records)
250
+ result.each { |record| page_builder.add(record.values) }
251
+ end
252
+ end
214
253
  end
215
254
 
216
255
  def finish
@@ -231,6 +270,10 @@ module Embulk
231
270
  @row_procs
232
271
  end
233
272
 
273
+ def page_procs
274
+ @page_procs
275
+ end
276
+
234
277
  def skip_row_procs
235
278
  @skip_row_procs
236
279
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-28 00:00:00.000000000 Z
11
+ date: 2019-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk
@@ -60,7 +60,6 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - ".gitignore"
63
- - ".ruby-version"
64
63
  - Gemfile
65
64
  - LICENSE.txt
66
65
  - README.md
@@ -89,8 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
88
  - !ruby/object:Gem::Version
90
89
  version: '0'
91
90
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.7.4
91
+ rubygems_version: 3.0.3
94
92
  signing_key:
95
93
  specification_version: 4
96
94
  summary: Ruby Proc filter plugin for Embulk
@@ -1 +0,0 @@
1
- jruby-9.0.4.0