embulk-filter-ruby_proc 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +14 -4
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +7 -0
- data/lib/embulk/filter/ruby_proc.rb +46 -3
- metadata +3 -5
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ac1518db663e3846227546d746213c9c2f3f7a507ebce549c4137a75294ed54
|
4
|
+
data.tar.gz: a8cb61ed6b043444138bbe7e84fe0185f44377c27fc758b47b8227195f536740
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f903a1672abada9756fb495e681ac517fdf0fbc1632b03b3c678ff0d5b4dc0ac02d776757c02e2fa035a2b1f322cd45b6a873d68ccfccbf0858d7b585e6fbbc8
|
7
|
+
data.tar.gz: e23dcfd2b09c1e88cc3cd73c12b65b2c362f96f547af245670640db8ced487255649b2e3fad93e98e697f5d729c3db6ff56ec1d6b29a8c4a87b501d00fb57813
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -75,6 +75,13 @@ filters:
|
|
75
75
|
proc_file: comment_upcase.rb
|
76
76
|
skip_nil: false
|
77
77
|
type: json
|
78
|
+
pages:
|
79
|
+
- proc: |
|
80
|
+
->(records) do
|
81
|
+
records.map do |record|
|
82
|
+
record.tap { |r| r["id"] += 1 }
|
83
|
+
end
|
84
|
+
end
|
78
85
|
|
79
86
|
# ...
|
80
87
|
|
@@ -96,6 +103,8 @@ end
|
|
96
103
|
- instance variable is shared
|
97
104
|
- rows proc must return a record hash or an array of record hashes.
|
98
105
|
- user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
|
106
|
+
- pages proc must return an array of record hashes.
|
107
|
+
- use the `page_size` option to increase the size of the record batch passed to each pages proc (e.g. `-X page_size=64KB`)
|
99
108
|
|
100
109
|
### proc execution order
|
101
110
|
|
@@ -105,6 +114,7 @@ end
|
|
105
114
|
1. per record applied row procs
|
106
115
|
1. all skip\_row procs
|
107
116
|
1. column procs
|
117
|
+
1. per page procs
|
108
118
|
1. after procs
|
109
119
|
|
110
120
|
### preview
|
@@ -113,13 +123,13 @@ end
|
|
113
123
|
| id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
|
114
124
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
115
125
|
| 3 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
116
|
-
|
|
126
|
+
| 34 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
117
127
|
| 6 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
118
|
-
|
|
128
|
+
| 37 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
119
129
|
| 9 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
120
|
-
|
|
130
|
+
| 40 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
121
131
|
| 12 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
122
|
-
|
|
132
|
+
| 43 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
123
133
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
124
134
|
```
|
125
135
|
|
data/example/config.yml
CHANGED
@@ -33,6 +33,7 @@ module Embulk
|
|
33
33
|
task = {
|
34
34
|
"columns" => config.param("columns", :array, default: []),
|
35
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"pages" => config.param("pages", :array, default: []),
|
36
37
|
"skip_rows" => config.param("skip_rows", :array, default: []),
|
37
38
|
"before" => config.param("before", :array, default: []),
|
38
39
|
"after" => config.param("after", :array, default: []),
|
@@ -56,6 +57,7 @@ module Embulk
|
|
56
57
|
|
57
58
|
@proc_store ||= {}
|
58
59
|
@row_proc_store ||= {}
|
60
|
+
@page_proc_store ||= {}
|
59
61
|
@skip_row_proc_store ||= {}
|
60
62
|
transaction_id = rand(100000000)
|
61
63
|
until !@proc_store.has_key?(transaction_id)
|
@@ -85,6 +87,13 @@ module Embulk
|
|
85
87
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
86
88
|
end
|
87
89
|
}.compact
|
90
|
+
@page_proc_store[transaction_id] = page_procs = task["pages"].map {|page|
|
91
|
+
if page["proc"]
|
92
|
+
eval(page["proc"], evaluator_binding)
|
93
|
+
else
|
94
|
+
eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
|
95
|
+
end
|
96
|
+
}.compact
|
88
97
|
@skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
|
89
98
|
if rowdef["proc"]
|
90
99
|
eval(rowdef["proc"], evaluator_binding)
|
@@ -93,7 +102,9 @@ module Embulk
|
|
93
102
|
end
|
94
103
|
}.compact
|
95
104
|
task["transaction_id"] = transaction_id
|
96
|
-
|
105
|
+
if procs.empty? && row_procs.empty? && page_procs.empty? && skip_row_procs.empty?
|
106
|
+
raise "Need columns or rows or pages parameter"
|
107
|
+
end
|
97
108
|
|
98
109
|
before_procs.each do |pr|
|
99
110
|
pr.call
|
@@ -122,6 +133,10 @@ module Embulk
|
|
122
133
|
@row_proc_store
|
123
134
|
end
|
124
135
|
|
136
|
+
def self.page_proc_store
|
137
|
+
@page_proc_store
|
138
|
+
end
|
139
|
+
|
125
140
|
def self.skip_row_proc_store
|
126
141
|
@skip_row_proc_store
|
127
142
|
end
|
@@ -146,19 +161,31 @@ module Embulk
|
|
146
161
|
}.compact
|
147
162
|
end
|
148
163
|
|
164
|
+
def self.parse_page_procs(pages, evaluator_binding)
|
165
|
+
pages.map {|page|
|
166
|
+
if page["proc"]
|
167
|
+
eval(page["proc"], evaluator_binding)
|
168
|
+
else
|
169
|
+
eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
|
170
|
+
end
|
171
|
+
}.compact
|
172
|
+
end
|
173
|
+
|
149
174
|
def init
|
150
175
|
task["requires"].each do |lib|
|
151
176
|
require lib
|
152
177
|
end
|
153
178
|
|
154
|
-
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
179
|
+
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.page_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
155
180
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
156
181
|
@procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
|
157
182
|
@row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
|
183
|
+
@page_procs = self.class.parse_page_procs(task["pages"], evaluator_binding)
|
158
184
|
@skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
|
159
185
|
else
|
160
186
|
@procs = self.class.proc_store[task["transaction_id"]]
|
161
187
|
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
188
|
+
@page_procs = self.class.page_proc_store[task["transaction_id"]]
|
162
189
|
@skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
|
163
190
|
end
|
164
191
|
@skip_nils = Hash[task["columns"].map {|col|
|
@@ -170,6 +197,7 @@ module Embulk
|
|
170
197
|
end
|
171
198
|
|
172
199
|
def add(page)
|
200
|
+
proc_records = []
|
173
201
|
page.each do |record|
|
174
202
|
if row_procs.empty?
|
175
203
|
record_hashes = [hashrize(record)]
|
@@ -207,10 +235,21 @@ module Embulk
|
|
207
235
|
record_hash[col] = pr.call(record_hash[col], record_hash)
|
208
236
|
end
|
209
237
|
end
|
210
|
-
|
238
|
+
if page_procs.empty?
|
239
|
+
page_builder.add(record_hash.values)
|
240
|
+
else
|
241
|
+
proc_records << record_hash
|
242
|
+
end
|
211
243
|
end
|
212
244
|
end
|
213
245
|
end
|
246
|
+
|
247
|
+
unless page_procs.empty?
|
248
|
+
page_procs.each do |pr|
|
249
|
+
result = pr.call(proc_records)
|
250
|
+
result.each { |record| page_builder.add(record.values) }
|
251
|
+
end
|
252
|
+
end
|
214
253
|
end
|
215
254
|
|
216
255
|
def finish
|
@@ -231,6 +270,10 @@ module Embulk
|
|
231
270
|
@row_procs
|
232
271
|
end
|
233
272
|
|
273
|
+
def page_procs
|
274
|
+
@page_procs
|
275
|
+
end
|
276
|
+
|
234
277
|
def skip_row_procs
|
235
278
|
@skip_row_procs
|
236
279
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|
@@ -60,7 +60,6 @@ extensions: []
|
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
62
|
- ".gitignore"
|
63
|
-
- ".ruby-version"
|
64
63
|
- Gemfile
|
65
64
|
- LICENSE.txt
|
66
65
|
- README.md
|
@@ -89,8 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
88
|
- !ruby/object:Gem::Version
|
90
89
|
version: '0'
|
91
90
|
requirements: []
|
92
|
-
|
93
|
-
rubygems_version: 2.7.4
|
91
|
+
rubygems_version: 3.0.3
|
94
92
|
signing_key:
|
95
93
|
specification_version: 4
|
96
94
|
summary: Ruby Proc filter plugin for Embulk
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
jruby-9.0.4.0
|