embulk-filter-ruby_proc 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +14 -4
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +7 -0
- data/lib/embulk/filter/ruby_proc.rb +46 -3
- metadata +3 -5
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ac1518db663e3846227546d746213c9c2f3f7a507ebce549c4137a75294ed54
|
4
|
+
data.tar.gz: a8cb61ed6b043444138bbe7e84fe0185f44377c27fc758b47b8227195f536740
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f903a1672abada9756fb495e681ac517fdf0fbc1632b03b3c678ff0d5b4dc0ac02d776757c02e2fa035a2b1f322cd45b6a873d68ccfccbf0858d7b585e6fbbc8
|
7
|
+
data.tar.gz: e23dcfd2b09c1e88cc3cd73c12b65b2c362f96f547af245670640db8ced487255649b2e3fad93e98e697f5d729c3db6ff56ec1d6b29a8c4a87b501d00fb57813
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -75,6 +75,13 @@ filters:
|
|
75
75
|
proc_file: comment_upcase.rb
|
76
76
|
skip_nil: false
|
77
77
|
type: json
|
78
|
+
pages:
|
79
|
+
- proc: |
|
80
|
+
->(records) do
|
81
|
+
records.map do |record|
|
82
|
+
record.tap { |r| r["id"] += 1 }
|
83
|
+
end
|
84
|
+
end
|
78
85
|
|
79
86
|
# ...
|
80
87
|
|
@@ -96,6 +103,8 @@ end
|
|
96
103
|
- instance variable is shared
|
97
104
|
- rows proc must return record hash or array of record hash.
|
98
105
|
- user must take care of object identity. Otherwise, error may be occurred when plugin applys column procs.
|
106
|
+
- pages proc must return array of record hash.
|
107
|
+
- use `page_size` option to increase size of processing record (ex. `-X page_size=64KB`)
|
99
108
|
|
100
109
|
### proc execution order
|
101
110
|
|
@@ -105,6 +114,7 @@ end
|
|
105
114
|
1. per record applied row procs
|
106
115
|
1. all skip\_row procs
|
107
116
|
1. column procs
|
117
|
+
1. per page procs
|
108
118
|
1. after procs
|
109
119
|
|
110
120
|
### preview
|
@@ -113,13 +123,13 @@ end
|
|
113
123
|
| id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
|
114
124
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
115
125
|
| 3 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
116
|
-
|
|
126
|
+
| 34 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
117
127
|
| 6 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
118
|
-
|
|
128
|
+
| 37 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
119
129
|
| 9 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
120
|
-
|
|
130
|
+
| 40 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
121
131
|
| 12 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
122
|
-
|
|
132
|
+
| 43 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
123
133
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
124
134
|
```
|
125
135
|
|
data/example/config.yml
CHANGED
data/lib/embulk/filter/ruby_proc.rb
CHANGED
@@ -33,6 +33,7 @@ module Embulk
|
|
33
33
|
task = {
|
34
34
|
"columns" => config.param("columns", :array, default: []),
|
35
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"pages" => config.param("pages", :array, default: []),
|
36
37
|
"skip_rows" => config.param("skip_rows", :array, default: []),
|
37
38
|
"before" => config.param("before", :array, default: []),
|
38
39
|
"after" => config.param("after", :array, default: []),
|
@@ -56,6 +57,7 @@ module Embulk
|
|
56
57
|
|
57
58
|
@proc_store ||= {}
|
58
59
|
@row_proc_store ||= {}
|
60
|
+
@page_proc_store ||= {}
|
59
61
|
@skip_row_proc_store ||= {}
|
60
62
|
transaction_id = rand(100000000)
|
61
63
|
until !@proc_store.has_key?(transaction_id)
|
@@ -85,6 +87,13 @@ module Embulk
|
|
85
87
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
86
88
|
end
|
87
89
|
}.compact
|
90
|
+
@page_proc_store[transaction_id] = page_procs = task["pages"].map {|page|
|
91
|
+
if page["proc"]
|
92
|
+
eval(page["proc"], evaluator_binding)
|
93
|
+
else
|
94
|
+
eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
|
95
|
+
end
|
96
|
+
}.compact
|
88
97
|
@skip_row_proc_store[transaction_id] = skip_row_procs = task["skip_rows"].map {|rowdef|
|
89
98
|
if rowdef["proc"]
|
90
99
|
eval(rowdef["proc"], evaluator_binding)
|
@@ -93,7 +102,9 @@ module Embulk
|
|
93
102
|
end
|
94
103
|
}.compact
|
95
104
|
task["transaction_id"] = transaction_id
|
96
|
-
|
105
|
+
if procs.empty? && row_procs.empty? && page_procs.empty? && skip_row_procs.empty?
|
106
|
+
raise "Need columns or rows or pages parameter"
|
107
|
+
end
|
97
108
|
|
98
109
|
before_procs.each do |pr|
|
99
110
|
pr.call
|
@@ -122,6 +133,10 @@ module Embulk
|
|
122
133
|
@row_proc_store
|
123
134
|
end
|
124
135
|
|
136
|
+
def self.page_proc_store
|
137
|
+
@page_proc_store
|
138
|
+
end
|
139
|
+
|
125
140
|
def self.skip_row_proc_store
|
126
141
|
@skip_row_proc_store
|
127
142
|
end
|
@@ -146,19 +161,31 @@ module Embulk
|
|
146
161
|
}.compact
|
147
162
|
end
|
148
163
|
|
164
|
+
def self.parse_page_procs(pages, evaluator_binding)
|
165
|
+
pages.map {|page|
|
166
|
+
if page["proc"]
|
167
|
+
eval(page["proc"], evaluator_binding)
|
168
|
+
else
|
169
|
+
eval(File.read(page["proc_file"]), evaluator_binding, File.expand_path(page["proc_file"]))
|
170
|
+
end
|
171
|
+
}.compact
|
172
|
+
end
|
173
|
+
|
149
174
|
def init
|
150
175
|
task["requires"].each do |lib|
|
151
176
|
require lib
|
152
177
|
end
|
153
178
|
|
154
|
-
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
179
|
+
if self.class.proc_store.nil? || self.class.row_proc_store.nil? || self.class.page_proc_store.nil? || self.class.skip_row_proc_store.nil?
|
155
180
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
156
181
|
@procs = self.class.parse_col_procs(task["columns"], evaluator_binding)
|
157
182
|
@row_procs = self.class.parse_row_procs(task["rows"], evaluator_binding)
|
183
|
+
@page_procs = self.class.parse_page_procs(task["pages"], evaluator_binding)
|
158
184
|
@skip_row_procs = self.class.parse_row_procs(task["skip_rows"], evaluator_binding)
|
159
185
|
else
|
160
186
|
@procs = self.class.proc_store[task["transaction_id"]]
|
161
187
|
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
188
|
+
@page_procs = self.class.page_proc_store[task["transaction_id"]]
|
162
189
|
@skip_row_procs = self.class.skip_row_proc_store[task["transaction_id"]]
|
163
190
|
end
|
164
191
|
@skip_nils = Hash[task["columns"].map {|col|
|
@@ -170,6 +197,7 @@ module Embulk
|
|
170
197
|
end
|
171
198
|
|
172
199
|
def add(page)
|
200
|
+
proc_records = []
|
173
201
|
page.each do |record|
|
174
202
|
if row_procs.empty?
|
175
203
|
record_hashes = [hashrize(record)]
|
@@ -207,10 +235,21 @@ module Embulk
|
|
207
235
|
record_hash[col] = pr.call(record_hash[col], record_hash)
|
208
236
|
end
|
209
237
|
end
|
210
|
-
|
238
|
+
if page_procs.empty?
|
239
|
+
page_builder.add(record_hash.values)
|
240
|
+
else
|
241
|
+
proc_records << record_hash
|
242
|
+
end
|
211
243
|
end
|
212
244
|
end
|
213
245
|
end
|
246
|
+
|
247
|
+
unless page_procs.empty?
|
248
|
+
page_procs.each do |pr|
|
249
|
+
result = pr.call(proc_records)
|
250
|
+
result.each { |record| page_builder.add(record.values) }
|
251
|
+
end
|
252
|
+
end
|
214
253
|
end
|
215
254
|
|
216
255
|
def finish
|
@@ -231,6 +270,10 @@ module Embulk
|
|
231
270
|
@row_procs
|
232
271
|
end
|
233
272
|
|
273
|
+
def page_procs
|
274
|
+
@page_procs
|
275
|
+
end
|
276
|
+
|
234
277
|
def skip_row_procs
|
235
278
|
@skip_row_procs
|
236
279
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|
@@ -60,7 +60,6 @@ extensions: []
|
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
62
|
- ".gitignore"
|
63
|
-
- ".ruby-version"
|
64
63
|
- Gemfile
|
65
64
|
- LICENSE.txt
|
66
65
|
- README.md
|
@@ -89,8 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
88
|
- !ruby/object:Gem::Version
|
90
89
|
version: '0'
|
91
90
|
requirements: []
|
92
|
-
|
93
|
-
rubygems_version: 2.7.4
|
91
|
+
rubygems_version: 3.0.3
|
94
92
|
signing_key:
|
95
93
|
specification_version: 4
|
96
94
|
summary: Ruby Proc filter plugin for Embulk
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
jruby-9.0.4.0
|