embulk-filter-ruby_proc 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -2
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +17 -2
- data/lib/embulk/filter/ruby_proc.rb +44 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7745c13afc58b5b1e5257cf7f1025fc3292b8103
|
4
|
+
data.tar.gz: fc8310448ff1cadba48dfb3cf59b64048749fc70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ca41e0fff89bb94f82f1c5035682ee48203ccae6e6493bd8867f6da3eaef1580618a15e442776ad21280d9cf96c8e8e5c5381a0ac1fbe811fb843702b38ca28
|
7
|
+
data.tar.gz: 8ba158fad7a3a5570bfdb3ad5186381cec5cf4a86eb927c022989e59a1e782fb5efa835ddc5d6470160b16edc28d1612402ad06ae2ce2be3019507b994422680
|
data/README.md
CHANGED
@@ -34,6 +34,18 @@ filters:
|
|
34
34
|
- cgi
|
35
35
|
variables:
|
36
36
|
multiply: 3
|
37
|
+
before:
|
38
|
+
- proc: |
|
39
|
+
-> do
|
40
|
+
puts "before proc"
|
41
|
+
@started_at = Time.now
|
42
|
+
end
|
43
|
+
after:
|
44
|
+
- proc: |
|
45
|
+
-> do
|
46
|
+
puts "after proc"
|
47
|
+
p Time.now - @started_at
|
48
|
+
end
|
37
49
|
rows:
|
38
50
|
- proc: |
|
39
51
|
->(record) do
|
@@ -72,8 +84,11 @@ filters:
|
|
72
84
|
end
|
73
85
|
```
|
74
86
|
|
75
|
-
|
76
|
-
|
87
|
+
- `before` and `after` procs are each executed only once (before and after the filter runs, respectively)
|
88
|
+
- procs are evaluated on the same binding (an instance of the Evaluator class)
|
89
|
+
- instance variables are shared
|
90
|
+
- a rows proc must return a record hash or an array of record hashes.
|
91
|
+
- the user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
|
77
92
|
|
78
93
|
### preview
|
79
94
|
```
|
data/example/config.yml
CHANGED
@@ -23,10 +23,20 @@ in:
|
|
23
23
|
|
24
24
|
filters:
|
25
25
|
- type: ruby_proc
|
26
|
-
requires:
|
27
|
-
- cgi
|
28
26
|
variables:
|
29
27
|
multiply: 3
|
28
|
+
before:
|
29
|
+
- proc: |
|
30
|
+
-> do
|
31
|
+
puts "before proc"
|
32
|
+
@started_at = Time.now
|
33
|
+
end
|
34
|
+
after:
|
35
|
+
- proc: |
|
36
|
+
-> do
|
37
|
+
puts "after proc"
|
38
|
+
p Time.now - @started_at
|
39
|
+
end
|
30
40
|
rows:
|
31
41
|
- proc: |
|
32
42
|
->(record) do
|
@@ -47,6 +57,11 @@ filters:
|
|
47
57
|
id * variables["multiply"]
|
48
58
|
end
|
49
59
|
type: string
|
60
|
+
|
61
|
+
- type: ruby_proc
|
62
|
+
requires:
|
63
|
+
- cgi
|
64
|
+
columns:
|
50
65
|
- name: comment
|
51
66
|
proc_file: comment_upcase.rb
|
52
67
|
skip_nil: false
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'thread'
|
2
|
+
require 'securerandom'
|
2
3
|
|
3
4
|
module Embulk
|
4
5
|
module Filter
|
@@ -32,6 +33,8 @@ module Embulk
|
|
32
33
|
task = {
|
33
34
|
"columns" => config.param("columns", :array, default: []),
|
34
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"before" => config.param("before", :array, default: []),
|
37
|
+
"after" => config.param("after", :array, default: []),
|
35
38
|
"requires" => config.param("requires", :array, default: []),
|
36
39
|
"variables" => config.param("variables", :hash, default: {}),
|
37
40
|
}
|
@@ -50,37 +53,69 @@ module Embulk
|
|
50
53
|
require lib
|
51
54
|
end
|
52
55
|
|
56
|
+
@proc_store ||= {}
|
57
|
+
@row_proc_store ||= {}
|
58
|
+
transaction_id = rand(100000000)
|
59
|
+
until !@proc_store.has_key?(transaction_id)
|
60
|
+
transaction_id = rand(100000000)
|
61
|
+
end
|
53
62
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
54
63
|
|
55
64
|
# In order to avoid multithreading problems, initialize procs here
|
56
|
-
|
65
|
+
before_procs = task["before"].map {|before|
|
66
|
+
if before["proc"]
|
67
|
+
eval(before["proc"], evaluator_binding)
|
68
|
+
else
|
69
|
+
eval(File.read(before["proc_file"]), evaluator_binding, File.expand_path(before["proc_file"]))
|
70
|
+
end
|
71
|
+
}
|
72
|
+
@proc_store[transaction_id] = procs = Hash[task["columns"].map {|col|
|
57
73
|
if col["proc"]
|
58
74
|
[col["name"], eval(col["proc"], evaluator_binding)]
|
59
75
|
else
|
60
76
|
[col["name"], eval(File.read(col["proc_file"]), evaluator_binding, File.expand_path(col["proc_file"]))]
|
61
77
|
end
|
62
78
|
}]
|
63
|
-
@row_procs = task["rows"].map {|rowdef|
|
79
|
+
@row_proc_store[transaction_id] = row_procs = task["rows"].map {|rowdef|
|
64
80
|
if rowdef["proc"]
|
65
81
|
eval(rowdef["proc"], evaluator_binding)
|
66
82
|
else
|
67
83
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
68
84
|
end
|
69
85
|
}.compact
|
70
|
-
|
86
|
+
task["transaction_id"] = transaction_id
|
87
|
+
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
|
88
|
+
|
89
|
+
before_procs.each do |pr|
|
90
|
+
pr.call
|
91
|
+
end
|
71
92
|
|
72
93
|
yield(task, out_columns)
|
94
|
+
|
95
|
+
after_procs = task["after"].map {|after|
|
96
|
+
if after["proc"]
|
97
|
+
eval(after["proc"], evaluator_binding)
|
98
|
+
else
|
99
|
+
eval(File.read(after["proc_file"]), evaluator_binding, File.expand_path(after["proc_file"]))
|
100
|
+
end
|
101
|
+
}
|
102
|
+
|
103
|
+
after_procs.each do |pr|
|
104
|
+
pr.call
|
105
|
+
end
|
73
106
|
end
|
74
107
|
|
75
|
-
def self.
|
76
|
-
@
|
108
|
+
def self.proc_store
|
109
|
+
@proc_store
|
77
110
|
end
|
78
111
|
|
79
|
-
def self.
|
80
|
-
@
|
112
|
+
def self.row_proc_store
|
113
|
+
@row_proc_store
|
81
114
|
end
|
82
115
|
|
83
116
|
def init
|
117
|
+
@procs = self.class.proc_store[task["transaction_id"]]
|
118
|
+
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
84
119
|
@skip_nils = Hash[task["columns"].map {|col|
|
85
120
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
86
121
|
}]
|
@@ -136,11 +171,11 @@ module Embulk
|
|
136
171
|
end
|
137
172
|
|
138
173
|
def procs
|
139
|
-
|
174
|
+
@procs
|
140
175
|
end
|
141
176
|
|
142
177
|
def row_procs
|
143
|
-
|
178
|
+
@row_procs
|
144
179
|
end
|
145
180
|
|
146
181
|
def skip_nils
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|