embulk-filter-ruby_proc 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -2
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +17 -2
- data/lib/embulk/filter/ruby_proc.rb +44 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7745c13afc58b5b1e5257cf7f1025fc3292b8103
|
4
|
+
data.tar.gz: fc8310448ff1cadba48dfb3cf59b64048749fc70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ca41e0fff89bb94f82f1c5035682ee48203ccae6e6493bd8867f6da3eaef1580618a15e442776ad21280d9cf96c8e8e5c5381a0ac1fbe811fb843702b38ca28
|
7
|
+
data.tar.gz: 8ba158fad7a3a5570bfdb3ad5186381cec5cf4a86eb927c022989e59a1e782fb5efa835ddc5d6470160b16edc28d1612402ad06ae2ce2be3019507b994422680
|
data/README.md
CHANGED
@@ -34,6 +34,18 @@ filters:
|
|
34
34
|
- cgi
|
35
35
|
variables:
|
36
36
|
multiply: 3
|
37
|
+
before:
|
38
|
+
- proc: |
|
39
|
+
-> do
|
40
|
+
puts "before proc"
|
41
|
+
@started_at = Time.now
|
42
|
+
end
|
43
|
+
after:
|
44
|
+
- proc: |
|
45
|
+
-> do
|
46
|
+
puts "after proc"
|
47
|
+
p Time.now - @started_at
|
48
|
+
end
|
37
49
|
rows:
|
38
50
|
- proc: |
|
39
51
|
->(record) do
|
@@ -72,8 +84,11 @@ filters:
|
|
72
84
|
end
|
73
85
|
```
|
74
86
|
|
75
|
-
|
76
|
-
|
87
|
+
- `before` and `after` procs are each executed only once
|
88
|
+
- procs are evaluated on the same binding (an instance of the Evaluator class)
|
89
|
+
- instance variables are shared
|
90
|
+
- a rows proc must return a record hash or an array of record hashes.
|
91
|
+
- the user must take care of object identity; otherwise, an error may occur when the plugin applies column procs.
|
77
92
|
|
78
93
|
### preview
|
79
94
|
```
|
data/example/config.yml
CHANGED
@@ -23,10 +23,20 @@ in:
|
|
23
23
|
|
24
24
|
filters:
|
25
25
|
- type: ruby_proc
|
26
|
-
requires:
|
27
|
-
- cgi
|
28
26
|
variables:
|
29
27
|
multiply: 3
|
28
|
+
before:
|
29
|
+
- proc: |
|
30
|
+
-> do
|
31
|
+
puts "before proc"
|
32
|
+
@started_at = Time.now
|
33
|
+
end
|
34
|
+
after:
|
35
|
+
- proc: |
|
36
|
+
-> do
|
37
|
+
puts "after proc"
|
38
|
+
p Time.now - @started_at
|
39
|
+
end
|
30
40
|
rows:
|
31
41
|
- proc: |
|
32
42
|
->(record) do
|
@@ -47,6 +57,11 @@ filters:
|
|
47
57
|
id * variables["multiply"]
|
48
58
|
end
|
49
59
|
type: string
|
60
|
+
|
61
|
+
- type: ruby_proc
|
62
|
+
requires:
|
63
|
+
- cgi
|
64
|
+
columns:
|
50
65
|
- name: comment
|
51
66
|
proc_file: comment_upcase.rb
|
52
67
|
skip_nil: false
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'thread'
|
2
|
+
require 'securerandom'
|
2
3
|
|
3
4
|
module Embulk
|
4
5
|
module Filter
|
@@ -32,6 +33,8 @@ module Embulk
|
|
32
33
|
task = {
|
33
34
|
"columns" => config.param("columns", :array, default: []),
|
34
35
|
"rows" => config.param("rows", :array, default: []),
|
36
|
+
"before" => config.param("before", :array, default: []),
|
37
|
+
"after" => config.param("after", :array, default: []),
|
35
38
|
"requires" => config.param("requires", :array, default: []),
|
36
39
|
"variables" => config.param("variables", :hash, default: {}),
|
37
40
|
}
|
@@ -50,37 +53,69 @@ module Embulk
|
|
50
53
|
require lib
|
51
54
|
end
|
52
55
|
|
56
|
+
@proc_store ||= {}
|
57
|
+
@row_proc_store ||= {}
|
58
|
+
transaction_id = rand(100000000)
|
59
|
+
until !@proc_store.has_key?(transaction_id)
|
60
|
+
transaction_id = rand(100000000)
|
61
|
+
end
|
53
62
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
54
63
|
|
55
64
|
# In order to avoid multithreading problems, initialize procs here
|
56
|
-
|
65
|
+
before_procs = task["before"].map {|before|
|
66
|
+
if before["proc"]
|
67
|
+
eval(before["proc"], evaluator_binding)
|
68
|
+
else
|
69
|
+
eval(File.read(before["proc_file"]), evaluator_binding, File.expand_path(before["proc_file"]))
|
70
|
+
end
|
71
|
+
}
|
72
|
+
@proc_store[transaction_id] = procs = Hash[task["columns"].map {|col|
|
57
73
|
if col["proc"]
|
58
74
|
[col["name"], eval(col["proc"], evaluator_binding)]
|
59
75
|
else
|
60
76
|
[col["name"], eval(File.read(col["proc_file"]), evaluator_binding, File.expand_path(col["proc_file"]))]
|
61
77
|
end
|
62
78
|
}]
|
63
|
-
@row_procs = task["rows"].map {|rowdef|
|
79
|
+
@row_proc_store[transaction_id] = row_procs = task["rows"].map {|rowdef|
|
64
80
|
if rowdef["proc"]
|
65
81
|
eval(rowdef["proc"], evaluator_binding)
|
66
82
|
else
|
67
83
|
eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
|
68
84
|
end
|
69
85
|
}.compact
|
70
|
-
|
86
|
+
task["transaction_id"] = transaction_id
|
87
|
+
raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
|
88
|
+
|
89
|
+
before_procs.each do |pr|
|
90
|
+
pr.call
|
91
|
+
end
|
71
92
|
|
72
93
|
yield(task, out_columns)
|
94
|
+
|
95
|
+
after_procs = task["after"].map {|after|
|
96
|
+
if after["proc"]
|
97
|
+
eval(after["proc"], evaluator_binding)
|
98
|
+
else
|
99
|
+
eval(File.read(after["proc_file"]), evaluator_binding, File.expand_path(after["proc_file"]))
|
100
|
+
end
|
101
|
+
}
|
102
|
+
|
103
|
+
after_procs.each do |pr|
|
104
|
+
pr.call
|
105
|
+
end
|
73
106
|
end
|
74
107
|
|
75
|
-
def self.
|
76
|
-
@
|
108
|
+
def self.proc_store
|
109
|
+
@proc_store
|
77
110
|
end
|
78
111
|
|
79
|
-
def self.
|
80
|
-
@
|
112
|
+
def self.row_proc_store
|
113
|
+
@row_proc_store
|
81
114
|
end
|
82
115
|
|
83
116
|
def init
|
117
|
+
@procs = self.class.proc_store[task["transaction_id"]]
|
118
|
+
@row_procs = self.class.row_proc_store[task["transaction_id"]]
|
84
119
|
@skip_nils = Hash[task["columns"].map {|col|
|
85
120
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
86
121
|
}]
|
@@ -136,11 +171,11 @@ module Embulk
|
|
136
171
|
end
|
137
172
|
|
138
173
|
def procs
|
139
|
-
|
174
|
+
@procs
|
140
175
|
end
|
141
176
|
|
142
177
|
def row_procs
|
143
|
-
|
178
|
+
@row_procs
|
144
179
|
end
|
145
180
|
|
146
181
|
def skip_nils
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|