embulk-filter-ruby_proc 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/lib/embulk/filter/ruby_proc.rb +51 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bdb10f169cbbee39d9e79d590ba36cd7bc43e739
|
4
|
+
data.tar.gz: 9d734ba55ecd29f87afcb340c968b6d066b308d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2778a56657b6b19743ec75021c0a9dbeee32015dd10b315a85d7ac669acfef0218b045cb17c7e60303edb6c54d7737f2329acb936d2bed5e4651778f8e6e1d2d
|
7
|
+
data.tar.gz: 6f97934d1f9e6c6f656aa0739790f58258db028857b84f254007a0ba2e538bcc896c4ec8eec3229699971130d273501ee79f5747300d895373ee79e43bd9b1ad
|
@@ -1,18 +1,31 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
1
3
|
module Embulk
|
2
4
|
module Filter
|
3
5
|
|
4
|
-
class
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
class RubyProc < FilterPlugin
|
7
|
+
|
8
|
+
class Evaluator
|
9
|
+
attr_reader :variables
|
10
|
+
@mutex = Mutex.new
|
11
|
+
|
12
|
+
def self.mutex
|
13
|
+
@mutex
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize(variables)
|
17
|
+
@variables = variables
|
18
|
+
end
|
9
19
|
|
10
|
-
|
11
|
-
|
20
|
+
def get_binding
|
21
|
+
binding
|
22
|
+
end
|
23
|
+
|
24
|
+
def mutex
|
25
|
+
self.class.mutex
|
26
|
+
end
|
12
27
|
end
|
13
|
-
end
|
14
28
|
|
15
|
-
class RubyProc < FilterPlugin
|
16
29
|
Plugin.register_filter("ruby_proc", self)
|
17
30
|
|
18
31
|
def self.transaction(config, in_schema, &control)
|
@@ -33,16 +46,13 @@ module Embulk
|
|
33
46
|
end
|
34
47
|
end
|
35
48
|
|
36
|
-
yield(task, out_columns)
|
37
|
-
end
|
38
|
-
|
39
|
-
def init
|
40
49
|
task["requires"].each do |lib|
|
41
50
|
require lib
|
42
51
|
end
|
43
52
|
|
44
53
|
evaluator_binding = Evaluator.new(task["variables"]).get_binding
|
45
54
|
|
55
|
+
# In order to avoid multithreading problems, initialize procs here
|
46
56
|
@procs = Hash[task["columns"].map {|col|
|
47
57
|
if col["proc"]
|
48
58
|
[col["name"], eval(col["proc"], evaluator_binding)]
|
@@ -59,6 +69,18 @@ module Embulk
|
|
59
69
|
}.compact
|
60
70
|
raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
|
61
71
|
|
72
|
+
yield(task, out_columns)
|
73
|
+
end
|
74
|
+
|
75
|
+
def self.procs
|
76
|
+
@procs
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.row_procs
|
80
|
+
@row_procs
|
81
|
+
end
|
82
|
+
|
83
|
+
def init
|
62
84
|
@skip_nils = Hash[task["columns"].map {|col|
|
63
85
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
64
86
|
}]
|
@@ -69,10 +91,10 @@ module Embulk
|
|
69
91
|
|
70
92
|
def add(page)
|
71
93
|
page.each do |record|
|
72
|
-
if @row_procs.empty?
|
94
|
+
if row_procs.empty?
|
73
95
|
record_hashes = [hashrize(record)]
|
74
96
|
else
|
75
|
-
record_hashes = @row_procs.each_with_object([]) do |pr, arr|
|
97
|
+
record_hashes = row_procs.each_with_object([]) do |pr, arr|
|
76
98
|
result = pr.call(hashrize(record))
|
77
99
|
case result
|
78
100
|
when Array
|
@@ -88,9 +110,9 @@ module Embulk
|
|
88
110
|
end
|
89
111
|
|
90
112
|
record_hashes.each do |record_hash|
|
91
|
-
|
113
|
+
procs.each do |col, pr|
|
92
114
|
next unless record_hash.has_key?(col)
|
93
|
-
next if record_hash[col].nil? && @skip_nils[col]
|
115
|
+
next if record_hash[col].nil? && skip_nils[col]
|
94
116
|
|
95
117
|
if pr.arity == 1
|
96
118
|
record_hash[col] = pr.call(record_hash[col])
|
@@ -112,6 +134,18 @@ module Embulk
|
|
112
134
|
def hashrize(record)
|
113
135
|
Hash[in_schema.names.zip(record)]
|
114
136
|
end
|
137
|
+
|
138
|
+
def procs
|
139
|
+
self.class.procs
|
140
|
+
end
|
141
|
+
|
142
|
+
def row_procs
|
143
|
+
self.class.row_procs
|
144
|
+
end
|
145
|
+
|
146
|
+
def skip_nils
|
147
|
+
@skip_nils
|
148
|
+
end
|
115
149
|
end
|
116
150
|
|
117
151
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|