embulk-filter-ruby_proc 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bdb10f169cbbee39d9e79d590ba36cd7bc43e739
4
- data.tar.gz: 9d734ba55ecd29f87afcb340c968b6d066b308d1
3
+ metadata.gz: 7745c13afc58b5b1e5257cf7f1025fc3292b8103
4
+ data.tar.gz: fc8310448ff1cadba48dfb3cf59b64048749fc70
5
5
  SHA512:
6
- metadata.gz: 2778a56657b6b19743ec75021c0a9dbeee32015dd10b315a85d7ac669acfef0218b045cb17c7e60303edb6c54d7737f2329acb936d2bed5e4651778f8e6e1d2d
7
- data.tar.gz: 6f97934d1f9e6c6f656aa0739790f58258db028857b84f254007a0ba2e538bcc896c4ec8eec3229699971130d273501ee79f5747300d895373ee79e43bd9b1ad
6
+ metadata.gz: 9ca41e0fff89bb94f82f1c5035682ee48203ccae6e6493bd8867f6da3eaef1580618a15e442776ad21280d9cf96c8e8e5c5381a0ac1fbe811fb843702b38ca28
7
+ data.tar.gz: 8ba158fad7a3a5570bfdb3ad5186381cec5cf4a86eb927c022989e59a1e782fb5efa835ddc5d6470160b16edc28d1612402ad06ae2ce2be3019507b994422680
data/README.md CHANGED
@@ -34,6 +34,18 @@ filters:
34
34
  - cgi
35
35
  variables:
36
36
  multiply: 3
37
+ before:
38
+ - proc: |
39
+ -> do
40
+ puts "before proc"
41
+ @started_at = Time.now
42
+ end
43
+ after:
44
+ - proc: |
45
+ -> do
46
+ puts "after proc"
47
+ p Time.now - @started_at
48
+ end
37
49
  rows:
38
50
  - proc: |
39
51
  ->(record) do
@@ -72,8 +84,11 @@ filters:
72
84
  end
73
85
  ```
74
86
 
75
- rows proc must return array of record hash.
76
- And user must take care of object identity. Otherwise, error may be occurred when plugin applys column procs.
87
+ - `before` and `after` procs are each executed only once
88
+ - procs are evaluated on the same binding (an instance of the Evaluator class)
89
+ - instance variables are shared
90
+ - a rows proc must return a record hash or an array of record hashes.
91
+ - the user must take care of object identity; otherwise, an error may occur when the plugin applies column procs.
77
92
 
78
93
  ### preview
79
94
  ```
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.5.0"
4
+ spec.version = "0.6.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
data/example/config.yml CHANGED
@@ -23,10 +23,20 @@ in:
23
23
 
24
24
  filters:
25
25
  - type: ruby_proc
26
- requires:
27
- - cgi
28
26
  variables:
29
27
  multiply: 3
28
+ before:
29
+ - proc: |
30
+ -> do
31
+ puts "before proc"
32
+ @started_at = Time.now
33
+ end
34
+ after:
35
+ - proc: |
36
+ -> do
37
+ puts "after proc"
38
+ p Time.now - @started_at
39
+ end
30
40
  rows:
31
41
  - proc: |
32
42
  ->(record) do
@@ -47,6 +57,11 @@ filters:
47
57
  id * variables["multiply"]
48
58
  end
49
59
  type: string
60
+
61
+ - type: ruby_proc
62
+ requires:
63
+ - cgi
64
+ columns:
50
65
  - name: comment
51
66
  proc_file: comment_upcase.rb
52
67
  skip_nil: false
@@ -1,4 +1,5 @@
1
1
  require 'thread'
2
+ require 'securerandom'
2
3
 
3
4
  module Embulk
4
5
  module Filter
@@ -32,6 +33,8 @@ module Embulk
32
33
  task = {
33
34
  "columns" => config.param("columns", :array, default: []),
34
35
  "rows" => config.param("rows", :array, default: []),
36
+ "before" => config.param("before", :array, default: []),
37
+ "after" => config.param("after", :array, default: []),
35
38
  "requires" => config.param("requires", :array, default: []),
36
39
  "variables" => config.param("variables", :hash, default: {}),
37
40
  }
@@ -50,37 +53,69 @@ module Embulk
50
53
  require lib
51
54
  end
52
55
 
56
+ @proc_store ||= {}
57
+ @row_proc_store ||= {}
58
+ transaction_id = rand(100000000)
59
+ until !@proc_store.has_key?(transaction_id)
60
+ transaction_id = rand(100000000)
61
+ end
53
62
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
54
63
 
55
64
  # In order to avoid multithread problems, initialize procs here
56
- @procs = Hash[task["columns"].map {|col|
65
+ before_procs = task["before"].map {|before|
66
+ if before["proc"]
67
+ eval(before["proc"], evaluator_binding)
68
+ else
69
+ eval(File.read(before["proc_file"]), evaluator_binding, File.expand_path(before["proc_file"]))
70
+ end
71
+ }
72
+ @proc_store[transaction_id] = procs = Hash[task["columns"].map {|col|
57
73
  if col["proc"]
58
74
  [col["name"], eval(col["proc"], evaluator_binding)]
59
75
  else
60
76
  [col["name"], eval(File.read(col["proc_file"]), evaluator_binding, File.expand_path(col["proc_file"]))]
61
77
  end
62
78
  }]
63
- @row_procs = task["rows"].map {|rowdef|
79
+ @row_proc_store[transaction_id] = row_procs = task["rows"].map {|rowdef|
64
80
  if rowdef["proc"]
65
81
  eval(rowdef["proc"], evaluator_binding)
66
82
  else
67
83
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
68
84
  end
69
85
  }.compact
70
- raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
86
+ task["transaction_id"] = transaction_id
87
+ raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
88
+
89
+ before_procs.each do |pr|
90
+ pr.call
91
+ end
71
92
 
72
93
  yield(task, out_columns)
94
+
95
+ after_procs = task["after"].map {|after|
96
+ if after["proc"]
97
+ eval(after["proc"], evaluator_binding)
98
+ else
99
+ eval(File.read(after["proc_file"]), evaluator_binding, File.expand_path(after["proc_file"]))
100
+ end
101
+ }
102
+
103
+ after_procs.each do |pr|
104
+ pr.call
105
+ end
73
106
  end
74
107
 
75
- def self.procs
76
- @procs
108
+ def self.proc_store
109
+ @proc_store
77
110
  end
78
111
 
79
- def self.row_procs
80
- @row_procs
112
+ def self.row_proc_store
113
+ @row_proc_store
81
114
  end
82
115
 
83
116
  def init
117
+ @procs = self.class.proc_store[task["transaction_id"]]
118
+ @row_procs = self.class.row_proc_store[task["transaction_id"]]
84
119
  @skip_nils = Hash[task["columns"].map {|col|
85
120
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
86
121
  }]
@@ -136,11 +171,11 @@ module Embulk
136
171
  end
137
172
 
138
173
  def procs
139
- self.class.procs
174
+ @procs
140
175
  end
141
176
 
142
177
  def row_procs
143
- self.class.row_procs
178
+ @row_procs
144
179
  end
145
180
 
146
181
  def skip_nils
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-14 00:00:00.000000000 Z
11
+ date: 2016-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk