embulk-filter-ruby_proc 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bdb10f169cbbee39d9e79d590ba36cd7bc43e739
4
- data.tar.gz: 9d734ba55ecd29f87afcb340c968b6d066b308d1
3
+ metadata.gz: 7745c13afc58b5b1e5257cf7f1025fc3292b8103
4
+ data.tar.gz: fc8310448ff1cadba48dfb3cf59b64048749fc70
5
5
  SHA512:
6
- metadata.gz: 2778a56657b6b19743ec75021c0a9dbeee32015dd10b315a85d7ac669acfef0218b045cb17c7e60303edb6c54d7737f2329acb936d2bed5e4651778f8e6e1d2d
7
- data.tar.gz: 6f97934d1f9e6c6f656aa0739790f58258db028857b84f254007a0ba2e538bcc896c4ec8eec3229699971130d273501ee79f5747300d895373ee79e43bd9b1ad
6
+ metadata.gz: 9ca41e0fff89bb94f82f1c5035682ee48203ccae6e6493bd8867f6da3eaef1580618a15e442776ad21280d9cf96c8e8e5c5381a0ac1fbe811fb843702b38ca28
7
+ data.tar.gz: 8ba158fad7a3a5570bfdb3ad5186381cec5cf4a86eb927c022989e59a1e782fb5efa835ddc5d6470160b16edc28d1612402ad06ae2ce2be3019507b994422680
data/README.md CHANGED
@@ -34,6 +34,18 @@ filters:
34
34
  - cgi
35
35
  variables:
36
36
  multiply: 3
37
+ before:
38
+ - proc: |
39
+ -> do
40
+ puts "before proc"
41
+ @started_at = Time.now
42
+ end
43
+ after:
44
+ - proc: |
45
+ -> do
46
+ puts "after proc"
47
+ p Time.now - @started_at
48
+ end
37
49
  rows:
38
50
  - proc: |
39
51
  ->(record) do
@@ -72,8 +84,11 @@ filters:
72
84
  end
73
85
  ```
74
86
 
75
- rows proc must return array of record hash.
76
- And user must take care of object identity. Otherwise, error may be occurred when plugin applys column procs.
87
+ - `before` and `after` procs are each executed only once
88
+ - procs are evaluated on the same binding (an instance of the Evaluator class)
89
+ - instance variables are shared
90
+ - a rows proc must return a record hash or an array of record hashes.
91
+ - the user must take care of object identity. Otherwise, an error may occur when the plugin applies column procs.
77
92
 
78
93
  ### preview
79
94
  ```
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.5.0"
4
+ spec.version = "0.6.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
data/example/config.yml CHANGED
@@ -23,10 +23,20 @@ in:
23
23
 
24
24
  filters:
25
25
  - type: ruby_proc
26
- requires:
27
- - cgi
28
26
  variables:
29
27
  multiply: 3
28
+ before:
29
+ - proc: |
30
+ -> do
31
+ puts "before proc"
32
+ @started_at = Time.now
33
+ end
34
+ after:
35
+ - proc: |
36
+ -> do
37
+ puts "after proc"
38
+ p Time.now - @started_at
39
+ end
30
40
  rows:
31
41
  - proc: |
32
42
  ->(record) do
@@ -47,6 +57,11 @@ filters:
47
57
  id * variables["multiply"]
48
58
  end
49
59
  type: string
60
+
61
+ - type: ruby_proc
62
+ requires:
63
+ - cgi
64
+ columns:
50
65
  - name: comment
51
66
  proc_file: comment_upcase.rb
52
67
  skip_nil: false
@@ -1,4 +1,5 @@
1
1
  require 'thread'
2
+ require 'securerandom'
2
3
 
3
4
  module Embulk
4
5
  module Filter
@@ -32,6 +33,8 @@ module Embulk
32
33
  task = {
33
34
  "columns" => config.param("columns", :array, default: []),
34
35
  "rows" => config.param("rows", :array, default: []),
36
+ "before" => config.param("before", :array, default: []),
37
+ "after" => config.param("after", :array, default: []),
35
38
  "requires" => config.param("requires", :array, default: []),
36
39
  "variables" => config.param("variables", :hash, default: {}),
37
40
  }
@@ -50,37 +53,69 @@ module Embulk
50
53
  require lib
51
54
  end
52
55
 
56
+ @proc_store ||= {}
57
+ @row_proc_store ||= {}
58
+ transaction_id = rand(100000000)
59
+ until !@proc_store.has_key?(transaction_id)
60
+ transaction_id = rand(100000000)
61
+ end
53
62
  evaluator_binding = Evaluator.new(task["variables"]).get_binding
54
63
 
55
64
  # In order to avoid multithreading problems, initialize procs here
56
- @procs = Hash[task["columns"].map {|col|
65
+ before_procs = task["before"].map {|before|
66
+ if before["proc"]
67
+ eval(before["proc"], evaluator_binding)
68
+ else
69
+ eval(File.read(before["proc_file"]), evaluator_binding, File.expand_path(before["proc_file"]))
70
+ end
71
+ }
72
+ @proc_store[transaction_id] = procs = Hash[task["columns"].map {|col|
57
73
  if col["proc"]
58
74
  [col["name"], eval(col["proc"], evaluator_binding)]
59
75
  else
60
76
  [col["name"], eval(File.read(col["proc_file"]), evaluator_binding, File.expand_path(col["proc_file"]))]
61
77
  end
62
78
  }]
63
- @row_procs = task["rows"].map {|rowdef|
79
+ @row_proc_store[transaction_id] = row_procs = task["rows"].map {|rowdef|
64
80
  if rowdef["proc"]
65
81
  eval(rowdef["proc"], evaluator_binding)
66
82
  else
67
83
  eval(File.read(rowdef["proc_file"]), evaluator_binding, File.expand_path(rowdef["proc_file"]))
68
84
  end
69
85
  }.compact
70
- raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
86
+ task["transaction_id"] = transaction_id
87
+ raise "Need columns or rows parameter" if procs.empty? && row_procs.empty?
88
+
89
+ before_procs.each do |pr|
90
+ pr.call
91
+ end
71
92
 
72
93
  yield(task, out_columns)
94
+
95
+ after_procs = task["after"].map {|after|
96
+ if after["proc"]
97
+ eval(after["proc"], evaluator_binding)
98
+ else
99
+ eval(File.read(after["proc_file"]), evaluator_binding, File.expand_path(after["proc_file"]))
100
+ end
101
+ }
102
+
103
+ after_procs.each do |pr|
104
+ pr.call
105
+ end
73
106
  end
74
107
 
75
- def self.procs
76
- @procs
108
+ def self.proc_store
109
+ @proc_store
77
110
  end
78
111
 
79
- def self.row_procs
80
- @row_procs
112
+ def self.row_proc_store
113
+ @row_proc_store
81
114
  end
82
115
 
83
116
  def init
117
+ @procs = self.class.proc_store[task["transaction_id"]]
118
+ @row_procs = self.class.row_proc_store[task["transaction_id"]]
84
119
  @skip_nils = Hash[task["columns"].map {|col|
85
120
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
86
121
  }]
@@ -136,11 +171,11 @@ module Embulk
136
171
  end
137
172
 
138
173
  def procs
139
- self.class.procs
174
+ @procs
140
175
  end
141
176
 
142
177
  def row_procs
143
- self.class.row_procs
178
+ @row_procs
144
179
  end
145
180
 
146
181
  def skip_nils
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-14 00:00:00.000000000 Z
11
+ date: 2016-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk