embulk-filter-ruby_proc 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4abe5f3c208a186c9d25e8a8996cd9c7b210b9c2
4
- data.tar.gz: eb284df63dabdc64a7068ab98bba99ea60b51139
3
+ metadata.gz: b9a7bffd90b4924602a9fc0378f77b781ab05376
4
+ data.tar.gz: 48134bc5631972efc8286573543dbfb9d312abab
5
5
  SHA512:
6
- metadata.gz: e616ad4b49d0d5b05b336ff66bbf522ed18240973c1e192315b71db0096d37d88f1cabb31b6f621de471a6973faffcccf1e8dbf6020bcadaec3e8b16111e618c
7
- data.tar.gz: 5f8e9e54f3ee58debbf2fd9a2b45a0e73e7b4b1014c4fef25bdb9ab684a90522cdd50c34628f754d553ea762fe713da2b1dcdb613b41bd1c78013505f9126850
6
+ metadata.gz: c708ef11682d2b028b4f8adb70b41cb56b25ed63ef6575ffed9e7e4ab09d221d33e43eccde221923582924a342e57a4fea1e9cb4260838f98b9fc4e0dbf22647
7
+ data.tar.gz: 583880f5a19829ed75ac4c11ef34c55ae12c5a1291f2a414890d9ce4f7a4e9de7c78a627e96d12a0807b5f5bd2f01a93112e37819c3d65f0b9c1f3c484a78ded
data/README.md CHANGED
@@ -32,6 +32,11 @@ filters:
32
32
  - type: ruby_proc
33
33
  requires:
34
34
  - cgi
35
+ rows:
36
+ - proc: |
37
+ ->(record) do
38
+ [record.dup, record.dup.tap { |r| r["id"] += 10 }]
39
+ end
35
40
  columns:
36
41
  - name: data
37
42
  proc: |
@@ -51,7 +56,6 @@ filters:
51
56
  proc_file: comment_upcase.rb
52
57
  skip_nil: false
53
58
  type: json
54
- target: events
55
59
 
56
60
  # ...
57
61
 
@@ -66,15 +70,22 @@ filters:
66
70
  end
67
71
  ```
68
72
 
73
+ A rows proc must return an array of record hashes.
74
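+ The user must also take care of object identity (for example, return `record.dup` for each output row instead of the same hash object more than once). Otherwise, an error may occur when the plugin applies the column procs.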
75
+
69
76
  ### preview
70
77
  ```
71
78
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
72
79
  | id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
73
80
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
74
81
  | 2 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
82
+ | 22 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
75
83
  | 4 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
84
+ | 24 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
76
85
  | 6 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
86
+ | 26 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
77
87
  | 8 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
88
+ | 28 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
78
89
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
79
90
  ```
80
91
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.2.0"
4
+ spec.version = "0.3.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
data/example/config.yml CHANGED
@@ -25,6 +25,11 @@ filters:
25
25
  - type: ruby_proc
26
26
  requires:
27
27
  - cgi
28
+ rows:
29
+ - proc: |
30
+ ->(record) do
31
+ [record.dup, record.dup.tap { |r| r["id"] += 10 }]
32
+ end
28
33
  columns:
29
34
  - name: data
30
35
  proc: |
@@ -44,7 +49,6 @@ filters:
44
49
  proc_file: comment_upcase.rb
45
50
  skip_nil: false
46
51
  type: json
47
- target: events
48
52
 
49
53
  out:
50
54
  type: file
@@ -6,7 +6,8 @@ module Embulk
6
6
 
7
7
  def self.transaction(config, in_schema, &control)
8
8
  task = {
9
- "columns" => config.param("columns", :array),
9
+ "columns" => config.param("columns", :array, default: []),
10
+ "rows" => config.param("rows", :array, default: []),
10
11
  "requires" => config.param("requires", :array, default: []),
11
12
  }
12
13
 
@@ -27,6 +28,7 @@ module Embulk
27
28
  task["requires"].each do |lib|
28
29
  require lib
29
30
  end
31
+
30
32
  @procs = Hash[task["columns"].map {|col|
31
33
  if col["proc"]
32
34
  [col["name"], eval(col["proc"])]
@@ -34,6 +36,15 @@ module Embulk
34
36
  [col["name"], eval(File.read(col["proc_file"]), binding, File.expand_path(col["proc_file"]))]
35
37
  end
36
38
  }]
39
+ @row_procs = task["rows"].map {|rowdef|
40
+ if rowdef["proc"]
41
+ eval(rowdef["proc"])
42
+ else
43
+ eval(File.read(rowdef["proc_file"]), binding, File.expand_path(rowdef["proc_file"]))
44
+ end
45
+ }.compact
46
+ raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
47
+
37
48
  @skip_nils = Hash[task["columns"].map {|col|
38
49
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
39
50
  }]
@@ -44,18 +55,27 @@ module Embulk
44
55
 
45
56
  def add(page)
46
57
  page.each do |record|
47
- record_hash = hashrize(record)
48
- @procs.each do |col, pr|
49
- next unless record_hash.has_key?(col)
50
- next if record_hash[col].nil? && @skip_nils[col]
58
+ if @row_procs.empty?
59
+ record_hashes = [hashrize(record)]
60
+ else
61
+ record_hashes = @row_procs.flat_map do |pr|
62
+ pr.call(hashrize(record))
63
+ end
64
+ end
65
+
66
+ record_hashes.each do |record_hash|
67
+ @procs.each do |col, pr|
68
+ next unless record_hash.has_key?(col)
69
+ next if record_hash[col].nil? && @skip_nils[col]
51
70
 
52
- if pr.arity == 1
53
- record_hash[col] = pr.call(record_hash[col])
54
- else
55
- record_hash[col] = pr.call(record_hash[col], record_hash)
71
+ if pr.arity == 1
72
+ record_hash[col] = pr.call(record_hash[col])
73
+ else
74
+ record_hash[col] = pr.call(record_hash[col], record_hash)
75
+ end
56
76
  end
77
+ page_builder.add(record_hash.values)
57
78
  end
58
- page_builder.add(record_hash.values)
59
79
  end
60
80
  end
61
81
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-18 00:00:00.000000000 Z
11
+ date: 2016-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk