embulk-filter-ruby_proc 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4abe5f3c208a186c9d25e8a8996cd9c7b210b9c2
4
- data.tar.gz: eb284df63dabdc64a7068ab98bba99ea60b51139
3
+ metadata.gz: b9a7bffd90b4924602a9fc0378f77b781ab05376
4
+ data.tar.gz: 48134bc5631972efc8286573543dbfb9d312abab
5
5
  SHA512:
6
- metadata.gz: e616ad4b49d0d5b05b336ff66bbf522ed18240973c1e192315b71db0096d37d88f1cabb31b6f621de471a6973faffcccf1e8dbf6020bcadaec3e8b16111e618c
7
- data.tar.gz: 5f8e9e54f3ee58debbf2fd9a2b45a0e73e7b4b1014c4fef25bdb9ab684a90522cdd50c34628f754d553ea762fe713da2b1dcdb613b41bd1c78013505f9126850
6
+ metadata.gz: c708ef11682d2b028b4f8adb70b41cb56b25ed63ef6575ffed9e7e4ab09d221d33e43eccde221923582924a342e57a4fea1e9cb4260838f98b9fc4e0dbf22647
7
+ data.tar.gz: 583880f5a19829ed75ac4c11ef34c55ae12c5a1291f2a414890d9ce4f7a4e9de7c78a627e96d12a0807b5f5bd2f01a93112e37819c3d65f0b9c1f3c484a78ded
data/README.md CHANGED
@@ -32,6 +32,11 @@ filters:
32
32
  - type: ruby_proc
33
33
  requires:
34
34
  - cgi
35
+ rows:
36
+ - proc: |
37
+ ->(record) do
38
+ [record.dup, record.dup.tap { |r| r["id"] += 10 }]
39
+ end
35
40
  columns:
36
41
  - name: data
37
42
  proc: |
@@ -51,7 +56,6 @@ filters:
51
56
  proc_file: comment_upcase.rb
52
57
  skip_nil: false
53
58
  type: json
54
- target: events
55
59
 
56
60
  # ...
57
61
 
@@ -66,15 +70,22 @@ filters:
66
70
  end
67
71
  ```
68
72
 
73
+ A rows proc must return an array of record hashes.
74
+ The user must take care of object identity: each returned record should be a distinct object (for example, created with `record.dup`). Otherwise, an error may occur when the plugin applies column procs.
75
+
69
76
  ### preview
70
77
  ```
71
78
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
72
79
  | id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
73
80
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
74
81
  | 2 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
82
+ | 22 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
75
83
  | 4 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
84
+ | 24 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
76
85
  | 6 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
86
+ | 26 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
77
87
  | 8 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
88
+ | 28 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
78
89
  +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
79
90
  ```
80
91
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-filter-ruby_proc"
4
- spec.version = "0.2.0"
4
+ spec.version = "0.3.0"
5
5
  spec.authors = ["joker1007"]
6
6
  spec.summary = "Ruby Proc filter plugin for Embulk"
7
7
  spec.description = "Filter each record by ruby proc"
data/example/config.yml CHANGED
@@ -25,6 +25,11 @@ filters:
25
25
  - type: ruby_proc
26
26
  requires:
27
27
  - cgi
28
+ rows:
29
+ - proc: |
30
+ ->(record) do
31
+ [record.dup, record.dup.tap { |r| r["id"] += 10 }]
32
+ end
28
33
  columns:
29
34
  - name: data
30
35
  proc: |
@@ -44,7 +49,6 @@ filters:
44
49
  proc_file: comment_upcase.rb
45
50
  skip_nil: false
46
51
  type: json
47
- target: events
48
52
 
49
53
  out:
50
54
  type: file
@@ -6,7 +6,8 @@ module Embulk
6
6
 
7
7
  def self.transaction(config, in_schema, &control)
8
8
  task = {
9
- "columns" => config.param("columns", :array),
9
+ "columns" => config.param("columns", :array, default: []),
10
+ "rows" => config.param("rows", :array, default: []),
10
11
  "requires" => config.param("requires", :array, default: []),
11
12
  }
12
13
 
@@ -27,6 +28,7 @@ module Embulk
27
28
  task["requires"].each do |lib|
28
29
  require lib
29
30
  end
31
+
30
32
  @procs = Hash[task["columns"].map {|col|
31
33
  if col["proc"]
32
34
  [col["name"], eval(col["proc"])]
@@ -34,6 +36,15 @@ module Embulk
34
36
  [col["name"], eval(File.read(col["proc_file"]), binding, File.expand_path(col["proc_file"]))]
35
37
  end
36
38
  }]
39
+ @row_procs = task["rows"].map {|rowdef|
40
+ if rowdef["proc"]
41
+ eval(rowdef["proc"])
42
+ else
43
+ eval(File.read(rowdef["proc_file"]), binding, File.expand_path(rowdef["proc_file"]))
44
+ end
45
+ }.compact
46
+ raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
47
+
37
48
  @skip_nils = Hash[task["columns"].map {|col|
38
49
  [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
39
50
  }]
@@ -44,18 +55,27 @@ module Embulk
44
55
 
45
56
  def add(page)
46
57
  page.each do |record|
47
- record_hash = hashrize(record)
48
- @procs.each do |col, pr|
49
- next unless record_hash.has_key?(col)
50
- next if record_hash[col].nil? && @skip_nils[col]
58
+ if @row_procs.empty?
59
+ record_hashes = [hashrize(record)]
60
+ else
61
+ record_hashes = @row_procs.flat_map do |pr|
62
+ pr.call(hashrize(record))
63
+ end
64
+ end
65
+
66
+ record_hashes.each do |record_hash|
67
+ @procs.each do |col, pr|
68
+ next unless record_hash.has_key?(col)
69
+ next if record_hash[col].nil? && @skip_nils[col]
51
70
 
52
- if pr.arity == 1
53
- record_hash[col] = pr.call(record_hash[col])
54
- else
55
- record_hash[col] = pr.call(record_hash[col], record_hash)
71
+ if pr.arity == 1
72
+ record_hash[col] = pr.call(record_hash[col])
73
+ else
74
+ record_hash[col] = pr.call(record_hash[col], record_hash)
75
+ end
56
76
  end
77
+ page_builder.add(record_hash.values)
57
78
  end
58
- page_builder.add(record_hash.values)
59
79
  end
60
80
  end
61
81
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-ruby_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-18 00:00:00.000000000 Z
11
+ date: 2016-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: embulk