embulk-filter-ruby_proc 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +5 -1
- data/lib/embulk/filter/ruby_proc.rb +30 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9a7bffd90b4924602a9fc0378f77b781ab05376
|
4
|
+
data.tar.gz: 48134bc5631972efc8286573543dbfb9d312abab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c708ef11682d2b028b4f8adb70b41cb56b25ed63ef6575ffed9e7e4ab09d221d33e43eccde221923582924a342e57a4fea1e9cb4260838f98b9fc4e0dbf22647
|
7
|
+
data.tar.gz: 583880f5a19829ed75ac4c11ef34c55ae12c5a1291f2a414890d9ce4f7a4e9de7c78a627e96d12a0807b5f5bd2f01a93112e37819c3d65f0b9c1f3c484a78ded
|
data/README.md
CHANGED
@@ -32,6 +32,11 @@ filters:
|
|
32
32
|
- type: ruby_proc
|
33
33
|
requires:
|
34
34
|
- cgi
|
35
|
+
rows:
|
36
|
+
- proc: |
|
37
|
+
->(record) do
|
38
|
+
[record.dup, record.dup.tap { |r| r["id"] += 10 }]
|
39
|
+
end
|
35
40
|
columns:
|
36
41
|
- name: data
|
37
42
|
proc: |
|
@@ -51,7 +56,6 @@ filters:
|
|
51
56
|
proc_file: comment_upcase.rb
|
52
57
|
skip_nil: false
|
53
58
|
type: json
|
54
|
-
target: events
|
55
59
|
|
56
60
|
# ...
|
57
61
|
|
@@ -66,15 +70,22 @@ filters:
|
|
66
70
|
end
|
67
71
|
```
|
68
72
|
|
73
|
+
A rows proc must return an array of record hashes.
|
74
|
+
The user must also take care of object identity (for example, by returning `record.dup` rather than the same hash twice); otherwise, an error may occur when the plugin applies column procs.
|
75
|
+
|
69
76
|
### preview
|
70
77
|
```
|
71
78
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
72
79
|
| id:string | account:long | time:timestamp | purchase:timestamp | comment:json | data:json |
|
73
80
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
74
81
|
| 2 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
82
|
+
| 22 | 32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
|
75
83
|
| 4 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
84
|
+
| 24 | 14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC | ["EMBULK","JRUBY"] | |
|
76
85
|
| 6 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
86
|
+
| 26 | 27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] | |
|
77
87
|
| 8 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
88
|
+
| 28 | 11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC | ["11270"] | |
|
78
89
|
+-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
|
79
90
|
```
|
80
91
|
|
data/example/config.yml
CHANGED
@@ -25,6 +25,11 @@ filters:
|
|
25
25
|
- type: ruby_proc
|
26
26
|
requires:
|
27
27
|
- cgi
|
28
|
+
rows:
|
29
|
+
- proc: |
|
30
|
+
->(record) do
|
31
|
+
[record.dup, record.dup.tap { |r| r["id"] += 10 }]
|
32
|
+
end
|
28
33
|
columns:
|
29
34
|
- name: data
|
30
35
|
proc: |
|
@@ -44,7 +49,6 @@ filters:
|
|
44
49
|
proc_file: comment_upcase.rb
|
45
50
|
skip_nil: false
|
46
51
|
type: json
|
47
|
-
target: events
|
48
52
|
|
49
53
|
out:
|
50
54
|
type: file
|
@@ -6,7 +6,8 @@ module Embulk
|
|
6
6
|
|
7
7
|
def self.transaction(config, in_schema, &control)
|
8
8
|
task = {
|
9
|
-
"columns" => config.param("columns", :array),
|
9
|
+
"columns" => config.param("columns", :array, default: []),
|
10
|
+
"rows" => config.param("rows", :array, default: []),
|
10
11
|
"requires" => config.param("requires", :array, default: []),
|
11
12
|
}
|
12
13
|
|
@@ -27,6 +28,7 @@ module Embulk
|
|
27
28
|
task["requires"].each do |lib|
|
28
29
|
require lib
|
29
30
|
end
|
31
|
+
|
30
32
|
@procs = Hash[task["columns"].map {|col|
|
31
33
|
if col["proc"]
|
32
34
|
[col["name"], eval(col["proc"])]
|
@@ -34,6 +36,15 @@ module Embulk
|
|
34
36
|
[col["name"], eval(File.read(col["proc_file"]), binding, File.expand_path(col["proc_file"]))]
|
35
37
|
end
|
36
38
|
}]
|
39
|
+
@row_procs = task["rows"].map {|rowdef|
|
40
|
+
if rowdef["proc"]
|
41
|
+
eval(rowdef["proc"])
|
42
|
+
else
|
43
|
+
eval(File.read(rowdef["proc_file"]), binding, File.expand_path(rowdef["proc_file"]))
|
44
|
+
end
|
45
|
+
}.compact
|
46
|
+
raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
|
47
|
+
|
37
48
|
@skip_nils = Hash[task["columns"].map {|col|
|
38
49
|
[col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
|
39
50
|
}]
|
@@ -44,18 +55,27 @@ module Embulk
|
|
44
55
|
|
45
56
|
def add(page)
|
46
57
|
page.each do |record|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
58
|
+
if @row_procs.empty?
|
59
|
+
record_hashes = [hashrize(record)]
|
60
|
+
else
|
61
|
+
record_hashes = @row_procs.flat_map do |pr|
|
62
|
+
pr.call(hashrize(record))
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
record_hashes.each do |record_hash|
|
67
|
+
@procs.each do |col, pr|
|
68
|
+
next unless record_hash.has_key?(col)
|
69
|
+
next if record_hash[col].nil? && @skip_nils[col]
|
51
70
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
71
|
+
if pr.arity == 1
|
72
|
+
record_hash[col] = pr.call(record_hash[col])
|
73
|
+
else
|
74
|
+
record_hash[col] = pr.call(record_hash[col], record_hash)
|
75
|
+
end
|
56
76
|
end
|
77
|
+
page_builder.add(record_hash.values)
|
57
78
|
end
|
58
|
-
page_builder.add(record_hash.values)
|
59
79
|
end
|
60
80
|
end
|
61
81
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-ruby_proc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: embulk
|