kiba 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Changes.md +7 -1
- data/README.md +3 -1
- data/lib/kiba/context.rb +3 -8
- data/lib/kiba/runner.rb +45 -27
- data/lib/kiba/version.rb +1 -1
- data/test/support/test_source_that_reads_at_instantiation_time.rb +8 -0
- data/test/test_cli.rb +1 -1
- data/test/test_integration.rb +23 -3
- data/test/test_parser.rb +3 -3
- data/test/test_runner.rb +24 -8
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b46106e516df027f47b160c8a015ef86379c3994
|
4
|
+
data.tar.gz: da5ecf0dd661b1dc4960bf4bfc149df9be4c3a89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04d98ac5c1b9d2bed003930cbf1f70230b9639758b09562f0e4cdb69a5fb566bb2fcae7f392617f3dae44d929d405eb915db0982ba7a5388c505ce1295c91f40
|
7
|
+
data.tar.gz: a561008267ef7b2b436095187592aaa811aaec788453d7f6edfb129950a9e9e058862ea22b812dd7caecffc10a257cbf4c069f6f7e524ef74541257ff637bb9b
|
data/.travis.yml
CHANGED
data/Changes.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
Unreleased
|
2
2
|
----------
|
3
3
|
|
4
|
+
0.6.1
|
5
|
+
-----
|
6
|
+
|
7
|
+
- Bugfix: make sure files generated by `pre_process` can safely be read from the constructor of a source [#16]. Thanks @bcherrington for catching this.
|
8
|
+
- Internal refactoring of processing engine (should not affect regular use)
|
9
|
+
|
4
10
|
0.6.0
|
5
11
|
-----
|
6
12
|
|
@@ -9,4 +15,4 @@ Unreleased
|
|
9
15
|
0.5.0
|
10
16
|
-----
|
11
17
|
|
12
|
-
- Initial release
|
18
|
+
- Initial release
|
data/README.md
CHANGED
@@ -6,6 +6,8 @@ Learn more on the [Kiba blog](http://thibautbarrere.com):
|
|
6
6
|
|
7
7
|
* [Rubyists - are you doing ETL unknowingly?](http://thibautbarrere.com/2015/03/25/rubyists-are-you-doing-etl-unknowingly/)
|
8
8
|
* [How to write solid data processing code](http://thibautbarrere.com/2015/04/05/how-to-write-solid-data-processing-code/)
|
9
|
+
* [How to reformat CSV files with Kiba](http://thibautbarrere.com/2015/06/04/how-to-reformat-csv-files-with-kiba/) (in-depth, hands-on tutorial)
|
10
|
+
* [How to explode multivalued attributes with Kiba ETL?](http://thibautbarrere.com/2015/06/25/how-to-explode-multivalued-attributes-with-kiba/)
|
9
11
|
|
10
12
|
[![Gem Version](https://badge.fury.io/rb/kiba.svg)](http://badge.fury.io/rb/kiba)
|
11
13
|
[![Build Status](https://travis-ci.org/thbar/kiba.svg?branch=master)](https://travis-ci.org/thbar/kiba) [![Code Climate](https://codeclimate.com/github/thbar/kiba/badges/gpa.svg)](https://codeclimate.com/github/thbar/kiba) [![Dependency Status](https://gemnasium.com/thbar/kiba.svg)](https://gemnasium.com/thbar/kiba)
|
@@ -254,7 +256,7 @@ Make sure to subscribe to my [Ruby ETL blog](http://thibautbarrere.com) where I'
|
|
254
256
|
|
255
257
|
## Supported Ruby versions
|
256
258
|
|
257
|
-
Kiba currently supports Ruby 2.0
|
259
|
+
Kiba currently supports Ruby 2.0+, JRuby (with its default 1.9 syntax) and Rubinius (see [test matrix](https://travis-ci.org/thbar/kiba)).
|
258
260
|
|
259
261
|
## History & Credits
|
260
262
|
|
data/lib/kiba/context.rb
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
module Kiba
|
2
2
|
class Context
|
3
3
|
def initialize(control)
|
4
|
-
# TODO: forbid access to control from context? use cleanroom?
|
5
4
|
@control = control
|
6
5
|
end
|
7
6
|
|
8
7
|
def pre_process(&block)
|
9
|
-
@control.pre_processes << block
|
8
|
+
@control.pre_processes << { block: block }
|
10
9
|
end
|
11
10
|
|
12
11
|
def source(klass, *initialization_params)
|
@@ -14,11 +13,7 @@ module Kiba
|
|
14
13
|
end
|
15
14
|
|
16
15
|
def transform(klass = nil, *initialization_params, &block)
|
17
|
-
|
18
|
-
@control.transforms << { klass: klass, args: initialization_params }
|
19
|
-
else
|
20
|
-
@control.transforms << block
|
21
|
-
end
|
16
|
+
@control.transforms << { klass: klass, args: initialization_params, block: block }
|
22
17
|
end
|
23
18
|
|
24
19
|
def destination(klass, *initialization_params)
|
@@ -26,7 +21,7 @@ module Kiba
|
|
26
21
|
end
|
27
22
|
|
28
23
|
def post_process(&block)
|
29
|
-
@control.post_processes << block
|
24
|
+
@control.post_processes << { block: block }
|
30
25
|
end
|
31
26
|
end
|
32
27
|
end
|
data/lib/kiba/runner.rb
CHANGED
@@ -1,30 +1,38 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Runner
|
3
|
+
# allow to handle a block form just like a regular transform
|
4
|
+
class AliasingProc < Proc
|
5
|
+
alias_method :process, :call
|
6
|
+
end
|
7
|
+
|
3
8
|
def run(control)
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
9
|
+
# TODO: add a dry-run (not instantiating mode) to_instances call
|
10
|
+
# that will validate the job definition from a syntax pov before
|
11
|
+
# going any further. This could be shared with the parser.
|
12
|
+
run_pre_processes(control)
|
13
|
+
process_rows(
|
14
|
+
to_instances(control.sources),
|
15
|
+
to_instances(control.transforms, true),
|
16
|
+
to_instances(control.destinations)
|
17
|
+
)
|
18
|
+
# TODO: when I add post processes as class, I'll have to add a test to
|
19
|
+
# make sure instantiation occurs after the main processing is done (#16)
|
20
|
+
run_post_processes(control)
|
21
|
+
end
|
10
22
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
23
|
+
def run_pre_processes(control)
|
24
|
+
to_instances(control.pre_processes, true, false).each(&:call)
|
25
|
+
end
|
26
|
+
|
27
|
+
def run_post_processes(control)
|
28
|
+
to_instances(control.post_processes, true, false).each(&:call)
|
15
29
|
end
|
16
|
-
|
30
|
+
|
17
31
|
def process_rows(sources, transforms, destinations)
|
18
32
|
sources.each do |source|
|
19
33
|
source.each do |row|
|
20
34
|
transforms.each do |transform|
|
21
|
-
|
22
|
-
# and aliasing `process` to `call`. Benchmark needed first though.
|
23
|
-
if transform.is_a?(Proc)
|
24
|
-
row = transform.call(row)
|
25
|
-
else
|
26
|
-
row = transform.process(row)
|
27
|
-
end
|
35
|
+
row = transform.process(row)
|
28
36
|
break unless row
|
29
37
|
end
|
30
38
|
next unless row
|
@@ -33,19 +41,29 @@ module Kiba
|
|
33
41
|
end
|
34
42
|
end
|
35
43
|
end
|
44
|
+
destinations.each(&:close)
|
36
45
|
end
|
37
46
|
|
38
47
|
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
39
48
|
def to_instances(definitions, allow_block = false, allow_class = true)
|
40
|
-
definitions.map do |
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
+
definitions.map do |definition|
|
50
|
+
to_instance(
|
51
|
+
*definition.values_at(:klass, :args, :block),
|
52
|
+
allow_block, allow_class
|
53
|
+
)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_instance(klass, args, block, allow_block, allow_class)
|
58
|
+
if klass
|
59
|
+
fail 'Class form is not allowed here' unless allow_class
|
60
|
+
klass.new(*args)
|
61
|
+
elsif block
|
62
|
+
fail 'Block form is not allowed here' unless allow_block
|
63
|
+
AliasingProc.new(&block)
|
64
|
+
else
|
65
|
+
# TODO: support block passing to a class form definition?
|
66
|
+
fail 'Class and block form cannot be used together at the moment'
|
49
67
|
end
|
50
68
|
end
|
51
69
|
end
|
data/lib/kiba/version.rb
CHANGED
data/test/test_cli.rb
CHANGED
@@ -11,7 +11,7 @@ class TestCli < Kiba::Test
|
|
11
11
|
Kiba::Cli.run([fixture('bogus.etl')])
|
12
12
|
end
|
13
13
|
|
14
|
-
assert_match(/uninitialized constant
|
14
|
+
assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
|
15
15
|
assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
|
16
16
|
end
|
17
17
|
end
|
data/test/test_integration.rb
CHANGED
@@ -3,6 +3,8 @@ require_relative 'helper'
|
|
3
3
|
require_relative 'support/test_csv_source'
|
4
4
|
require_relative 'support/test_csv_destination'
|
5
5
|
require_relative 'support/test_rename_field_transform'
|
6
|
+
require_relative 'support/test_enumerable_source'
|
7
|
+
require_relative 'support/test_source_that_reads_at_instantiation_time'
|
6
8
|
|
7
9
|
# End-to-end tests go here
|
8
10
|
class TestIntegration < Kiba::Test
|
@@ -19,13 +21,17 @@ Patrick,McWire,M
|
|
19
21
|
CSV
|
20
22
|
end
|
21
23
|
|
24
|
+
def clean
|
25
|
+
remove_files(*Dir['test/tmp/*.csv'])
|
26
|
+
end
|
27
|
+
|
22
28
|
def setup
|
23
|
-
|
29
|
+
clean
|
24
30
|
IO.write(input_file, sample_csv_data)
|
25
31
|
end
|
26
32
|
|
27
33
|
def teardown
|
28
|
-
|
34
|
+
clean
|
29
35
|
end
|
30
36
|
|
31
37
|
def test_csv_to_csv
|
@@ -71,7 +77,7 @@ CSV
|
|
71
77
|
|
72
78
|
# assign a first value at parsing time
|
73
79
|
count = 0
|
74
|
-
|
80
|
+
|
75
81
|
pre_process do
|
76
82
|
# then change it from there (run time)
|
77
83
|
count += 100
|
@@ -93,4 +99,18 @@ CSV
|
|
93
99
|
|
94
100
|
assert_equal 'Count is now 103', message
|
95
101
|
end
|
102
|
+
|
103
|
+
def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
|
104
|
+
remove_files('test/tmp/eager.csv')
|
105
|
+
|
106
|
+
control = Kiba.parse do
|
107
|
+
pre_process do
|
108
|
+
IO.write('test/tmp/eager.csv', 'something')
|
109
|
+
end
|
110
|
+
|
111
|
+
source SourceThatReadsAtInstantionTime, 'test/tmp/eager.csv'
|
112
|
+
end
|
113
|
+
|
114
|
+
Kiba.run(control)
|
115
|
+
end
|
96
116
|
end
|
data/test/test_parser.rb
CHANGED
@@ -20,7 +20,7 @@ class TestParser < Kiba::Test
|
|
20
20
|
transform { |row| row }
|
21
21
|
end
|
22
22
|
|
23
|
-
assert_instance_of Proc, control.transforms[0]
|
23
|
+
assert_instance_of Proc, control.transforms[0][:block]
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_class_transform_definition
|
@@ -46,7 +46,7 @@ class TestParser < Kiba::Test
|
|
46
46
|
post_process {}
|
47
47
|
end
|
48
48
|
|
49
|
-
assert_instance_of Proc, control.post_processes[0]
|
49
|
+
assert_instance_of Proc, control.post_processes[0][:block]
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_block_pre_process_definition
|
@@ -54,7 +54,7 @@ class TestParser < Kiba::Test
|
|
54
54
|
pre_process {}
|
55
55
|
end
|
56
56
|
|
57
|
-
assert_instance_of Proc, control.pre_processes[0]
|
57
|
+
assert_instance_of Proc, control.pre_processes[0][:block]
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_source_as_string_parsing
|
data/test/test_runner.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative 'helper'
|
2
|
-
|
2
|
+
require 'minitest/mock'
|
3
3
|
require_relative 'support/test_enumerable_source'
|
4
4
|
|
5
5
|
class TestRunner < Kiba::Test
|
@@ -22,14 +22,14 @@ class TestRunner < Kiba::Test
|
|
22
22
|
|
23
23
|
def test_block_transform_processing
|
24
24
|
# is there a better way to assert a block was called in minitest?
|
25
|
-
control.transforms << lambda { |r| @called = true; r }
|
25
|
+
control.transforms << { block: lambda { |r| @called = true; r } }
|
26
26
|
Kiba.run(control)
|
27
27
|
assert_equal true, @called
|
28
28
|
end
|
29
29
|
|
30
30
|
def test_dismissed_row_not_passed_to_next_transform
|
31
|
-
control.transforms << lambda { |_| nil }
|
32
|
-
control.transforms << lambda { |_| @called = true; nil }
|
31
|
+
control.transforms << { block: lambda { |_| nil } }
|
32
|
+
control.transforms << { block: lambda { |_| @called = true; nil } }
|
33
33
|
Kiba.run(control)
|
34
34
|
assert_nil @called
|
35
35
|
end
|
@@ -37,14 +37,14 @@ class TestRunner < Kiba::Test
|
|
37
37
|
def test_post_process_runs_once
|
38
38
|
assert_equal 2, rows.size
|
39
39
|
@called = 0
|
40
|
-
control.post_processes << lambda { @called += 1 }
|
40
|
+
control.post_processes << { block: lambda { @called += 1 } }
|
41
41
|
Kiba.run(control)
|
42
42
|
assert_equal 1, @called
|
43
43
|
end
|
44
44
|
|
45
45
|
def test_post_process_not_called_after_row_failure
|
46
|
-
control.transforms << lambda { |_| fail 'FAIL' }
|
47
|
-
control.post_processes << lambda { @called = true }
|
46
|
+
control.transforms << { block: lambda { |_| fail 'FAIL' } }
|
47
|
+
control.post_processes << { block: lambda { @called = true } }
|
48
48
|
assert_raises(RuntimeError, 'FAIL') { Kiba.run(control) }
|
49
49
|
assert_nil @called
|
50
50
|
end
|
@@ -52,8 +52,24 @@ class TestRunner < Kiba::Test
|
|
52
52
|
def test_pre_process_runs_once
|
53
53
|
assert_equal 2, rows.size
|
54
54
|
@called = 0
|
55
|
-
control.pre_processes << lambda { @called += 1 }
|
55
|
+
control.pre_processes << { block: lambda { @called += 1 } }
|
56
56
|
Kiba.run(control)
|
57
57
|
assert_equal 1, @called
|
58
58
|
end
|
59
|
+
|
60
|
+
def test_pre_process_runs_before_source_is_instantiated
|
61
|
+
calls = []
|
62
|
+
|
63
|
+
mock_source_class = MiniTest::Mock.new
|
64
|
+
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
65
|
+
calls << :source_instantiated
|
66
|
+
end
|
67
|
+
|
68
|
+
control = Kiba::Control.new
|
69
|
+
control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
|
70
|
+
control.sources << { klass: mock_source_class }
|
71
|
+
Kiba.run(control)
|
72
|
+
|
73
|
+
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
74
|
+
end
|
59
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thibaut Barrère
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -82,6 +82,7 @@ files:
|
|
82
82
|
- test/support/test_csv_source.rb
|
83
83
|
- test/support/test_enumerable_source.rb
|
84
84
|
- test/support/test_rename_field_transform.rb
|
85
|
+
- test/support/test_source_that_reads_at_instantiation_time.rb
|
85
86
|
- test/test_cli.rb
|
86
87
|
- test/test_integration.rb
|
87
88
|
- test/test_parser.rb
|
@@ -119,6 +120,7 @@ test_files:
|
|
119
120
|
- test/support/test_csv_source.rb
|
120
121
|
- test/support/test_enumerable_source.rb
|
121
122
|
- test/support/test_rename_field_transform.rb
|
123
|
+
- test/support/test_source_that_reads_at_instantiation_time.rb
|
122
124
|
- test/test_cli.rb
|
123
125
|
- test/test_integration.rb
|
124
126
|
- test/test_parser.rb
|