kiba 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Changes.md +7 -1
- data/README.md +3 -1
- data/lib/kiba/context.rb +3 -8
- data/lib/kiba/runner.rb +45 -27
- data/lib/kiba/version.rb +1 -1
- data/test/support/test_source_that_reads_at_instantiation_time.rb +8 -0
- data/test/test_cli.rb +1 -1
- data/test/test_integration.rb +23 -3
- data/test/test_parser.rb +3 -3
- data/test/test_runner.rb +24 -8
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b46106e516df027f47b160c8a015ef86379c3994
|
4
|
+
data.tar.gz: da5ecf0dd661b1dc4960bf4bfc149df9be4c3a89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04d98ac5c1b9d2bed003930cbf1f70230b9639758b09562f0e4cdb69a5fb566bb2fcae7f392617f3dae44d929d405eb915db0982ba7a5388c505ce1295c91f40
|
7
|
+
data.tar.gz: a561008267ef7b2b436095187592aaa811aaec788453d7f6edfb129950a9e9e058862ea22b812dd7caecffc10a257cbf4c069f6f7e524ef74541257ff637bb9b
|
data/.travis.yml
CHANGED
data/Changes.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
Unreleased
|
2
2
|
----------
|
3
3
|
|
4
|
+
0.6.1
|
5
|
+
-----
|
6
|
+
|
7
|
+
- Bugfix: make sure files generated by `pre_process` can safely be read from constructor of sources [#16]. Thanks @bcherrington for catching this.
|
8
|
+
- Internal refactoring of processing engine (should not affect regular use)
|
9
|
+
|
4
10
|
0.6.0
|
5
11
|
-----
|
6
12
|
|
@@ -9,4 +15,4 @@ Unreleased
|
|
9
15
|
0.5.0
|
10
16
|
-----
|
11
17
|
|
12
|
-
- Initial release
|
18
|
+
- Initial release
|
data/README.md
CHANGED
@@ -6,6 +6,8 @@ Learn more on the [Kiba blog](http://thibautbarrere.com):
|
|
6
6
|
|
7
7
|
* [Rubyists - are you doing ETL unknowingly?](http://thibautbarrere.com/2015/03/25/rubyists-are-you-doing-etl-unknowingly/)
|
8
8
|
* [How to write solid data processing code](http://thibautbarrere.com/2015/04/05/how-to-write-solid-data-processing-code/)
|
9
|
+
* [How to reformat CSV files with Kiba](http://thibautbarrere.com/2015/06/04/how-to-reformat-csv-files-with-kiba/) (in-depth, hands-on tutorial)
|
10
|
+
* [How to explode multivalued attributes with Kiba ETL?](http://thibautbarrere.com/2015/06/25/how-to-explode-multivalued-attributes-with-kiba/)
|
9
11
|
|
10
12
|
[Gem Version](http://badge.fury.io/rb/kiba)
|
11
13
|
[Build Status](https://travis-ci.org/thbar/kiba) [Code Climate](https://codeclimate.com/github/thbar/kiba) [Dependency Status](https://gemnasium.com/thbar/kiba)
|
@@ -254,7 +256,7 @@ Make sure to subscribe to my [Ruby ETL blog](http://thibautbarrere.com) where I'
|
|
254
256
|
|
255
257
|
## Supported Ruby versions
|
256
258
|
|
257
|
-
Kiba currently supports Ruby 2.0
|
259
|
+
Kiba currently supports Ruby 2.0+, JRuby (with its default 1.9 syntax) and Rubinius (see [test matrix](https://travis-ci.org/thbar/kiba)).
|
258
260
|
|
259
261
|
## History & Credits
|
260
262
|
|
data/lib/kiba/context.rb
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
module Kiba
|
2
2
|
class Context
|
3
3
|
def initialize(control)
|
4
|
-
# TODO: forbid access to control from context? use cleanroom?
|
5
4
|
@control = control
|
6
5
|
end
|
7
6
|
|
8
7
|
def pre_process(&block)
|
9
|
-
@control.pre_processes << block
|
8
|
+
@control.pre_processes << { block: block }
|
10
9
|
end
|
11
10
|
|
12
11
|
def source(klass, *initialization_params)
|
@@ -14,11 +13,7 @@ module Kiba
|
|
14
13
|
end
|
15
14
|
|
16
15
|
def transform(klass = nil, *initialization_params, &block)
|
17
|
-
|
18
|
-
@control.transforms << { klass: klass, args: initialization_params }
|
19
|
-
else
|
20
|
-
@control.transforms << block
|
21
|
-
end
|
16
|
+
@control.transforms << { klass: klass, args: initialization_params, block: block }
|
22
17
|
end
|
23
18
|
|
24
19
|
def destination(klass, *initialization_params)
|
@@ -26,7 +21,7 @@ module Kiba
|
|
26
21
|
end
|
27
22
|
|
28
23
|
def post_process(&block)
|
29
|
-
@control.post_processes << block
|
24
|
+
@control.post_processes << { block: block }
|
30
25
|
end
|
31
26
|
end
|
32
27
|
end
|
data/lib/kiba/runner.rb
CHANGED
@@ -1,30 +1,38 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Runner
|
3
|
+
# allow to handle a block form just like a regular transform
|
4
|
+
class AliasingProc < Proc
|
5
|
+
alias_method :process, :call
|
6
|
+
end
|
7
|
+
|
3
8
|
def run(control)
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
9
|
+
# TODO: add a dry-run (not instantiating mode) to_instances call
|
10
|
+
# that will validate the job definition from a syntax pov before
|
11
|
+
# going any further. This could be shared with the parser.
|
12
|
+
run_pre_processes(control)
|
13
|
+
process_rows(
|
14
|
+
to_instances(control.sources),
|
15
|
+
to_instances(control.transforms, true),
|
16
|
+
to_instances(control.destinations)
|
17
|
+
)
|
18
|
+
# TODO: when I add post processes as class, I'll have to add a test to
|
19
|
+
# make sure instantiation occurs after the main processing is done (#16)
|
20
|
+
run_post_processes(control)
|
21
|
+
end
|
10
22
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
23
|
+
def run_pre_processes(control)
|
24
|
+
to_instances(control.pre_processes, true, false).each(&:call)
|
25
|
+
end
|
26
|
+
|
27
|
+
def run_post_processes(control)
|
28
|
+
to_instances(control.post_processes, true, false).each(&:call)
|
15
29
|
end
|
16
|
-
|
30
|
+
|
17
31
|
def process_rows(sources, transforms, destinations)
|
18
32
|
sources.each do |source|
|
19
33
|
source.each do |row|
|
20
34
|
transforms.each do |transform|
|
21
|
-
|
22
|
-
# and aliasing `process` to `call`. Benchmark needed first though.
|
23
|
-
if transform.is_a?(Proc)
|
24
|
-
row = transform.call(row)
|
25
|
-
else
|
26
|
-
row = transform.process(row)
|
27
|
-
end
|
35
|
+
row = transform.process(row)
|
28
36
|
break unless row
|
29
37
|
end
|
30
38
|
next unless row
|
@@ -33,19 +41,29 @@ module Kiba
|
|
33
41
|
end
|
34
42
|
end
|
35
43
|
end
|
44
|
+
destinations.each(&:close)
|
36
45
|
end
|
37
46
|
|
38
47
|
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
39
48
|
def to_instances(definitions, allow_block = false, allow_class = true)
|
40
|
-
definitions.map do |
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
+
definitions.map do |definition|
|
50
|
+
to_instance(
|
51
|
+
*definition.values_at(:klass, :args, :block),
|
52
|
+
allow_block, allow_class
|
53
|
+
)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def to_instance(klass, args, block, allow_block, allow_class)
|
58
|
+
if klass
|
59
|
+
fail 'Class form is not allowed here' unless allow_class
|
60
|
+
klass.new(*args)
|
61
|
+
elsif block
|
62
|
+
fail 'Block form is not allowed here' unless allow_block
|
63
|
+
AliasingProc.new(&block)
|
64
|
+
else
|
65
|
+
# TODO: support block passing to a class form definition?
|
66
|
+
fail 'Class and block form cannot be used together at the moment'
|
49
67
|
end
|
50
68
|
end
|
51
69
|
end
|
data/lib/kiba/version.rb
CHANGED
data/test/test_cli.rb
CHANGED
@@ -11,7 +11,7 @@ class TestCli < Kiba::Test
|
|
11
11
|
Kiba::Cli.run([fixture('bogus.etl')])
|
12
12
|
end
|
13
13
|
|
14
|
-
assert_match(/uninitialized constant
|
14
|
+
assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
|
15
15
|
assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
|
16
16
|
end
|
17
17
|
end
|
data/test/test_integration.rb
CHANGED
@@ -3,6 +3,8 @@ require_relative 'helper'
|
|
3
3
|
require_relative 'support/test_csv_source'
|
4
4
|
require_relative 'support/test_csv_destination'
|
5
5
|
require_relative 'support/test_rename_field_transform'
|
6
|
+
require_relative 'support/test_enumerable_source'
|
7
|
+
require_relative 'support/test_source_that_reads_at_instantiation_time'
|
6
8
|
|
7
9
|
# End-to-end tests go here
|
8
10
|
class TestIntegration < Kiba::Test
|
@@ -19,13 +21,17 @@ Patrick,McWire,M
|
|
19
21
|
CSV
|
20
22
|
end
|
21
23
|
|
24
|
+
def clean
|
25
|
+
remove_files(*Dir['test/tmp/*.csv'])
|
26
|
+
end
|
27
|
+
|
22
28
|
def setup
|
23
|
-
|
29
|
+
clean
|
24
30
|
IO.write(input_file, sample_csv_data)
|
25
31
|
end
|
26
32
|
|
27
33
|
def teardown
|
28
|
-
|
34
|
+
clean
|
29
35
|
end
|
30
36
|
|
31
37
|
def test_csv_to_csv
|
@@ -71,7 +77,7 @@ CSV
|
|
71
77
|
|
72
78
|
# assign a first value at parsing time
|
73
79
|
count = 0
|
74
|
-
|
80
|
+
|
75
81
|
pre_process do
|
76
82
|
# then change it from there (run time)
|
77
83
|
count += 100
|
@@ -93,4 +99,18 @@ CSV
|
|
93
99
|
|
94
100
|
assert_equal 'Count is now 103', message
|
95
101
|
end
|
102
|
+
|
103
|
+
def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
|
104
|
+
remove_files('test/tmp/eager.csv')
|
105
|
+
|
106
|
+
control = Kiba.parse do
|
107
|
+
pre_process do
|
108
|
+
IO.write('test/tmp/eager.csv', 'something')
|
109
|
+
end
|
110
|
+
|
111
|
+
source SourceThatReadsAtInstantionTime, 'test/tmp/eager.csv'
|
112
|
+
end
|
113
|
+
|
114
|
+
Kiba.run(control)
|
115
|
+
end
|
96
116
|
end
|
data/test/test_parser.rb
CHANGED
@@ -20,7 +20,7 @@ class TestParser < Kiba::Test
|
|
20
20
|
transform { |row| row }
|
21
21
|
end
|
22
22
|
|
23
|
-
assert_instance_of Proc, control.transforms[0]
|
23
|
+
assert_instance_of Proc, control.transforms[0][:block]
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_class_transform_definition
|
@@ -46,7 +46,7 @@ class TestParser < Kiba::Test
|
|
46
46
|
post_process {}
|
47
47
|
end
|
48
48
|
|
49
|
-
assert_instance_of Proc, control.post_processes[0]
|
49
|
+
assert_instance_of Proc, control.post_processes[0][:block]
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_block_pre_process_definition
|
@@ -54,7 +54,7 @@ class TestParser < Kiba::Test
|
|
54
54
|
pre_process {}
|
55
55
|
end
|
56
56
|
|
57
|
-
assert_instance_of Proc, control.pre_processes[0]
|
57
|
+
assert_instance_of Proc, control.pre_processes[0][:block]
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_source_as_string_parsing
|
data/test/test_runner.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative 'helper'
|
2
|
-
|
2
|
+
require 'minitest/mock'
|
3
3
|
require_relative 'support/test_enumerable_source'
|
4
4
|
|
5
5
|
class TestRunner < Kiba::Test
|
@@ -22,14 +22,14 @@ class TestRunner < Kiba::Test
|
|
22
22
|
|
23
23
|
def test_block_transform_processing
|
24
24
|
# is there a better way to assert a block was called in minitest?
|
25
|
-
control.transforms << lambda { |r| @called = true; r }
|
25
|
+
control.transforms << { block: lambda { |r| @called = true; r } }
|
26
26
|
Kiba.run(control)
|
27
27
|
assert_equal true, @called
|
28
28
|
end
|
29
29
|
|
30
30
|
def test_dismissed_row_not_passed_to_next_transform
|
31
|
-
control.transforms << lambda { |_| nil }
|
32
|
-
control.transforms << lambda { |_| @called = true; nil }
|
31
|
+
control.transforms << { block: lambda { |_| nil } }
|
32
|
+
control.transforms << { block: lambda { |_| @called = true; nil } }
|
33
33
|
Kiba.run(control)
|
34
34
|
assert_nil @called
|
35
35
|
end
|
@@ -37,14 +37,14 @@ class TestRunner < Kiba::Test
|
|
37
37
|
def test_post_process_runs_once
|
38
38
|
assert_equal 2, rows.size
|
39
39
|
@called = 0
|
40
|
-
control.post_processes << lambda { @called += 1 }
|
40
|
+
control.post_processes << { block: lambda { @called += 1 } }
|
41
41
|
Kiba.run(control)
|
42
42
|
assert_equal 1, @called
|
43
43
|
end
|
44
44
|
|
45
45
|
def test_post_process_not_called_after_row_failure
|
46
|
-
control.transforms << lambda { |_| fail 'FAIL' }
|
47
|
-
control.post_processes << lambda { @called = true }
|
46
|
+
control.transforms << { block: lambda { |_| fail 'FAIL' } }
|
47
|
+
control.post_processes << { block: lambda { @called = true } }
|
48
48
|
assert_raises(RuntimeError, 'FAIL') { Kiba.run(control) }
|
49
49
|
assert_nil @called
|
50
50
|
end
|
@@ -52,8 +52,24 @@ class TestRunner < Kiba::Test
|
|
52
52
|
def test_pre_process_runs_once
|
53
53
|
assert_equal 2, rows.size
|
54
54
|
@called = 0
|
55
|
-
control.pre_processes << lambda { @called += 1 }
|
55
|
+
control.pre_processes << { block: lambda { @called += 1 } }
|
56
56
|
Kiba.run(control)
|
57
57
|
assert_equal 1, @called
|
58
58
|
end
|
59
|
+
|
60
|
+
def test_pre_process_runs_before_source_is_instantiated
|
61
|
+
calls = []
|
62
|
+
|
63
|
+
mock_source_class = MiniTest::Mock.new
|
64
|
+
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
65
|
+
calls << :source_instantiated
|
66
|
+
end
|
67
|
+
|
68
|
+
control = Kiba::Control.new
|
69
|
+
control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
|
70
|
+
control.sources << { klass: mock_source_class }
|
71
|
+
Kiba.run(control)
|
72
|
+
|
73
|
+
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
74
|
+
end
|
59
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thibaut Barrère
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -82,6 +82,7 @@ files:
|
|
82
82
|
- test/support/test_csv_source.rb
|
83
83
|
- test/support/test_enumerable_source.rb
|
84
84
|
- test/support/test_rename_field_transform.rb
|
85
|
+
- test/support/test_source_that_reads_at_instantiation_time.rb
|
85
86
|
- test/test_cli.rb
|
86
87
|
- test/test_integration.rb
|
87
88
|
- test/test_parser.rb
|
@@ -119,6 +120,7 @@ test_files:
|
|
119
120
|
- test/support/test_csv_source.rb
|
120
121
|
- test/support/test_enumerable_source.rb
|
121
122
|
- test/support/test_rename_field_transform.rb
|
123
|
+
- test/support/test_source_that_reads_at_instantiation_time.rb
|
122
124
|
- test/test_cli.rb
|
123
125
|
- test/test_integration.rb
|
124
126
|
- test/test_parser.rb
|