kiba 1.0.0 → 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.travis.yml +11 -9
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +28 -0
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +7 -0
- data/Pro-Changes.md +108 -0
- data/README.md +15 -282
- data/Rakefile +6 -1
- data/appveyor.yml +19 -9
- data/bin/kiba +12 -2
- data/kiba.gemspec +6 -1
- data/lib/kiba.rb +10 -1
- data/lib/kiba/context.rb +4 -0
- data/lib/kiba/control.rb +4 -0
- data/lib/kiba/dsl_extensions/config.rb +9 -0
- data/lib/kiba/parser.rb +4 -9
- data/lib/kiba/runner.rb +14 -5
- data/lib/kiba/streaming_runner.rb +38 -0
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +11 -2
- data/test/shared_runner_tests.rb +228 -0
- data/test/support/shared_tests.rb +10 -0
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +9 -0
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +9 -0
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +8 -0
- data/test/test_integration.rb +3 -3
- data/test/test_parser.rb +34 -29
- data/test/test_run.rb +12 -0
- data/test/test_runner.rb +5 -81
- data/test/test_streaming_runner.rb +70 -0
- metadata +57 -16
- data/lib/kiba/cli.rb +0 -16
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -17
data/lib/kiba/parser.rb
CHANGED
@@ -1,14 +1,9 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Parser
|
3
|
-
def parse(
|
4
|
-
control = Control.new
|
5
|
-
context = Context.new(control)
|
6
|
-
|
7
|
-
# this somewhat weird construct allows to remove a nil source_file
|
8
|
-
context.instance_eval(*[source_as_string, source_file].compact)
|
9
|
-
else
|
10
|
-
context.instance_eval(&source_as_block)
|
11
|
-
end
|
3
|
+
def parse(&source_as_block)
|
4
|
+
control = Kiba::Control.new
|
5
|
+
context = Kiba::Context.new(control)
|
6
|
+
context.instance_eval(&source_as_block)
|
12
7
|
control
|
13
8
|
end
|
14
9
|
end
|
data/lib/kiba/runner.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Runner
|
3
|
+
extend self
|
4
|
+
|
3
5
|
# allows a block form to be handled just like a regular transform
|
4
6
|
class AliasingProc < Proc
|
5
7
|
alias_method :process, :call
|
@@ -13,8 +15,9 @@ module Kiba
|
|
13
15
|
process_rows(
|
14
16
|
to_instances(control.sources),
|
15
17
|
to_instances(control.transforms, true),
|
16
|
-
to_instances(control.destinations)
|
18
|
+
destinations = to_instances(control.destinations)
|
17
19
|
)
|
20
|
+
close_destinations(destinations)
|
18
21
|
# TODO: when I add post processes as class, I'll have to add a test to
|
19
22
|
# make sure instantiation occurs after the main processing is done (#16)
|
20
23
|
run_post_processes(control)
|
@@ -28,6 +31,12 @@ module Kiba
|
|
28
31
|
to_instances(control.post_processes, true, false).each(&:call)
|
29
32
|
end
|
30
33
|
|
34
|
+
# Sends `close` to every destination that responds to it; destinations
# without a `close` method are left untouched.
#
# Returns the array of destinations that were closed.
def close_destinations(destinations)
  closable = destinations.select { |destination| destination.respond_to?(:close) }
  closable.each(&:close)
end
|
39
|
+
|
31
40
|
def process_rows(sources, transforms, destinations)
|
32
41
|
sources.each do |source|
|
33
42
|
source.each do |row|
|
@@ -41,7 +50,6 @@ module Kiba
|
|
41
50
|
end
|
42
51
|
end
|
43
52
|
end
|
44
|
-
destinations.find_all { |d| d.respond_to?(:close) }.each(&:close)
|
45
53
|
end
|
46
54
|
|
47
55
|
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
@@ -55,15 +63,16 @@ module Kiba
|
|
55
63
|
end
|
56
64
|
|
57
65
|
def to_instance(klass, args, block, allow_block, allow_class)
|
58
|
-
if klass
|
66
|
+
if klass && block
|
67
|
+
fail 'Class and block form cannot be used together at the moment'
|
68
|
+
elsif klass
|
59
69
|
fail 'Class form is not allowed here' unless allow_class
|
60
70
|
klass.new(*args)
|
61
71
|
elsif block
|
62
72
|
fail 'Block form is not allowed here' unless allow_block
|
63
73
|
AliasingProc.new(&block)
|
64
74
|
else
|
65
|
-
|
66
|
-
fail 'Class and block form cannot be used together at the moment'
|
75
|
+
fail 'Nil parameters not allowed here'
|
67
76
|
end
|
68
77
|
end
|
69
78
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Kiba
  # Runner variant which wires sources and transforms together as a chain
  # of Enumerators. Everything not overridden here comes from Runner.
  module StreamingRunner
    include Runner
    extend self

    # Wraps `stream` in an Enumerator applying transform `t` to each row.
    #
    # A transform can emit rows in two ways: by yielding them to the block
    # given to `process`, and by returning a truthy row from `process`.
    # Once the input stream is exhausted, `t.close` (when defined) is
    # called and may yield further rows.
    def transform_stream(stream, t)
      Enumerator.new do |yielder|
        stream.each do |row|
          result = t.process(row) { |emitted| yielder << emitted }
          # a nil/false return value means the row is dropped
          yielder << result if result
        end
        t.close { |trailing| yielder << trailing } if t.respond_to?(:close)
      end
    end

    # Returns an Enumerator emitting the rows of each source, in order.
    def source_stream(sources)
      Enumerator.new do |yielder|
        sources.each do |source|
          source.each { |row| yielder << row }
        end
      end
    end

    # Stacks one transform stream per transform on top of the source
    # stream, then writes every resulting row to each destination.
    def process_rows(sources, transforms, destinations)
      pipeline = transforms.inject(source_stream(sources)) do |upstream, transform|
        transform_stream(upstream, transform)
      end
      pipeline.each do |row|
        destinations.each { |destination| destination.write(row) }
      end
    end
  end
end
|
data/lib/kiba/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'minitest/pride'
|
3
|
+
require 'minitest/focus'
|
3
4
|
require 'kiba'
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
if ENV['CI'] == 'true'
|
7
|
+
puts "Running with MiniTest version #{MiniTest::VERSION}"
|
8
|
+
end
|
7
9
|
|
10
|
+
class Kiba::Test < Minitest::Test
|
8
11
|
def remove_files(*files)
|
9
12
|
files.each do |file|
|
10
13
|
File.delete(file) if File.exist?(file)
|
@@ -14,4 +17,10 @@ class Kiba::Test < Minitest::Test
|
|
14
17
|
def fixture(file)
|
15
18
|
File.join(File.dirname(__FILE__), 'fixtures', file)
|
16
19
|
end
|
20
|
+
|
21
|
+
unless self.method_defined?(:assert_mock)
|
22
|
+
# Verifies that all expectations set on `mock` were met.
#
# The result of `mock.verify` is routed through `assert` so the check is
# counted as an assertion, as Minitest's own `assert_mock` does.
def assert_mock(mock)
  assert mock.verify
end
|
25
|
+
end
|
17
26
|
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
require 'minitest/mock'
|
2
|
+
require_relative 'support/test_enumerable_source'
|
3
|
+
require_relative 'support/test_destination_returning_nil'
|
4
|
+
|
5
|
+
# Test cases shared between the runner test classes. The including class
# is expected to provide `kiba_run(control)`, which executes the given
# control with the runner under test.
module SharedRunnerTests
  # Rows fed to the default source built by `control`.
  def rows
    @rows ||= [
      { identifier: 'first-row' },
      { identifier: 'second-row' }
    ]
  end

  # Memoized control pre-configured with a single source.
  def control
    @control ||= begin
      control = Kiba::Control.new
      # this source will yield the two rows defined in `rows`
      control.sources << {
        klass: TestEnumerableSource,
        args: [rows]
      }
      control
    end
  end

  def test_block_transform_processing
    # is there a better way to assert a block was called in minitest?
    control.transforms << { block: lambda { |r| @called = true; r } }
    kiba_run(control)
    assert_equal true, @called
  end

  def test_dismissed_row_not_passed_to_next_transform
    @called = nil
    control.transforms << { block: lambda { |_| nil } }
    control.transforms << { block: lambda { |_| @called = true; nil } }
    kiba_run(control)
    assert_nil @called
  end

  def test_post_process_runs_once
    assert_equal 2, rows.size
    @called = 0
    control.post_processes << { block: lambda { @called += 1 } }
    kiba_run(control)
    assert_equal 1, @called
  end

  def test_post_process_not_called_after_row_failure
    @called = nil
    control.transforms << { block: lambda { |_| fail 'FAIL' } }
    control.post_processes << { block: lambda { @called = true } }
    # the second argument of assert_raises is only a failure message, so
    # the raised message must be checked on the returned exception
    error = assert_raises(RuntimeError) { kiba_run(control) }
    assert_equal 'FAIL', error.message
    assert_nil @called
  end

  def test_pre_process_runs_once
    assert_equal 2, rows.size
    @called = 0
    control.pre_processes << { block: lambda { @called += 1 } }
    kiba_run(control)
    assert_equal 1, @called
  end

  def test_pre_process_runs_before_source_is_instantiated
    calls = []

    mock_source_class = Minitest::Mock.new
    mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
      calls << :source_instantiated
    end

    control = Kiba::Control.new
    control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
    control.sources << { klass: mock_source_class }
    kiba_run(control)

    assert_equal [:pre_processor_executed, :source_instantiated], calls
    assert_mock mock_source_class
  end

  def test_no_error_raised_if_destination_close_not_implemented
    # NOTE: this fake destination does not implement `close`
    destination_instance = Minitest::Mock.new

    mock_destination_class = Minitest::Mock.new
    mock_destination_class.expect(:new, destination_instance)

    control = Kiba::Control.new
    control.destinations << { klass: mock_destination_class }
    kiba_run(control)
    assert_mock mock_destination_class
  end

  def test_destination_close_called_if_defined
    destination_instance = Minitest::Mock.new
    destination_instance.expect(:close, nil)
    mock_destination_class = Minitest::Mock.new
    mock_destination_class.expect(:new, destination_instance)

    control = Kiba::Control.new
    control.destinations << { klass: mock_destination_class }
    kiba_run(control)
    assert_mock destination_instance
    assert_mock mock_destination_class
  end

  def test_use_next_to_exit_early_from_block_transform
    assert_equal 2, rows.size

    # calling "return row" from a block is forbidden, but you can use "next" instead
    b = lambda do |row|
      if row.fetch(:identifier) == 'first-row'
        # demonstrate how to remove a row from the pipeline via next
        next
      else
        # demonstrate how you can reformat via next
        next({new_identifier: row.fetch(:identifier)})
      end
      fail "This should not be called"
    end
    control.transforms << { block: b }

    # keep track of the rows
    @remaining_rows = []
    checker = lambda { |row| @remaining_rows << row; row }
    control.transforms << { block: checker }

    kiba_run(control)

    # the first row should have been removed
    # and the second row should have been reformatted
    assert_equal [{new_identifier: 'second-row'}], @remaining_rows
  end

  def test_destination_returning_nil_does_not_remove_row_from_pipeline
    # safeguard to avoid modification on the support code
    assert_nil TestDestinationReturningNil.new.write("FOOBAR")

    destinations = []
    control = Kiba.parse do
      source TestEnumerableSource, [{key: 'value'}]
      2.times do
        destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
      end
    end
    kiba_run(control)
    2.times do |i|
      assert_equal [{key: 'value'}], destinations[i].instance_variable_get(:@written_rows)
    end
  end

  def test_nil_transform_error_message
    control = Kiba.parse do
      transform
    end
    # check the message on the returned exception: a string passed to
    # assert_raises is only used as the failure message
    error = assert_raises(RuntimeError) { kiba_run(control) }
    assert_equal 'Nil parameters not allowed here', error.message
  end

  def test_ruby_3_source_kwargs
    # NOTE: before Ruby 3 kwargs support, a Ruby warning would
    # be captured here with Ruby 2.7 & ensure we fail,
    # and an error would be raised with Ruby 2.8.0-dev
    # NOTE: only the first warning will be captured, though, but
    # having 3 different tests is still better
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        source TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_ruby_3_transform_kwargs
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        transform TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_ruby_3_destination_kwargs
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        destination TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_positional_plus_keyword_arguments
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        source TestMixedArgumentsComponent,
          "some positional argument",
          mandatory: "first",
          on_init: -> (values) {
            storage = values
          }
      end)
    end

    assert_equal({
      some_value: "some positional argument",
      mandatory: "first",
      optional: nil
    }, storage)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Test-support transform that buffers incoming rows and yields them
# downstream in arrays of `aggregate_size` rows. Rows still buffered when
# the transform is closed are yielded as a final, smaller array.
class AggregateTransform
  # options - Hash; the :aggregate_size key is mandatory (read via `fetch`).
  def initialize(options)
    @aggregate_size = options.fetch(:aggregate_size)
    # created eagerly so that `close` is safe even when no row was processed
    @buffer = []
  end

  # Buffers `row`; yields the buffer once it holds `aggregate_size` rows,
  # then starts a fresh buffer. Always returns nil, so rows only leave
  # this transform through the yielded arrays.
  def process(row)
    @buffer << row
    if @buffer.size == @aggregate_size
      yield @buffer
      @buffer = []
    end
    nil
  end

  # Yields the remaining buffered rows, if any.
  def close
    yield @buffer unless @buffer.empty?
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Test-support destination whose `write` records the row and returns nil.
class TestDestinationReturningNil
  # options - Hash; an optional :on_init callable receives the freshly
  #           built instance, letting the caller keep a reference to it.
  def initialize(options = {})
    hook = options[:on_init]
    return unless hook

    hook.call(self)
  end

  # Appends `row` to the list of written rows and returns nil.
  def write(row)
    @written_rows ||= []
    @written_rows << row
    nil
  end
end
|