kiba 1.0.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.travis.yml +11 -9
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +28 -0
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +7 -0
- data/Pro-Changes.md +108 -0
- data/README.md +15 -282
- data/Rakefile +6 -1
- data/appveyor.yml +19 -9
- data/bin/kiba +12 -2
- data/kiba.gemspec +6 -1
- data/lib/kiba.rb +10 -1
- data/lib/kiba/context.rb +4 -0
- data/lib/kiba/control.rb +4 -0
- data/lib/kiba/dsl_extensions/config.rb +9 -0
- data/lib/kiba/parser.rb +4 -9
- data/lib/kiba/runner.rb +14 -5
- data/lib/kiba/streaming_runner.rb +38 -0
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +11 -2
- data/test/shared_runner_tests.rb +228 -0
- data/test/support/shared_tests.rb +10 -0
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +9 -0
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +9 -0
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +8 -0
- data/test/test_integration.rb +3 -3
- data/test/test_parser.rb +34 -29
- data/test/test_run.rb +12 -0
- data/test/test_runner.rb +5 -81
- data/test/test_streaming_runner.rb +70 -0
- metadata +57 -16
- data/lib/kiba/cli.rb +0 -16
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -17
data/lib/kiba/parser.rb
CHANGED
@@ -1,14 +1,9 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Parser
|
3
|
-
def parse(
|
4
|
-
control = Control.new
|
5
|
-
context = Context.new(control)
|
6
|
-
|
7
|
-
# this somewhat weird construct allows to remove a nil source_file
|
8
|
-
context.instance_eval(*[source_as_string, source_file].compact)
|
9
|
-
else
|
10
|
-
context.instance_eval(&source_as_block)
|
11
|
-
end
|
3
|
+
def parse(&source_as_block)
|
4
|
+
control = Kiba::Control.new
|
5
|
+
context = Kiba::Context.new(control)
|
6
|
+
context.instance_eval(&source_as_block)
|
12
7
|
control
|
13
8
|
end
|
14
9
|
end
|
data/lib/kiba/runner.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Kiba
|
2
2
|
module Runner
|
3
|
+
extend self
|
4
|
+
|
3
5
|
# allow to handle a block form just like a regular transform
|
4
6
|
class AliasingProc < Proc
|
5
7
|
alias_method :process, :call
|
@@ -13,8 +15,9 @@ module Kiba
|
|
13
15
|
process_rows(
|
14
16
|
to_instances(control.sources),
|
15
17
|
to_instances(control.transforms, true),
|
16
|
-
to_instances(control.destinations)
|
18
|
+
destinations = to_instances(control.destinations)
|
17
19
|
)
|
20
|
+
close_destinations(destinations)
|
18
21
|
# TODO: when I add post processes as class, I'll have to add a test to
|
19
22
|
# make sure instantiation occurs after the main processing is done (#16)
|
20
23
|
run_post_processes(control)
|
@@ -28,6 +31,12 @@ module Kiba
|
|
28
31
|
to_instances(control.post_processes, true, false).each(&:call)
|
29
32
|
end
|
30
33
|
|
34
|
+
def close_destinations(destinations)
|
35
|
+
destinations
|
36
|
+
.find_all { |d| d.respond_to?(:close) }
|
37
|
+
.each(&:close)
|
38
|
+
end
|
39
|
+
|
31
40
|
def process_rows(sources, transforms, destinations)
|
32
41
|
sources.each do |source|
|
33
42
|
source.each do |row|
|
@@ -41,7 +50,6 @@ module Kiba
|
|
41
50
|
end
|
42
51
|
end
|
43
52
|
end
|
44
|
-
destinations.find_all { |d| d.respond_to?(:close) }.each(&:close)
|
45
53
|
end
|
46
54
|
|
47
55
|
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
@@ -55,15 +63,16 @@ module Kiba
|
|
55
63
|
end
|
56
64
|
|
57
65
|
def to_instance(klass, args, block, allow_block, allow_class)
|
58
|
-
if klass
|
66
|
+
if klass && block
|
67
|
+
fail 'Class and block form cannot be used together at the moment'
|
68
|
+
elsif klass
|
59
69
|
fail 'Class form is not allowed here' unless allow_class
|
60
70
|
klass.new(*args)
|
61
71
|
elsif block
|
62
72
|
fail 'Block form is not allowed here' unless allow_block
|
63
73
|
AliasingProc.new(&block)
|
64
74
|
else
|
65
|
-
|
66
|
-
fail 'Class and block form cannot be used together at the moment'
|
75
|
+
fail 'Nil parameters not allowed here'
|
67
76
|
end
|
68
77
|
end
|
69
78
|
end
|
data/lib/kiba/streaming_runner.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Kiba
  # Runner variant that wires sources and transforms together as a chain of
  # Enumerators, so a transform may emit zero, one or several rows per input
  # row. Everything not overridden here comes from the included Runner.
  module StreamingRunner
    include Runner
    extend self

    # Wraps +stream+ into a new Enumerator applying transform +t+.
    #
    # Rows handed by +t.process+ to its block are emitted first, then the
    # return value of +t.process+ when it is truthy. Once +stream+ is
    # exhausted, rows yielded by +t.close+ are emitted too, provided the
    # transform responds to +close+.
    def transform_stream(stream, t)
      Enumerator.new do |yielder|
        stream.each do |incoming|
          outcome = t.process(incoming) { |emitted| yielder << emitted }
          yielder << outcome if outcome
        end
        t.close { |trailing| yielder << trailing } if t.respond_to?(:close)
      end
    end

    # Returns a single Enumerator going through every row of every source,
    # in the order the sources are given.
    def source_stream(sources)
      Enumerator.new do |yielder|
        sources.each do |source|
          source.each { |row| yielder << row }
        end
      end
    end

    # Chains each transform on top of the source stream, then writes every
    # row coming out of the chain to all destinations.
    def process_rows(sources, transforms, destinations)
      pipeline = transforms.inject(source_stream(sources)) do |upstream, transform|
        transform_stream(upstream, transform)
      end
      pipeline.each do |row|
        destinations.each { |destination| destination.write(row) }
      end
    end
  end
end
|
data/lib/kiba/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'minitest/pride'
|
3
|
+
require 'minitest/focus'
|
3
4
|
require 'kiba'
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
if ENV['CI'] == 'true'
|
7
|
+
puts "Running with MiniTest version #{MiniTest::VERSION}"
|
8
|
+
end
|
7
9
|
|
10
|
+
class Kiba::Test < Minitest::Test
|
8
11
|
def remove_files(*files)
|
9
12
|
files.each do |file|
|
10
13
|
File.delete(file) if File.exist?(file)
|
@@ -14,4 +17,10 @@ class Kiba::Test < Minitest::Test
|
|
14
17
|
def fixture(file)
|
15
18
|
File.join(File.dirname(__FILE__), 'fixtures', file)
|
16
19
|
end
|
20
|
+
|
21
|
+
unless self.method_defined?(:assert_mock)
|
22
|
+
def assert_mock(mock)
|
23
|
+
mock.verify
|
24
|
+
end
|
25
|
+
end
|
17
26
|
end
|
data/test/shared_runner_tests.rb
ADDED
@@ -0,0 +1,228 @@
|
|
1
|
+
require 'minitest/mock'
|
2
|
+
require_relative 'support/test_enumerable_source'
|
3
|
+
require_relative 'support/test_destination_returning_nil'
|
4
|
+
|
5
|
+
# Runner-agnostic test cases, written as a module of `test_*` methods.
#
# Most tests call `kiba_run(control)`, which is not defined here: the class
# mixing this module in is expected to provide it, along with the Minitest
# assertions (`assert_equal`, `assert_raises`, `assert_mock`, ...).
module SharedRunnerTests
  # Memoized fixture rows handed to the source declared in `control`.
  def rows
    @rows ||= [
      { identifier: 'first-row' },
      { identifier: 'second-row' }
    ]
  end

  # Memoized Kiba::Control pre-populated with one source built from `rows`.
  def control
    @control ||= begin
      control = Kiba::Control.new
      # the source is built from `rows` (two rows, see above)
      control.sources << {
        klass: TestEnumerableSource,
        args: [rows]
      }
      control
    end
  end

  def test_block_transform_processing
    # is there a better way to assert a block was called in minitest?
    control.transforms << { block: lambda { |r| @called = true; r } }
    kiba_run(control)
    assert_equal true, @called
  end

  def test_dismissed_row_not_passed_to_next_transform
    @called = nil
    control.transforms << { block: lambda { |_| nil } }
    control.transforms << { block: lambda { |_| @called = true; nil } }
    kiba_run(control)
    assert_nil @called
  end

  def test_post_process_runs_once
    assert_equal 2, rows.size
    @called = 0
    control.post_processes << { block: lambda { @called += 1 } }
    kiba_run(control)
    assert_equal 1, @called
  end

  def test_post_process_not_called_after_row_failure
    @called = nil
    control.transforms << { block: lambda { |_| fail 'FAIL' } }
    control.post_processes << { block: lambda { @called = true } }
    # A trailing String given to assert_raises is only the failure
    # description, so the exception message is checked explicitly.
    error = assert_raises(RuntimeError) { kiba_run(control) }
    assert_equal 'FAIL', error.message
    assert_nil @called
  end

  def test_pre_process_runs_once
    assert_equal 2, rows.size
    @called = 0
    control.pre_processes << { block: lambda { @called += 1 } }
    kiba_run(control)
    assert_equal 1, @called
  end

  def test_pre_process_runs_before_source_is_instantiated
    calls = []

    mock_source_class = Minitest::Mock.new
    mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
      calls << :source_instantiated
    end

    control = Kiba::Control.new
    control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
    control.sources << { klass: mock_source_class }
    kiba_run(control)

    assert_equal [:pre_processor_executed, :source_instantiated], calls
    assert_mock mock_source_class
  end

  def test_no_error_raised_if_destination_close_not_implemented
    # NOTE: this fake destination does not implement `close`
    destination_instance = Minitest::Mock.new

    mock_destination_class = Minitest::Mock.new
    mock_destination_class.expect(:new, destination_instance)

    control = Kiba::Control.new
    control.destinations << { klass: mock_destination_class }
    kiba_run(control)
    assert_mock mock_destination_class
  end

  def test_destination_close_called_if_defined
    destination_instance = Minitest::Mock.new
    destination_instance.expect(:close, nil)
    mock_destination_class = Minitest::Mock.new
    mock_destination_class.expect(:new, destination_instance)

    control = Kiba::Control.new
    control.destinations << { klass: mock_destination_class }
    kiba_run(control)
    assert_mock destination_instance
    assert_mock mock_destination_class
  end

  def test_use_next_to_exit_early_from_block_transform
    assert_equal 2, rows.size

    # calling "return row" from a block is forbidden, but you can use "next" instead
    b = lambda do |row|
      if row.fetch(:identifier) == 'first-row'
        # demonstrate how to remove a row from the pipeline via next
        next
      else
        # demonstrate how you can reformat via next
        next({new_identifier: row.fetch(:identifier)})
      end
      fail "This should not be called"
    end
    control.transforms << { block: b }

    # keep track of the rows
    @remaining_rows = []
    checker = lambda { |row| @remaining_rows << row; row }
    control.transforms << { block: checker }

    kiba_run(control)

    # the first row should have been removed
    # and the second row should have been reformatted
    assert_equal [{new_identifier: 'second-row'}], @remaining_rows
  end

  def test_destination_returning_nil_does_not_remove_row_from_pipeline
    # safeguard to avoid modification on the support code
    assert_nil TestDestinationReturningNil.new.write("FOOBAR")

    destinations = []
    control = Kiba.parse do
      source TestEnumerableSource, [{key: 'value'}]
      2.times do
        destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
      end
    end
    kiba_run(control)
    2.times do |i|
      assert_equal [{key: 'value'}], destinations[i].instance_variable_get(:@written_rows)
    end
  end

  def test_nil_transform_error_message
    control = Kiba.parse do
      transform
    end
    # assert_raises returns the rescued exception; compare its message
    # explicitly, otherwise any RuntimeError would satisfy this test.
    error = assert_raises(RuntimeError) { kiba_run(control) }
    assert_equal 'Nil parameters not allowed here', error.message
  end

  # NOTE(review): the keyword-argument tests below call Kiba.run directly
  # rather than kiba_run, unlike every other test of this module -
  # presumably on purpose; confirm.
  def test_ruby_3_source_kwargs
    # NOTE: before Ruby 3 kwargs support, a Ruby warning would
    # be captured here with Ruby 2.7 & ensure we fail,
    # and an error would be raised with Ruby 2.8.0-dev
    # NOTE: only the first warning will be captured, though, but
    # having 3 different tests is still better
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        source TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_ruby_3_transform_kwargs
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        transform TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_ruby_3_destination_kwargs
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        destination TestKeywordArgumentsComponent,
          mandatory: "first",
          on_init: -> (values) { storage = values }
      end)
    end
    assert_equal({
      mandatory: "first",
      optional: nil
    }, storage)
  end

  def test_positional_plus_keyword_arguments
    storage = nil
    assert_silent do
      Kiba.run(Kiba.parse do
        source TestMixedArgumentsComponent,
          "some positional argument",
          mandatory: "first",
          on_init: -> (values) {
            storage = values
          }
      end)
    end

    assert_equal({
      some_value: "some positional argument",
      mandatory: "first",
      optional: nil
    }, storage)
  end
end
|
data/test/support/test_aggregate_transform.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Transform that groups incoming rows into arrays of +:aggregate_size+ rows.
#
# +process+ never returns a row; full batches are handed to the block given
# to +process+, and the remaining rows (if any) to the block given to +close+.
class AggregateTransform
  # options - Hash which must contain :aggregate_size (KeyError otherwise).
  def initialize(options)
    @aggregate_size = options.fetch(:aggregate_size)
    # Created up-front so that +close+ is safe even when +process+ was never
    # called (e.g. the source produced no rows at all).
    @buffer = []
  end

  # Buffers +row+; yields the buffer once it holds +aggregate_size+ rows and
  # starts a fresh one. Always returns nil.
  def process(row)
    @buffer << row
    if @buffer.size == @aggregate_size
      yield @buffer
      @buffer = []
    end
    nil
  end

  # Yields the rows still buffered, unless there are none.
  def close
    yield @buffer unless @buffer.empty?
  end
end
|
data/test/support/test_destination_returning_nil.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Destination whose +write+ records the row but always returns nil.
class TestDestinationReturningNil
  # options - Hash; an optional :on_init callable receives this instance,
  #           which lets the caller keep a reference to it.
  def initialize(options = {})
    callback = options[:on_init]
    callback.call(self) if callback
  end

  # Appends +row+ to @written_rows (created on first write) and returns nil.
  def write(row)
    @written_rows ||= []
    @written_rows << row
    nil
  end
end
|