kiba 2.0.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +41 -0
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +38 -2
- data/Gemfile +1 -1
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +3 -1
- data/Pro-Changes.md +82 -5
- data/README.md +12 -65
- data/Rakefile +8 -3
- data/kiba.gemspec +20 -17
- data/lib/kiba.rb +14 -11
- data/lib/kiba/context.rb +9 -5
- data/lib/kiba/control.rb +1 -1
- data/lib/kiba/dsl_extensions/config.rb +1 -1
- data/lib/kiba/parser.rb +6 -22
- data/lib/kiba/streaming_runner.rb +62 -5
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +15 -7
- data/test/shared_runner_tests.rb +227 -0
- data/test/support/shared_tests.rb +1 -1
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +2 -2
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_csv_destination.rb +2 -2
- data/test/support/test_csv_source.rb +1 -1
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +1 -1
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +1 -1
- data/test/test_integration.rb +38 -33
- data/test/test_parser.rb +16 -50
- data/test/test_run.rb +37 -0
- data/test/test_streaming_runner.rb +44 -23
- metadata +45 -30
- data/.travis.yml +0 -15
- data/appveyor.yml +0 -26
- data/bin/kiba +0 -5
- data/lib/kiba/cli.rb +0 -16
- data/lib/kiba/runner.rb +0 -78
- data/test/common/runner.rb +0 -137
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/namespace_conflict.etl +0 -9
- data/test/fixtures/some_extension.rb +0 -4
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -21
- data/test/test_runner.rb +0 -6
data/lib/kiba/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "minitest/pride"
|
3
|
+
require "minitest/focus"
|
4
|
+
require "kiba"
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
if ENV["CI"] == "true"
|
7
|
+
puts "Running with MiniTest version #{MiniTest::VERSION}"
|
8
|
+
end
|
8
9
|
|
10
|
+
class Kiba::Test < Minitest::Test
|
9
11
|
def remove_files(*files)
|
10
12
|
files.each do |file|
|
11
13
|
File.delete(file) if File.exist?(file)
|
@@ -13,6 +15,12 @@ class Kiba::Test < Minitest::Test
|
|
13
15
|
end
|
14
16
|
|
15
17
|
def fixture(file)
|
16
|
-
File.join(File.dirname(__FILE__),
|
18
|
+
File.join(File.dirname(__FILE__), "fixtures", file)
|
19
|
+
end
|
20
|
+
|
21
|
+
unless method_defined?(:assert_mock)
|
22
|
+
def assert_mock(mock)
|
23
|
+
mock.verify
|
24
|
+
end
|
17
25
|
end
|
18
26
|
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
require "minitest/mock"
|
2
|
+
require_relative "support/test_enumerable_source"
|
3
|
+
require_relative "support/test_destination_returning_nil"
|
4
|
+
|
5
|
+
module SharedRunnerTests
|
6
|
+
def rows
|
7
|
+
@rows ||= [
|
8
|
+
{identifier: "first-row"},
|
9
|
+
{identifier: "second-row"}
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
def control
|
14
|
+
@control ||= begin
|
15
|
+
control = Kiba::Control.new
|
16
|
+
# this will yield a single row for testing
|
17
|
+
control.sources << {
|
18
|
+
klass: TestEnumerableSource,
|
19
|
+
args: [rows]
|
20
|
+
}
|
21
|
+
control
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_block_transform_processing
|
26
|
+
# is there a better way to assert a block was called in minitest?
|
27
|
+
control.transforms << {block: lambda { |r| (@called = true) && (return r) }}
|
28
|
+
kiba_run(control)
|
29
|
+
assert_equal true, @called
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_dismissed_row_not_passed_to_next_transform
|
33
|
+
@called = nil
|
34
|
+
control.transforms << {block: lambda { |_| }}
|
35
|
+
control.transforms << {block: lambda { |_| (@called = true) && (return nil) }}
|
36
|
+
kiba_run(control)
|
37
|
+
assert_nil @called
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_post_process_runs_once
|
41
|
+
assert_equal 2, rows.size
|
42
|
+
@called = 0
|
43
|
+
control.post_processes << {block: lambda { @called += 1 }}
|
44
|
+
kiba_run(control)
|
45
|
+
assert_equal 1, @called
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_post_process_not_called_after_row_failure
|
49
|
+
@called = nil
|
50
|
+
control.transforms << {block: lambda { |_| fail "FAIL" }}
|
51
|
+
control.post_processes << {block: lambda { @called = true }}
|
52
|
+
assert_raises(RuntimeError, "FAIL") { kiba_run(control) }
|
53
|
+
assert_nil @called
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_pre_process_runs_once
|
57
|
+
assert_equal 2, rows.size
|
58
|
+
@called = 0
|
59
|
+
control.pre_processes << {block: lambda { @called += 1 }}
|
60
|
+
kiba_run(control)
|
61
|
+
assert_equal 1, @called
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_pre_process_runs_before_source_is_instantiated
|
65
|
+
calls = []
|
66
|
+
|
67
|
+
mock_source_class = MiniTest::Mock.new
|
68
|
+
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
69
|
+
calls << :source_instantiated
|
70
|
+
end
|
71
|
+
|
72
|
+
control = Kiba::Control.new
|
73
|
+
control.pre_processes << {block: lambda { calls << :pre_processor_executed }}
|
74
|
+
control.sources << {klass: mock_source_class}
|
75
|
+
kiba_run(control)
|
76
|
+
|
77
|
+
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
78
|
+
assert_mock mock_source_class
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_no_error_raised_if_destination_close_not_implemented
|
82
|
+
# NOTE: this fake destination does not implement `close`
|
83
|
+
destination_instance = MiniTest::Mock.new
|
84
|
+
|
85
|
+
mock_destination_class = MiniTest::Mock.new
|
86
|
+
mock_destination_class.expect(:new, destination_instance)
|
87
|
+
|
88
|
+
control = Kiba::Control.new
|
89
|
+
control.destinations << {klass: mock_destination_class}
|
90
|
+
kiba_run(control)
|
91
|
+
assert_mock mock_destination_class
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_destination_close_called_if_defined
|
95
|
+
destination_instance = MiniTest::Mock.new
|
96
|
+
destination_instance.expect(:close, nil)
|
97
|
+
mock_destination_class = MiniTest::Mock.new
|
98
|
+
mock_destination_class.expect(:new, destination_instance)
|
99
|
+
|
100
|
+
control = Kiba::Control.new
|
101
|
+
control.destinations << {klass: mock_destination_class}
|
102
|
+
kiba_run(control)
|
103
|
+
assert_mock destination_instance
|
104
|
+
assert_mock mock_destination_class
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_use_next_to_exit_early_from_block_transform
|
108
|
+
assert_equal 2, rows.size
|
109
|
+
|
110
|
+
# calling "return row" from a block is forbidden, but you can use "next" instead
|
111
|
+
b = lambda do |row|
|
112
|
+
if row.fetch(:identifier) == "first-row"
|
113
|
+
# demonstrate how to remove a row from the pipeline via next
|
114
|
+
next
|
115
|
+
else
|
116
|
+
# demonstrate how you can reformat via next
|
117
|
+
next({new_identifier: row.fetch(:identifier)})
|
118
|
+
end
|
119
|
+
end
|
120
|
+
control.transforms << {block: b}
|
121
|
+
|
122
|
+
# keep track of the rows
|
123
|
+
@remaining_rows = []
|
124
|
+
checker = lambda { |row| (@remaining_rows << row) && (return row) }
|
125
|
+
control.transforms << {block: checker}
|
126
|
+
|
127
|
+
kiba_run(control)
|
128
|
+
|
129
|
+
# the first row should have been removed
|
130
|
+
# and the second row should have been reformatted
|
131
|
+
assert_equal [{new_identifier: "second-row"}], @remaining_rows
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_destination_returning_nil_does_not_remove_row_from_pipeline
|
135
|
+
# safeguard to avoid modification on the support code
|
136
|
+
assert_nil TestDestinationReturningNil.new.write("FOOBAR")
|
137
|
+
|
138
|
+
destinations = []
|
139
|
+
control = Kiba.parse do
|
140
|
+
source TestEnumerableSource, [{key: "value"}]
|
141
|
+
2.times do
|
142
|
+
destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
|
143
|
+
end
|
144
|
+
end
|
145
|
+
kiba_run(control)
|
146
|
+
2.times do |i|
|
147
|
+
assert_equal [{key: "value"}], destinations[i].instance_variable_get(:@written_rows)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_nil_transform_error_message
|
152
|
+
control = Kiba.parse do
|
153
|
+
transform
|
154
|
+
end
|
155
|
+
assert_raises(RuntimeError, "Nil parameters not allowed here") { kiba_run(control) }
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_ruby_3_source_kwargs
|
159
|
+
# NOTE: before Ruby 3 kwargs support, a Ruby warning would
|
160
|
+
# be captured here with Ruby 2.7 & ensure we fail,
|
161
|
+
# and an error would be raised with Ruby 2.8.0-dev
|
162
|
+
# NOTE: only the first warning will be captured, though, but
|
163
|
+
# having 3 different tests is still better
|
164
|
+
storage = nil
|
165
|
+
assert_silent do
|
166
|
+
Kiba.run(Kiba.parse do
|
167
|
+
source TestKeywordArgumentsComponent,
|
168
|
+
mandatory: "first",
|
169
|
+
on_init: ->(values) { storage = values }
|
170
|
+
end)
|
171
|
+
end
|
172
|
+
assert_equal({
|
173
|
+
mandatory: "first",
|
174
|
+
optional: nil
|
175
|
+
}, storage)
|
176
|
+
end
|
177
|
+
|
178
|
+
def test_ruby_3_transform_kwargs
|
179
|
+
storage = nil
|
180
|
+
assert_silent do
|
181
|
+
Kiba.run(Kiba.parse do
|
182
|
+
transform TestKeywordArgumentsComponent,
|
183
|
+
mandatory: "first",
|
184
|
+
on_init: ->(values) { storage = values }
|
185
|
+
end)
|
186
|
+
end
|
187
|
+
assert_equal({
|
188
|
+
mandatory: "first",
|
189
|
+
optional: nil
|
190
|
+
}, storage)
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_ruby_3_destination_kwargs
|
194
|
+
storage = nil
|
195
|
+
assert_silent do
|
196
|
+
Kiba.run(Kiba.parse do
|
197
|
+
destination TestKeywordArgumentsComponent,
|
198
|
+
mandatory: "first",
|
199
|
+
on_init: ->(values) { storage = values }
|
200
|
+
end)
|
201
|
+
end
|
202
|
+
assert_equal({
|
203
|
+
mandatory: "first",
|
204
|
+
optional: nil
|
205
|
+
}, storage)
|
206
|
+
end
|
207
|
+
|
208
|
+
def test_positional_plus_keyword_arguments
|
209
|
+
storage = nil
|
210
|
+
assert_silent do
|
211
|
+
Kiba.run(Kiba.parse do
|
212
|
+
source TestMixedArgumentsComponent,
|
213
|
+
"some positional argument",
|
214
|
+
mandatory: "first",
|
215
|
+
on_init: ->(values) {
|
216
|
+
storage = values
|
217
|
+
}
|
218
|
+
end)
|
219
|
+
end
|
220
|
+
|
221
|
+
assert_equal({
|
222
|
+
some_value: "some positional argument",
|
223
|
+
mandatory: "first",
|
224
|
+
optional: nil
|
225
|
+
}, storage)
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class AggregateTransform
|
2
|
+
def initialize(options)
|
3
|
+
@aggregate_size = options.fetch(:aggregate_size)
|
4
|
+
end
|
5
|
+
|
6
|
+
def process(row)
|
7
|
+
@buffer ||= []
|
8
|
+
@buffer << row
|
9
|
+
if @buffer.size == @aggregate_size
|
10
|
+
yield @buffer
|
11
|
+
@buffer = []
|
12
|
+
end
|
13
|
+
nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def close
|
17
|
+
yield @buffer unless @buffer.empty?
|
18
|
+
end
|
19
|
+
end
|
@@ -1,9 +1,9 @@
|
|
1
|
-
require
|
1
|
+
require "csv"
|
2
2
|
|
3
3
|
# simple destination, not checking that each row has all the fields
|
4
4
|
class TestCsvDestination
|
5
5
|
def initialize(output_file)
|
6
|
-
@csv = CSV.open(output_file,
|
6
|
+
@csv = CSV.open(output_file, "w")
|
7
7
|
@headers_written = false
|
8
8
|
end
|
9
9
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# a mock component to test Ruby 3 keyword argument support
|
2
|
+
class TestKeywordArgumentsComponent
|
3
|
+
def initialize(mandatory:, optional: nil, on_init: nil)
|
4
|
+
values = {
|
5
|
+
mandatory: mandatory,
|
6
|
+
optional: optional
|
7
|
+
}
|
8
|
+
on_init&.call(values)
|
9
|
+
end
|
10
|
+
|
11
|
+
def each
|
12
|
+
# no-op
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# a mock component to test Ruby 3 keyword argument support
|
2
|
+
class TestMixedArgumentsComponent
|
3
|
+
def initialize(some_value, mandatory:, on_init:, optional: nil)
|
4
|
+
@values = {}
|
5
|
+
@values[:some_value] = some_value
|
6
|
+
@values[:mandatory] = mandatory
|
7
|
+
@values[:optional] = optional
|
8
|
+
on_init&.call(@values)
|
9
|
+
end
|
10
|
+
|
11
|
+
def each
|
12
|
+
# no-op
|
13
|
+
end
|
14
|
+
end
|
data/test/test_integration.rb
CHANGED
@@ -1,28 +1,33 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative "helper"
|
2
2
|
|
3
|
-
require_relative
|
4
|
-
require_relative
|
5
|
-
require_relative
|
6
|
-
require_relative
|
7
|
-
require_relative
|
3
|
+
require_relative "support/test_csv_source"
|
4
|
+
require_relative "support/test_csv_destination"
|
5
|
+
require_relative "support/test_rename_field_transform"
|
6
|
+
require_relative "support/test_enumerable_source"
|
7
|
+
require_relative "support/test_source_that_reads_at_instantiation_time"
|
8
8
|
|
9
9
|
# End-to-end tests go here
|
10
10
|
class TestIntegration < Kiba::Test
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
11
|
+
def output_file
|
12
|
+
"test/tmp/output.csv"
|
13
|
+
end
|
14
|
+
|
15
|
+
def input_file
|
16
|
+
"test/tmp/input.csv"
|
17
|
+
end
|
18
|
+
|
19
|
+
def sample_csv_data
|
20
|
+
<<~CSV
|
21
|
+
first_name,last_name,sex
|
22
|
+
John,Doe,M
|
23
|
+
Mary,Johnson,F
|
24
|
+
Cindy,Backgammon,F
|
25
|
+
Patrick,McWire,M
|
26
|
+
CSV
|
22
27
|
end
|
23
28
|
|
24
29
|
def clean
|
25
|
-
remove_files(*Dir[
|
30
|
+
remove_files(*Dir["test/tmp/*.csv"])
|
26
31
|
end
|
27
32
|
|
28
33
|
def setup
|
@@ -37,36 +42,36 @@ CSV
|
|
37
42
|
def test_csv_to_csv
|
38
43
|
# parse the ETL script (this won't run it)
|
39
44
|
control = Kiba.parse do
|
40
|
-
source TestCsvSource,
|
45
|
+
source TestCsvSource, "test/tmp/input.csv"
|
41
46
|
|
42
47
|
transform do |row|
|
43
48
|
row[:sex] = case row[:sex]
|
44
|
-
when
|
45
|
-
when
|
46
|
-
else
|
49
|
+
when "M" then "Male"
|
50
|
+
when "F" then "Female"
|
51
|
+
else "Unknown"
|
47
52
|
end
|
48
53
|
row # must be returned
|
49
54
|
end
|
50
55
|
|
51
56
|
# returning nil dismisses the row
|
52
57
|
transform do |row|
|
53
|
-
row[:sex] ==
|
58
|
+
row[:sex] == "Female" ? row : nil
|
54
59
|
end
|
55
60
|
|
56
61
|
transform TestRenameFieldTransform, :sex, :sex_2015
|
57
62
|
|
58
|
-
destination TestCsvDestination,
|
63
|
+
destination TestCsvDestination, "test/tmp/output.csv"
|
59
64
|
end
|
60
65
|
|
61
66
|
# run the parsed ETL script
|
62
67
|
Kiba.run(control)
|
63
68
|
|
64
69
|
# verify the output
|
65
|
-
assert_equal
|
66
|
-
first_name,last_name,sex_2015
|
67
|
-
Mary,Johnson,Female
|
68
|
-
Cindy,Backgammon,Female
|
69
|
-
CSV
|
70
|
+
assert_equal <<~CSV, IO.read(output_file)
|
71
|
+
first_name,last_name,sex_2015
|
72
|
+
Mary,Johnson,Female
|
73
|
+
Cindy,Backgammon,Female
|
74
|
+
CSV
|
70
75
|
end
|
71
76
|
|
72
77
|
def test_variable_access
|
@@ -97,18 +102,18 @@ CSV
|
|
97
102
|
|
98
103
|
Kiba.run(control)
|
99
104
|
|
100
|
-
assert_equal
|
105
|
+
assert_equal "Count is now 103", message
|
101
106
|
end
|
102
107
|
|
103
108
|
def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
|
104
|
-
remove_files(
|
109
|
+
remove_files("test/tmp/eager.csv")
|
105
110
|
|
106
111
|
control = Kiba.parse do
|
107
112
|
pre_process do
|
108
|
-
IO.write(
|
113
|
+
IO.write("test/tmp/eager.csv", "something")
|
109
114
|
end
|
110
115
|
|
111
|
-
source SourceThatReadsAtInstantionTime,
|
116
|
+
source SourceThatReadsAtInstantionTime, "test/tmp/eager.csv"
|
112
117
|
end
|
113
118
|
|
114
119
|
Kiba.run(control)
|