kiba 2.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +41 -0
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +38 -2
- data/Gemfile +1 -1
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +3 -1
- data/Pro-Changes.md +82 -5
- data/README.md +12 -65
- data/Rakefile +8 -3
- data/kiba.gemspec +20 -17
- data/lib/kiba.rb +14 -11
- data/lib/kiba/context.rb +9 -5
- data/lib/kiba/control.rb +1 -1
- data/lib/kiba/dsl_extensions/config.rb +1 -1
- data/lib/kiba/parser.rb +6 -22
- data/lib/kiba/streaming_runner.rb +62 -5
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +15 -7
- data/test/shared_runner_tests.rb +227 -0
- data/test/support/shared_tests.rb +1 -1
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +2 -2
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_csv_destination.rb +2 -2
- data/test/support/test_csv_source.rb +1 -1
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +1 -1
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +1 -1
- data/test/test_integration.rb +38 -33
- data/test/test_parser.rb +16 -50
- data/test/test_run.rb +37 -0
- data/test/test_streaming_runner.rb +44 -23
- metadata +45 -30
- data/.travis.yml +0 -15
- data/appveyor.yml +0 -26
- data/bin/kiba +0 -5
- data/lib/kiba/cli.rb +0 -16
- data/lib/kiba/runner.rb +0 -78
- data/test/common/runner.rb +0 -137
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/namespace_conflict.etl +0 -9
- data/test/fixtures/some_extension.rb +0 -4
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -21
- data/test/test_runner.rb +0 -6
data/lib/kiba/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "minitest/pride"
|
3
|
+
require "minitest/focus"
|
4
|
+
require "kiba"
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
if ENV["CI"] == "true"
|
7
|
+
puts "Running with MiniTest version #{MiniTest::VERSION}"
|
8
|
+
end
|
8
9
|
|
10
|
+
class Kiba::Test < Minitest::Test
|
9
11
|
def remove_files(*files)
|
10
12
|
files.each do |file|
|
11
13
|
File.delete(file) if File.exist?(file)
|
@@ -13,6 +15,12 @@ class Kiba::Test < Minitest::Test
|
|
13
15
|
end
|
14
16
|
|
15
17
|
def fixture(file)
|
16
|
-
File.join(File.dirname(__FILE__),
|
18
|
+
File.join(File.dirname(__FILE__), "fixtures", file)
|
19
|
+
end
|
20
|
+
|
21
|
+
unless method_defined?(:assert_mock)
|
22
|
+
def assert_mock(mock)
|
23
|
+
mock.verify
|
24
|
+
end
|
17
25
|
end
|
18
26
|
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
require "minitest/mock"
|
2
|
+
require_relative "support/test_enumerable_source"
|
3
|
+
require_relative "support/test_destination_returning_nil"
|
4
|
+
|
5
|
+
module SharedRunnerTests
|
6
|
+
def rows
|
7
|
+
@rows ||= [
|
8
|
+
{identifier: "first-row"},
|
9
|
+
{identifier: "second-row"}
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
def control
|
14
|
+
@control ||= begin
|
15
|
+
control = Kiba::Control.new
|
16
|
+
# this will yield a single row for testing
|
17
|
+
control.sources << {
|
18
|
+
klass: TestEnumerableSource,
|
19
|
+
args: [rows]
|
20
|
+
}
|
21
|
+
control
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_block_transform_processing
|
26
|
+
# is there a better way to assert a block was called in minitest?
|
27
|
+
control.transforms << {block: lambda { |r| (@called = true) && (return r) }}
|
28
|
+
kiba_run(control)
|
29
|
+
assert_equal true, @called
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_dismissed_row_not_passed_to_next_transform
|
33
|
+
@called = nil
|
34
|
+
control.transforms << {block: lambda { |_| }}
|
35
|
+
control.transforms << {block: lambda { |_| (@called = true) && (return nil) }}
|
36
|
+
kiba_run(control)
|
37
|
+
assert_nil @called
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_post_process_runs_once
|
41
|
+
assert_equal 2, rows.size
|
42
|
+
@called = 0
|
43
|
+
control.post_processes << {block: lambda { @called += 1 }}
|
44
|
+
kiba_run(control)
|
45
|
+
assert_equal 1, @called
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_post_process_not_called_after_row_failure
|
49
|
+
@called = nil
|
50
|
+
control.transforms << {block: lambda { |_| fail "FAIL" }}
|
51
|
+
control.post_processes << {block: lambda { @called = true }}
|
52
|
+
assert_raises(RuntimeError, "FAIL") { kiba_run(control) }
|
53
|
+
assert_nil @called
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_pre_process_runs_once
|
57
|
+
assert_equal 2, rows.size
|
58
|
+
@called = 0
|
59
|
+
control.pre_processes << {block: lambda { @called += 1 }}
|
60
|
+
kiba_run(control)
|
61
|
+
assert_equal 1, @called
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_pre_process_runs_before_source_is_instantiated
|
65
|
+
calls = []
|
66
|
+
|
67
|
+
mock_source_class = MiniTest::Mock.new
|
68
|
+
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
69
|
+
calls << :source_instantiated
|
70
|
+
end
|
71
|
+
|
72
|
+
control = Kiba::Control.new
|
73
|
+
control.pre_processes << {block: lambda { calls << :pre_processor_executed }}
|
74
|
+
control.sources << {klass: mock_source_class}
|
75
|
+
kiba_run(control)
|
76
|
+
|
77
|
+
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
78
|
+
assert_mock mock_source_class
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_no_error_raised_if_destination_close_not_implemented
|
82
|
+
# NOTE: this fake destination does not implement `close`
|
83
|
+
destination_instance = MiniTest::Mock.new
|
84
|
+
|
85
|
+
mock_destination_class = MiniTest::Mock.new
|
86
|
+
mock_destination_class.expect(:new, destination_instance)
|
87
|
+
|
88
|
+
control = Kiba::Control.new
|
89
|
+
control.destinations << {klass: mock_destination_class}
|
90
|
+
kiba_run(control)
|
91
|
+
assert_mock mock_destination_class
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_destination_close_called_if_defined
|
95
|
+
destination_instance = MiniTest::Mock.new
|
96
|
+
destination_instance.expect(:close, nil)
|
97
|
+
mock_destination_class = MiniTest::Mock.new
|
98
|
+
mock_destination_class.expect(:new, destination_instance)
|
99
|
+
|
100
|
+
control = Kiba::Control.new
|
101
|
+
control.destinations << {klass: mock_destination_class}
|
102
|
+
kiba_run(control)
|
103
|
+
assert_mock destination_instance
|
104
|
+
assert_mock mock_destination_class
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_use_next_to_exit_early_from_block_transform
|
108
|
+
assert_equal 2, rows.size
|
109
|
+
|
110
|
+
# calling "return row" from a block is forbidden, but you can use "next" instead
|
111
|
+
b = lambda do |row|
|
112
|
+
if row.fetch(:identifier) == "first-row"
|
113
|
+
# demonstrate how to remove a row from the pipeline via next
|
114
|
+
next
|
115
|
+
else
|
116
|
+
# demonstrate how you can reformat via next
|
117
|
+
next({new_identifier: row.fetch(:identifier)})
|
118
|
+
end
|
119
|
+
end
|
120
|
+
control.transforms << {block: b}
|
121
|
+
|
122
|
+
# keep track of the rows
|
123
|
+
@remaining_rows = []
|
124
|
+
checker = lambda { |row| (@remaining_rows << row) && (return row) }
|
125
|
+
control.transforms << {block: checker}
|
126
|
+
|
127
|
+
kiba_run(control)
|
128
|
+
|
129
|
+
# the first row should have been removed
|
130
|
+
# and the second row should have been reformatted
|
131
|
+
assert_equal [{new_identifier: "second-row"}], @remaining_rows
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_destination_returning_nil_does_not_remove_row_from_pipeline
|
135
|
+
# safeguard to avoid modification on the support code
|
136
|
+
assert_nil TestDestinationReturningNil.new.write("FOOBAR")
|
137
|
+
|
138
|
+
destinations = []
|
139
|
+
control = Kiba.parse do
|
140
|
+
source TestEnumerableSource, [{key: "value"}]
|
141
|
+
2.times do
|
142
|
+
destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
|
143
|
+
end
|
144
|
+
end
|
145
|
+
kiba_run(control)
|
146
|
+
2.times do |i|
|
147
|
+
assert_equal [{key: "value"}], destinations[i].instance_variable_get(:@written_rows)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_nil_transform_error_message
|
152
|
+
control = Kiba.parse do
|
153
|
+
transform
|
154
|
+
end
|
155
|
+
assert_raises(RuntimeError, "Nil parameters not allowed here") { kiba_run(control) }
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_ruby_3_source_kwargs
|
159
|
+
# NOTE: before Ruby 3 kwargs support, a Ruby warning would
|
160
|
+
# be captured here with Ruby 2.7 & ensure we fail,
|
161
|
+
# and an error would be raised with Ruby 2.8.0-dev
|
162
|
+
# NOTE: only the first warning will be captured, though, but
|
163
|
+
# having 3 different tests is still better
|
164
|
+
storage = nil
|
165
|
+
assert_silent do
|
166
|
+
Kiba.run(Kiba.parse do
|
167
|
+
source TestKeywordArgumentsComponent,
|
168
|
+
mandatory: "first",
|
169
|
+
on_init: ->(values) { storage = values }
|
170
|
+
end)
|
171
|
+
end
|
172
|
+
assert_equal({
|
173
|
+
mandatory: "first",
|
174
|
+
optional: nil
|
175
|
+
}, storage)
|
176
|
+
end
|
177
|
+
|
178
|
+
def test_ruby_3_transform_kwargs
|
179
|
+
storage = nil
|
180
|
+
assert_silent do
|
181
|
+
Kiba.run(Kiba.parse do
|
182
|
+
transform TestKeywordArgumentsComponent,
|
183
|
+
mandatory: "first",
|
184
|
+
on_init: ->(values) { storage = values }
|
185
|
+
end)
|
186
|
+
end
|
187
|
+
assert_equal({
|
188
|
+
mandatory: "first",
|
189
|
+
optional: nil
|
190
|
+
}, storage)
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_ruby_3_destination_kwargs
|
194
|
+
storage = nil
|
195
|
+
assert_silent do
|
196
|
+
Kiba.run(Kiba.parse do
|
197
|
+
destination TestKeywordArgumentsComponent,
|
198
|
+
mandatory: "first",
|
199
|
+
on_init: ->(values) { storage = values }
|
200
|
+
end)
|
201
|
+
end
|
202
|
+
assert_equal({
|
203
|
+
mandatory: "first",
|
204
|
+
optional: nil
|
205
|
+
}, storage)
|
206
|
+
end
|
207
|
+
|
208
|
+
def test_positional_plus_keyword_arguments
|
209
|
+
storage = nil
|
210
|
+
assert_silent do
|
211
|
+
Kiba.run(Kiba.parse do
|
212
|
+
source TestMixedArgumentsComponent,
|
213
|
+
"some positional argument",
|
214
|
+
mandatory: "first",
|
215
|
+
on_init: ->(values) {
|
216
|
+
storage = values
|
217
|
+
}
|
218
|
+
end)
|
219
|
+
end
|
220
|
+
|
221
|
+
assert_equal({
|
222
|
+
some_value: "some positional argument",
|
223
|
+
mandatory: "first",
|
224
|
+
optional: nil
|
225
|
+
}, storage)
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class AggregateTransform
|
2
|
+
def initialize(options)
|
3
|
+
@aggregate_size = options.fetch(:aggregate_size)
|
4
|
+
end
|
5
|
+
|
6
|
+
def process(row)
|
7
|
+
@buffer ||= []
|
8
|
+
@buffer << row
|
9
|
+
if @buffer.size == @aggregate_size
|
10
|
+
yield @buffer
|
11
|
+
@buffer = []
|
12
|
+
end
|
13
|
+
nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def close
|
17
|
+
yield @buffer unless @buffer.empty?
|
18
|
+
end
|
19
|
+
end
|
@@ -1,9 +1,9 @@
|
|
1
|
-
require
|
1
|
+
require "csv"
|
2
2
|
|
3
3
|
# simple destination, not checking that each row has all the fields
|
4
4
|
class TestCsvDestination
|
5
5
|
def initialize(output_file)
|
6
|
-
@csv = CSV.open(output_file,
|
6
|
+
@csv = CSV.open(output_file, "w")
|
7
7
|
@headers_written = false
|
8
8
|
end
|
9
9
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# a mock component to test Ruby 3 keyword argument support
|
2
|
+
class TestKeywordArgumentsComponent
|
3
|
+
def initialize(mandatory:, optional: nil, on_init: nil)
|
4
|
+
values = {
|
5
|
+
mandatory: mandatory,
|
6
|
+
optional: optional
|
7
|
+
}
|
8
|
+
on_init&.call(values)
|
9
|
+
end
|
10
|
+
|
11
|
+
def each
|
12
|
+
# no-op
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# a mock component to test Ruby 3 keyword argument support
|
2
|
+
class TestMixedArgumentsComponent
|
3
|
+
def initialize(some_value, mandatory:, on_init:, optional: nil)
|
4
|
+
@values = {}
|
5
|
+
@values[:some_value] = some_value
|
6
|
+
@values[:mandatory] = mandatory
|
7
|
+
@values[:optional] = optional
|
8
|
+
on_init&.call(@values)
|
9
|
+
end
|
10
|
+
|
11
|
+
def each
|
12
|
+
# no-op
|
13
|
+
end
|
14
|
+
end
|
data/test/test_integration.rb
CHANGED
@@ -1,28 +1,33 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative "helper"
|
2
2
|
|
3
|
-
require_relative
|
4
|
-
require_relative
|
5
|
-
require_relative
|
6
|
-
require_relative
|
7
|
-
require_relative
|
3
|
+
require_relative "support/test_csv_source"
|
4
|
+
require_relative "support/test_csv_destination"
|
5
|
+
require_relative "support/test_rename_field_transform"
|
6
|
+
require_relative "support/test_enumerable_source"
|
7
|
+
require_relative "support/test_source_that_reads_at_instantiation_time"
|
8
8
|
|
9
9
|
# End-to-end tests go here
|
10
10
|
class TestIntegration < Kiba::Test
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
11
|
+
def output_file
|
12
|
+
"test/tmp/output.csv"
|
13
|
+
end
|
14
|
+
|
15
|
+
def input_file
|
16
|
+
"test/tmp/input.csv"
|
17
|
+
end
|
18
|
+
|
19
|
+
def sample_csv_data
|
20
|
+
<<~CSV
|
21
|
+
first_name,last_name,sex
|
22
|
+
John,Doe,M
|
23
|
+
Mary,Johnson,F
|
24
|
+
Cindy,Backgammon,F
|
25
|
+
Patrick,McWire,M
|
26
|
+
CSV
|
22
27
|
end
|
23
28
|
|
24
29
|
def clean
|
25
|
-
remove_files(*Dir[
|
30
|
+
remove_files(*Dir["test/tmp/*.csv"])
|
26
31
|
end
|
27
32
|
|
28
33
|
def setup
|
@@ -37,36 +42,36 @@ CSV
|
|
37
42
|
def test_csv_to_csv
|
38
43
|
# parse the ETL script (this won't run it)
|
39
44
|
control = Kiba.parse do
|
40
|
-
source TestCsvSource,
|
45
|
+
source TestCsvSource, "test/tmp/input.csv"
|
41
46
|
|
42
47
|
transform do |row|
|
43
48
|
row[:sex] = case row[:sex]
|
44
|
-
when
|
45
|
-
when
|
46
|
-
else
|
49
|
+
when "M" then "Male"
|
50
|
+
when "F" then "Female"
|
51
|
+
else "Unknown"
|
47
52
|
end
|
48
53
|
row # must be returned
|
49
54
|
end
|
50
55
|
|
51
56
|
# returning nil dismisses the row
|
52
57
|
transform do |row|
|
53
|
-
row[:sex] ==
|
58
|
+
row[:sex] == "Female" ? row : nil
|
54
59
|
end
|
55
60
|
|
56
61
|
transform TestRenameFieldTransform, :sex, :sex_2015
|
57
62
|
|
58
|
-
destination TestCsvDestination,
|
63
|
+
destination TestCsvDestination, "test/tmp/output.csv"
|
59
64
|
end
|
60
65
|
|
61
66
|
# run the parsed ETL script
|
62
67
|
Kiba.run(control)
|
63
68
|
|
64
69
|
# verify the output
|
65
|
-
assert_equal
|
66
|
-
first_name,last_name,sex_2015
|
67
|
-
Mary,Johnson,Female
|
68
|
-
Cindy,Backgammon,Female
|
69
|
-
CSV
|
70
|
+
assert_equal <<~CSV, IO.read(output_file)
|
71
|
+
first_name,last_name,sex_2015
|
72
|
+
Mary,Johnson,Female
|
73
|
+
Cindy,Backgammon,Female
|
74
|
+
CSV
|
70
75
|
end
|
71
76
|
|
72
77
|
def test_variable_access
|
@@ -97,18 +102,18 @@ CSV
|
|
97
102
|
|
98
103
|
Kiba.run(control)
|
99
104
|
|
100
|
-
assert_equal
|
105
|
+
assert_equal "Count is now 103", message
|
101
106
|
end
|
102
107
|
|
103
108
|
def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
|
104
|
-
remove_files(
|
109
|
+
remove_files("test/tmp/eager.csv")
|
105
110
|
|
106
111
|
control = Kiba.parse do
|
107
112
|
pre_process do
|
108
|
-
IO.write(
|
113
|
+
IO.write("test/tmp/eager.csv", "something")
|
109
114
|
end
|
110
115
|
|
111
|
-
source SourceThatReadsAtInstantionTime,
|
116
|
+
source SourceThatReadsAtInstantionTime, "test/tmp/eager.csv"
|
112
117
|
end
|
113
118
|
|
114
119
|
Kiba.run(control)
|