kiba 2.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.github/workflows/ci.yml +41 -0
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +38 -2
  6. data/Gemfile +1 -1
  7. data/ISSUE_TEMPLATE.md +7 -0
  8. data/LICENSE +3 -1
  9. data/Pro-Changes.md +82 -5
  10. data/README.md +12 -65
  11. data/Rakefile +8 -3
  12. data/kiba.gemspec +20 -17
  13. data/lib/kiba.rb +14 -11
  14. data/lib/kiba/context.rb +9 -5
  15. data/lib/kiba/control.rb +1 -1
  16. data/lib/kiba/dsl_extensions/config.rb +1 -1
  17. data/lib/kiba/parser.rb +6 -22
  18. data/lib/kiba/streaming_runner.rb +62 -5
  19. data/lib/kiba/version.rb +1 -1
  20. data/test/helper.rb +15 -7
  21. data/test/shared_runner_tests.rb +227 -0
  22. data/test/support/shared_tests.rb +1 -1
  23. data/test/support/test_aggregate_transform.rb +19 -0
  24. data/test/support/test_array_destination.rb +2 -2
  25. data/test/support/test_close_yielding_transform.rb +11 -0
  26. data/test/support/test_csv_destination.rb +2 -2
  27. data/test/support/test_csv_source.rb +1 -1
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +1 -1
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +1 -1
  34. data/test/test_integration.rb +38 -33
  35. data/test/test_parser.rb +16 -50
  36. data/test/test_run.rb +37 -0
  37. data/test/test_streaming_runner.rb +44 -23
  38. metadata +45 -30
  39. data/.travis.yml +0 -15
  40. data/appveyor.yml +0 -26
  41. data/bin/kiba +0 -5
  42. data/lib/kiba/cli.rb +0 -16
  43. data/lib/kiba/runner.rb +0 -78
  44. data/test/common/runner.rb +0 -137
  45. data/test/fixtures/bogus.etl +0 -2
  46. data/test/fixtures/namespace_conflict.etl +0 -9
  47. data/test/fixtures/some_extension.rb +0 -4
  48. data/test/fixtures/valid.etl +0 -1
  49. data/test/test_cli.rb +0 -21
  50. data/test/test_runner.rb +0 -6
data/lib/kiba/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kiba
2
- VERSION = '2.0.0'
2
+ VERSION = "4.0.0"
3
3
  end
data/test/helper.rb CHANGED
@@ -1,11 +1,13 @@
1
- require 'minitest/autorun'
2
- require 'minitest/pride'
3
- require 'minitest/focus'
4
- require 'kiba'
1
+ require "minitest/autorun"
2
+ require "minitest/pride"
3
+ require "minitest/focus"
4
+ require "kiba"
5
5
 
6
- class Kiba::Test < Minitest::Test
7
- extend Minitest::Spec::DSL
6
+ if ENV["CI"] == "true"
7
+ puts "Running with MiniTest version #{MiniTest::VERSION}"
8
+ end
8
9
 
10
+ class Kiba::Test < Minitest::Test
9
11
  def remove_files(*files)
10
12
  files.each do |file|
11
13
  File.delete(file) if File.exist?(file)
@@ -13,6 +15,12 @@ class Kiba::Test < Minitest::Test
13
15
  end
14
16
 
15
17
  def fixture(file)
16
- File.join(File.dirname(__FILE__), 'fixtures', file)
18
+ File.join(File.dirname(__FILE__), "fixtures", file)
19
+ end
20
+
21
+ unless method_defined?(:assert_mock)
22
+ def assert_mock(mock)
23
+ mock.verify
24
+ end
17
25
  end
18
26
  end
data/test/shared_runner_tests.rb ADDED
@@ -0,0 +1,227 @@
1
+ require "minitest/mock"
2
+ require_relative "support/test_enumerable_source"
3
+ require_relative "support/test_destination_returning_nil"
4
+
5
+ module SharedRunnerTests
6
+ def rows
7
+ @rows ||= [
8
+ {identifier: "first-row"},
9
+ {identifier: "second-row"}
10
+ ]
11
+ end
12
+
13
+ def control
14
+ @control ||= begin
15
+ control = Kiba::Control.new
16
+ # this will yield a single row for testing
17
+ control.sources << {
18
+ klass: TestEnumerableSource,
19
+ args: [rows]
20
+ }
21
+ control
22
+ end
23
+ end
24
+
25
+ def test_block_transform_processing
26
+ # is there a better way to assert a block was called in minitest?
27
+ control.transforms << {block: lambda { |r| (@called = true) && (return r) }}
28
+ kiba_run(control)
29
+ assert_equal true, @called
30
+ end
31
+
32
+ def test_dismissed_row_not_passed_to_next_transform
33
+ @called = nil
34
+ control.transforms << {block: lambda { |_| }}
35
+ control.transforms << {block: lambda { |_| (@called = true) && (return nil) }}
36
+ kiba_run(control)
37
+ assert_nil @called
38
+ end
39
+
40
+ def test_post_process_runs_once
41
+ assert_equal 2, rows.size
42
+ @called = 0
43
+ control.post_processes << {block: lambda { @called += 1 }}
44
+ kiba_run(control)
45
+ assert_equal 1, @called
46
+ end
47
+
48
+ def test_post_process_not_called_after_row_failure
49
+ @called = nil
50
+ control.transforms << {block: lambda { |_| fail "FAIL" }}
51
+ control.post_processes << {block: lambda { @called = true }}
52
+ assert_raises(RuntimeError, "FAIL") { kiba_run(control) }
53
+ assert_nil @called
54
+ end
55
+
56
+ def test_pre_process_runs_once
57
+ assert_equal 2, rows.size
58
+ @called = 0
59
+ control.pre_processes << {block: lambda { @called += 1 }}
60
+ kiba_run(control)
61
+ assert_equal 1, @called
62
+ end
63
+
64
+ def test_pre_process_runs_before_source_is_instantiated
65
+ calls = []
66
+
67
+ mock_source_class = MiniTest::Mock.new
68
+ mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
69
+ calls << :source_instantiated
70
+ end
71
+
72
+ control = Kiba::Control.new
73
+ control.pre_processes << {block: lambda { calls << :pre_processor_executed }}
74
+ control.sources << {klass: mock_source_class}
75
+ kiba_run(control)
76
+
77
+ assert_equal [:pre_processor_executed, :source_instantiated], calls
78
+ assert_mock mock_source_class
79
+ end
80
+
81
+ def test_no_error_raised_if_destination_close_not_implemented
82
+ # NOTE: this fake destination does not implement `close`
83
+ destination_instance = MiniTest::Mock.new
84
+
85
+ mock_destination_class = MiniTest::Mock.new
86
+ mock_destination_class.expect(:new, destination_instance)
87
+
88
+ control = Kiba::Control.new
89
+ control.destinations << {klass: mock_destination_class}
90
+ kiba_run(control)
91
+ assert_mock mock_destination_class
92
+ end
93
+
94
+ def test_destination_close_called_if_defined
95
+ destination_instance = MiniTest::Mock.new
96
+ destination_instance.expect(:close, nil)
97
+ mock_destination_class = MiniTest::Mock.new
98
+ mock_destination_class.expect(:new, destination_instance)
99
+
100
+ control = Kiba::Control.new
101
+ control.destinations << {klass: mock_destination_class}
102
+ kiba_run(control)
103
+ assert_mock destination_instance
104
+ assert_mock mock_destination_class
105
+ end
106
+
107
+ def test_use_next_to_exit_early_from_block_transform
108
+ assert_equal 2, rows.size
109
+
110
+ # calling "return row" from a block is forbidden, but you can use "next" instead
111
+ b = lambda do |row|
112
+ if row.fetch(:identifier) == "first-row"
113
+ # demonstrate how to remove a row from the pipeline via next
114
+ next
115
+ else
116
+ # demonstrate how you can reformat via next
117
+ next({new_identifier: row.fetch(:identifier)})
118
+ end
119
+ end
120
+ control.transforms << {block: b}
121
+
122
+ # keep track of the rows
123
+ @remaining_rows = []
124
+ checker = lambda { |row| (@remaining_rows << row) && (return row) }
125
+ control.transforms << {block: checker}
126
+
127
+ kiba_run(control)
128
+
129
+ # the first row should have been removed
130
+ # and the second row should have been reformatted
131
+ assert_equal [{new_identifier: "second-row"}], @remaining_rows
132
+ end
133
+
134
+ def test_destination_returning_nil_does_not_remove_row_from_pipeline
135
+ # safeguard to avoid modification on the support code
136
+ assert_nil TestDestinationReturningNil.new.write("FOOBAR")
137
+
138
+ destinations = []
139
+ control = Kiba.parse do
140
+ source TestEnumerableSource, [{key: "value"}]
141
+ 2.times do
142
+ destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
143
+ end
144
+ end
145
+ kiba_run(control)
146
+ 2.times do |i|
147
+ assert_equal [{key: "value"}], destinations[i].instance_variable_get(:@written_rows)
148
+ end
149
+ end
150
+
151
+ def test_nil_transform_error_message
152
+ control = Kiba.parse do
153
+ transform
154
+ end
155
+ assert_raises(RuntimeError, "Nil parameters not allowed here") { kiba_run(control) }
156
+ end
157
+
158
+ def test_ruby_3_source_kwargs
159
+ # NOTE: before Ruby 3 kwargs support, a Ruby warning would
160
+ # be captured here with Ruby 2.7 & ensure we fail,
161
+ # and an error would be raised with Ruby 2.8.0-dev
162
+ # NOTE: only the first warning will be captured, though, but
163
+ # having 3 different tests is still better
164
+ storage = nil
165
+ assert_silent do
166
+ Kiba.run(Kiba.parse do
167
+ source TestKeywordArgumentsComponent,
168
+ mandatory: "first",
169
+ on_init: ->(values) { storage = values }
170
+ end)
171
+ end
172
+ assert_equal({
173
+ mandatory: "first",
174
+ optional: nil
175
+ }, storage)
176
+ end
177
+
178
+ def test_ruby_3_transform_kwargs
179
+ storage = nil
180
+ assert_silent do
181
+ Kiba.run(Kiba.parse do
182
+ transform TestKeywordArgumentsComponent,
183
+ mandatory: "first",
184
+ on_init: ->(values) { storage = values }
185
+ end)
186
+ end
187
+ assert_equal({
188
+ mandatory: "first",
189
+ optional: nil
190
+ }, storage)
191
+ end
192
+
193
+ def test_ruby_3_destination_kwargs
194
+ storage = nil
195
+ assert_silent do
196
+ Kiba.run(Kiba.parse do
197
+ destination TestKeywordArgumentsComponent,
198
+ mandatory: "first",
199
+ on_init: ->(values) { storage = values }
200
+ end)
201
+ end
202
+ assert_equal({
203
+ mandatory: "first",
204
+ optional: nil
205
+ }, storage)
206
+ end
207
+
208
+ def test_positional_plus_keyword_arguments
209
+ storage = nil
210
+ assert_silent do
211
+ Kiba.run(Kiba.parse do
212
+ source TestMixedArgumentsComponent,
213
+ "some positional argument",
214
+ mandatory: "first",
215
+ on_init: ->(values) {
216
+ storage = values
217
+ }
218
+ end)
219
+ end
220
+
221
+ assert_equal({
222
+ some_value: "some positional argument",
223
+ mandatory: "first",
224
+ optional: nil
225
+ }, storage)
226
+ end
227
+ end
data/test/support/shared_tests.rb CHANGED
@@ -5,6 +5,6 @@ module SharedTests
5
5
  end
6
6
 
7
7
  def shared_tests(desc, *args)
8
- self.class_exec(*args, &@@shared_tests.fetch(desc))
8
+ class_exec(*args, &@@shared_tests.fetch(desc))
9
9
  end
10
10
  end
data/test/support/test_aggregate_transform.rb ADDED
@@ -0,0 +1,19 @@
1
+ class AggregateTransform
2
+ def initialize(options)
3
+ @aggregate_size = options.fetch(:aggregate_size)
4
+ end
5
+
6
+ def process(row)
7
+ @buffer ||= []
8
+ @buffer << row
9
+ if @buffer.size == @aggregate_size
10
+ yield @buffer
11
+ @buffer = []
12
+ end
13
+ nil
14
+ end
15
+
16
+ def close
17
+ yield @buffer unless @buffer.empty?
18
+ end
19
+ end
data/test/support/test_array_destination.rb CHANGED
@@ -2,8 +2,8 @@ class TestArrayDestination
2
2
  def initialize(array)
3
3
  @array = array
4
4
  end
5
-
5
+
6
6
  def write(row)
7
7
  @array << row
8
8
  end
9
- end
9
+ end
data/test/support/test_close_yielding_transform.rb ADDED
@@ -0,0 +1,11 @@
1
+ class CloseYieldingTransform
2
+ def initialize(options)
3
+ @yield_on_close = options.fetch(:yield_on_close)
4
+ end
5
+
6
+ def close
7
+ @yield_on_close.each do |item|
8
+ yield item
9
+ end
10
+ end
11
+ end
data/test/support/test_csv_destination.rb CHANGED
@@ -1,9 +1,9 @@
1
- require 'csv'
1
+ require "csv"
2
2
 
3
3
  # simple destination, not checking that each row has all the fields
4
4
  class TestCsvDestination
5
5
  def initialize(output_file)
6
- @csv = CSV.open(output_file, 'w')
6
+ @csv = CSV.open(output_file, "w")
7
7
  @headers_written = false
8
8
  end
9
9
 
data/test/support/test_csv_source.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'csv'
1
+ require "csv"
2
2
 
3
3
  class TestCsvSource
4
4
  def initialize(input_file)
data/test/support/test_destination_returning_nil.rb ADDED
@@ -0,0 +1,12 @@
1
+ class TestDestinationReturningNil
2
+ def initialize(options = {})
3
+ on_init = options[:on_init]
4
+ # A little trick to allow outer references to this instance
5
+ on_init&.call(self)
6
+ end
7
+
8
+ def write(row)
9
+ (@written_rows ||= []) << row
10
+ nil
11
+ end
12
+ end
data/test/support/test_duplicate_row_transform.rb CHANGED
@@ -6,4 +6,4 @@ class TestDuplicateRowTranform
6
6
  end
7
7
  nil
8
8
  end
9
- end
9
+ end
data/test/support/test_keyword_arguments_component.rb ADDED
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestKeywordArgumentsComponent
3
+ def initialize(mandatory:, optional: nil, on_init: nil)
4
+ values = {
5
+ mandatory: mandatory,
6
+ optional: optional
7
+ }
8
+ on_init&.call(values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
data/test/support/test_mixed_arguments_component.rb ADDED
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestMixedArgumentsComponent
3
+ def initialize(some_value, mandatory:, on_init:, optional: nil)
4
+ @values = {}
5
+ @values[:some_value] = some_value
6
+ @values[:mandatory] = mandatory
7
+ @values[:optional] = optional
8
+ on_init&.call(@values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
data/test/support/test_non_closing_transform.rb ADDED
@@ -0,0 +1,5 @@
1
+ class NonClosingTransform
2
+ def process(row)
3
+ row
4
+ end
5
+ end
data/test/support/test_yielding_transform.rb CHANGED
@@ -5,4 +5,4 @@ class TestYieldingTransform
5
5
  end
6
6
  {item: "classic-return-value"}
7
7
  end
8
- end
8
+ end
data/test/test_integration.rb CHANGED
@@ -1,28 +1,33 @@
1
- require_relative 'helper'
1
+ require_relative "helper"
2
2
 
3
- require_relative 'support/test_csv_source'
4
- require_relative 'support/test_csv_destination'
5
- require_relative 'support/test_rename_field_transform'
6
- require_relative 'support/test_enumerable_source'
7
- require_relative 'support/test_source_that_reads_at_instantiation_time'
3
+ require_relative "support/test_csv_source"
4
+ require_relative "support/test_csv_destination"
5
+ require_relative "support/test_rename_field_transform"
6
+ require_relative "support/test_enumerable_source"
7
+ require_relative "support/test_source_that_reads_at_instantiation_time"
8
8
 
9
9
  # End-to-end tests go here
10
10
  class TestIntegration < Kiba::Test
11
- let(:output_file) { 'test/tmp/output.csv' }
12
- let(:input_file) { 'test/tmp/input.csv' }
13
-
14
- let(:sample_csv_data) do
15
- <<CSV
16
- first_name,last_name,sex
17
- John,Doe,M
18
- Mary,Johnson,F
19
- Cindy,Backgammon,F
20
- Patrick,McWire,M
21
- CSV
11
+ def output_file
12
+ "test/tmp/output.csv"
13
+ end
14
+
15
+ def input_file
16
+ "test/tmp/input.csv"
17
+ end
18
+
19
+ def sample_csv_data
20
+ <<~CSV
21
+ first_name,last_name,sex
22
+ John,Doe,M
23
+ Mary,Johnson,F
24
+ Cindy,Backgammon,F
25
+ Patrick,McWire,M
26
+ CSV
22
27
  end
23
28
 
24
29
  def clean
25
- remove_files(*Dir['test/tmp/*.csv'])
30
+ remove_files(*Dir["test/tmp/*.csv"])
26
31
  end
27
32
 
28
33
  def setup
@@ -37,36 +42,36 @@ CSV
37
42
  def test_csv_to_csv
38
43
  # parse the ETL script (this won't run it)
39
44
  control = Kiba.parse do
40
- source TestCsvSource, 'test/tmp/input.csv'
45
+ source TestCsvSource, "test/tmp/input.csv"
41
46
 
42
47
  transform do |row|
43
48
  row[:sex] = case row[:sex]
44
- when 'M' then 'Male'
45
- when 'F' then 'Female'
46
- else 'Unknown'
49
+ when "M" then "Male"
50
+ when "F" then "Female"
51
+ else "Unknown"
47
52
  end
48
53
  row # must be returned
49
54
  end
50
55
 
51
56
  # returning nil dismisses the row
52
57
  transform do |row|
53
- row[:sex] == 'Female' ? row : nil
58
+ row[:sex] == "Female" ? row : nil
54
59
  end
55
60
 
56
61
  transform TestRenameFieldTransform, :sex, :sex_2015
57
62
 
58
- destination TestCsvDestination, 'test/tmp/output.csv'
63
+ destination TestCsvDestination, "test/tmp/output.csv"
59
64
  end
60
65
 
61
66
  # run the parsed ETL script
62
67
  Kiba.run(control)
63
68
 
64
69
  # verify the output
65
- assert_equal <<CSV, IO.read(output_file)
66
- first_name,last_name,sex_2015
67
- Mary,Johnson,Female
68
- Cindy,Backgammon,Female
69
- CSV
70
+ assert_equal <<~CSV, IO.read(output_file)
71
+ first_name,last_name,sex_2015
72
+ Mary,Johnson,Female
73
+ Cindy,Backgammon,Female
74
+ CSV
70
75
  end
71
76
 
72
77
  def test_variable_access
@@ -97,18 +102,18 @@ CSV
97
102
 
98
103
  Kiba.run(control)
99
104
 
100
- assert_equal 'Count is now 103', message
105
+ assert_equal "Count is now 103", message
101
106
  end
102
107
 
103
108
  def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
104
- remove_files('test/tmp/eager.csv')
109
+ remove_files("test/tmp/eager.csv")
105
110
 
106
111
  control = Kiba.parse do
107
112
  pre_process do
108
- IO.write('test/tmp/eager.csv', 'something')
113
+ IO.write("test/tmp/eager.csv", "something")
109
114
  end
110
115
 
111
- source SourceThatReadsAtInstantionTime, 'test/tmp/eager.csv'
116
+ source SourceThatReadsAtInstantionTime, "test/tmp/eager.csv"
112
117
  end
113
118
 
114
119
  Kiba.run(control)