kiba 2.0.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.github/workflows/ci.yml +41 -0
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +38 -2
  6. data/Gemfile +1 -1
  7. data/ISSUE_TEMPLATE.md +7 -0
  8. data/LICENSE +3 -1
  9. data/Pro-Changes.md +82 -5
  10. data/README.md +12 -65
  11. data/Rakefile +8 -3
  12. data/kiba.gemspec +20 -17
  13. data/lib/kiba.rb +14 -11
  14. data/lib/kiba/context.rb +9 -5
  15. data/lib/kiba/control.rb +1 -1
  16. data/lib/kiba/dsl_extensions/config.rb +1 -1
  17. data/lib/kiba/parser.rb +6 -22
  18. data/lib/kiba/streaming_runner.rb +62 -5
  19. data/lib/kiba/version.rb +1 -1
  20. data/test/helper.rb +15 -7
  21. data/test/shared_runner_tests.rb +227 -0
  22. data/test/support/shared_tests.rb +1 -1
  23. data/test/support/test_aggregate_transform.rb +19 -0
  24. data/test/support/test_array_destination.rb +2 -2
  25. data/test/support/test_close_yielding_transform.rb +11 -0
  26. data/test/support/test_csv_destination.rb +2 -2
  27. data/test/support/test_csv_source.rb +1 -1
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +1 -1
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +1 -1
  34. data/test/test_integration.rb +38 -33
  35. data/test/test_parser.rb +16 -50
  36. data/test/test_run.rb +37 -0
  37. data/test/test_streaming_runner.rb +44 -23
  38. metadata +45 -30
  39. data/.travis.yml +0 -15
  40. data/appveyor.yml +0 -26
  41. data/bin/kiba +0 -5
  42. data/lib/kiba/cli.rb +0 -16
  43. data/lib/kiba/runner.rb +0 -78
  44. data/test/common/runner.rb +0 -137
  45. data/test/fixtures/bogus.etl +0 -2
  46. data/test/fixtures/namespace_conflict.etl +0 -9
  47. data/test/fixtures/some_extension.rb +0 -4
  48. data/test/fixtures/valid.etl +0 -1
  49. data/test/test_cli.rb +0 -21
  50. data/test/test_runner.rb +0 -6
data/lib/kiba/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kiba
2
- VERSION = '2.0.0'
2
+ VERSION = "4.0.0"
3
3
  end
data/test/helper.rb CHANGED
@@ -1,11 +1,13 @@
1
- require 'minitest/autorun'
2
- require 'minitest/pride'
3
- require 'minitest/focus'
4
- require 'kiba'
1
+ require "minitest/autorun"
2
+ require "minitest/pride"
3
+ require "minitest/focus"
4
+ require "kiba"
5
5
 
6
- class Kiba::Test < Minitest::Test
7
- extend Minitest::Spec::DSL
6
+ if ENV["CI"] == "true"
7
+ puts "Running with MiniTest version #{MiniTest::VERSION}"
8
+ end
8
9
 
10
+ class Kiba::Test < Minitest::Test
9
11
  def remove_files(*files)
10
12
  files.each do |file|
11
13
  File.delete(file) if File.exist?(file)
@@ -13,6 +15,12 @@ class Kiba::Test < Minitest::Test
13
15
  end
14
16
 
15
17
  def fixture(file)
16
- File.join(File.dirname(__FILE__), 'fixtures', file)
18
+ File.join(File.dirname(__FILE__), "fixtures", file)
19
+ end
20
+
21
+ unless method_defined?(:assert_mock)
22
+ def assert_mock(mock)
23
+ mock.verify
24
+ end
17
25
  end
18
26
  end
@@ -0,0 +1,227 @@
1
+ require "minitest/mock"
2
+ require_relative "support/test_enumerable_source"
3
+ require_relative "support/test_destination_returning_nil"
4
+
5
+ module SharedRunnerTests
6
+ def rows
7
+ @rows ||= [
8
+ {identifier: "first-row"},
9
+ {identifier: "second-row"}
10
+ ]
11
+ end
12
+
13
+ def control
14
+ @control ||= begin
15
+ control = Kiba::Control.new
16
+ # this will yield a single row for testing
17
+ control.sources << {
18
+ klass: TestEnumerableSource,
19
+ args: [rows]
20
+ }
21
+ control
22
+ end
23
+ end
24
+
25
+ def test_block_transform_processing
26
+ # is there a better way to assert a block was called in minitest?
27
+ control.transforms << {block: lambda { |r| (@called = true) && (return r) }}
28
+ kiba_run(control)
29
+ assert_equal true, @called
30
+ end
31
+
32
+ def test_dismissed_row_not_passed_to_next_transform
33
+ @called = nil
34
+ control.transforms << {block: lambda { |_| }}
35
+ control.transforms << {block: lambda { |_| (@called = true) && (return nil) }}
36
+ kiba_run(control)
37
+ assert_nil @called
38
+ end
39
+
40
+ def test_post_process_runs_once
41
+ assert_equal 2, rows.size
42
+ @called = 0
43
+ control.post_processes << {block: lambda { @called += 1 }}
44
+ kiba_run(control)
45
+ assert_equal 1, @called
46
+ end
47
+
48
+ def test_post_process_not_called_after_row_failure
49
+ @called = nil
50
+ control.transforms << {block: lambda { |_| fail "FAIL" }}
51
+ control.post_processes << {block: lambda { @called = true }}
52
+ assert_raises(RuntimeError, "FAIL") { kiba_run(control) }
53
+ assert_nil @called
54
+ end
55
+
56
+ def test_pre_process_runs_once
57
+ assert_equal 2, rows.size
58
+ @called = 0
59
+ control.pre_processes << {block: lambda { @called += 1 }}
60
+ kiba_run(control)
61
+ assert_equal 1, @called
62
+ end
63
+
64
+ def test_pre_process_runs_before_source_is_instantiated
65
+ calls = []
66
+
67
+ mock_source_class = MiniTest::Mock.new
68
+ mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
69
+ calls << :source_instantiated
70
+ end
71
+
72
+ control = Kiba::Control.new
73
+ control.pre_processes << {block: lambda { calls << :pre_processor_executed }}
74
+ control.sources << {klass: mock_source_class}
75
+ kiba_run(control)
76
+
77
+ assert_equal [:pre_processor_executed, :source_instantiated], calls
78
+ assert_mock mock_source_class
79
+ end
80
+
81
+ def test_no_error_raised_if_destination_close_not_implemented
82
+ # NOTE: this fake destination does not implement `close`
83
+ destination_instance = MiniTest::Mock.new
84
+
85
+ mock_destination_class = MiniTest::Mock.new
86
+ mock_destination_class.expect(:new, destination_instance)
87
+
88
+ control = Kiba::Control.new
89
+ control.destinations << {klass: mock_destination_class}
90
+ kiba_run(control)
91
+ assert_mock mock_destination_class
92
+ end
93
+
94
+ def test_destination_close_called_if_defined
95
+ destination_instance = MiniTest::Mock.new
96
+ destination_instance.expect(:close, nil)
97
+ mock_destination_class = MiniTest::Mock.new
98
+ mock_destination_class.expect(:new, destination_instance)
99
+
100
+ control = Kiba::Control.new
101
+ control.destinations << {klass: mock_destination_class}
102
+ kiba_run(control)
103
+ assert_mock destination_instance
104
+ assert_mock mock_destination_class
105
+ end
106
+
107
+ def test_use_next_to_exit_early_from_block_transform
108
+ assert_equal 2, rows.size
109
+
110
+ # calling "return row" from a block is forbidden, but you can use "next" instead
111
+ b = lambda do |row|
112
+ if row.fetch(:identifier) == "first-row"
113
+ # demonstrate how to remove a row from the pipeline via next
114
+ next
115
+ else
116
+ # demonstrate how you can reformat via next
117
+ next({new_identifier: row.fetch(:identifier)})
118
+ end
119
+ end
120
+ control.transforms << {block: b}
121
+
122
+ # keep track of the rows
123
+ @remaining_rows = []
124
+ checker = lambda { |row| (@remaining_rows << row) && (return row) }
125
+ control.transforms << {block: checker}
126
+
127
+ kiba_run(control)
128
+
129
+ # the first row should have been removed
130
+ # and the second row should have been reformatted
131
+ assert_equal [{new_identifier: "second-row"}], @remaining_rows
132
+ end
133
+
134
+ def test_destination_returning_nil_does_not_remove_row_from_pipeline
135
+ # safeguard to avoid modification on the support code
136
+ assert_nil TestDestinationReturningNil.new.write("FOOBAR")
137
+
138
+ destinations = []
139
+ control = Kiba.parse do
140
+ source TestEnumerableSource, [{key: "value"}]
141
+ 2.times do
142
+ destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
143
+ end
144
+ end
145
+ kiba_run(control)
146
+ 2.times do |i|
147
+ assert_equal [{key: "value"}], destinations[i].instance_variable_get(:@written_rows)
148
+ end
149
+ end
150
+
151
+ def test_nil_transform_error_message
152
+ control = Kiba.parse do
153
+ transform
154
+ end
155
+ assert_raises(RuntimeError, "Nil parameters not allowed here") { kiba_run(control) }
156
+ end
157
+
158
+ def test_ruby_3_source_kwargs
159
+ # NOTE: before Ruby 3 kwargs support, a Ruby warning would
160
+ # be captured here with Ruby 2.7 & ensure we fail,
161
+ # and an error would be raised with Ruby 2.8.0-dev
162
+ # NOTE: only the first warning will be captured, though, but
163
+ # having 3 different tests is still better
164
+ storage = nil
165
+ assert_silent do
166
+ Kiba.run(Kiba.parse do
167
+ source TestKeywordArgumentsComponent,
168
+ mandatory: "first",
169
+ on_init: ->(values) { storage = values }
170
+ end)
171
+ end
172
+ assert_equal({
173
+ mandatory: "first",
174
+ optional: nil
175
+ }, storage)
176
+ end
177
+
178
+ def test_ruby_3_transform_kwargs
179
+ storage = nil
180
+ assert_silent do
181
+ Kiba.run(Kiba.parse do
182
+ transform TestKeywordArgumentsComponent,
183
+ mandatory: "first",
184
+ on_init: ->(values) { storage = values }
185
+ end)
186
+ end
187
+ assert_equal({
188
+ mandatory: "first",
189
+ optional: nil
190
+ }, storage)
191
+ end
192
+
193
+ def test_ruby_3_destination_kwargs
194
+ storage = nil
195
+ assert_silent do
196
+ Kiba.run(Kiba.parse do
197
+ destination TestKeywordArgumentsComponent,
198
+ mandatory: "first",
199
+ on_init: ->(values) { storage = values }
200
+ end)
201
+ end
202
+ assert_equal({
203
+ mandatory: "first",
204
+ optional: nil
205
+ }, storage)
206
+ end
207
+
208
+ def test_positional_plus_keyword_arguments
209
+ storage = nil
210
+ assert_silent do
211
+ Kiba.run(Kiba.parse do
212
+ source TestMixedArgumentsComponent,
213
+ "some positional argument",
214
+ mandatory: "first",
215
+ on_init: ->(values) {
216
+ storage = values
217
+ }
218
+ end)
219
+ end
220
+
221
+ assert_equal({
222
+ some_value: "some positional argument",
223
+ mandatory: "first",
224
+ optional: nil
225
+ }, storage)
226
+ end
227
+ end
@@ -5,6 +5,6 @@ module SharedTests
5
5
  end
6
6
 
7
7
  def shared_tests(desc, *args)
8
- self.class_exec(*args, &@@shared_tests.fetch(desc))
8
+ class_exec(*args, &@@shared_tests.fetch(desc))
9
9
  end
10
10
  end
@@ -0,0 +1,19 @@
1
+ class AggregateTransform
2
+ def initialize(options)
3
+ @aggregate_size = options.fetch(:aggregate_size)
4
+ end
5
+
6
+ def process(row)
7
+ @buffer ||= []
8
+ @buffer << row
9
+ if @buffer.size == @aggregate_size
10
+ yield @buffer
11
+ @buffer = []
12
+ end
13
+ nil
14
+ end
15
+
16
+ def close
17
+ yield @buffer unless @buffer.empty?
18
+ end
19
+ end
@@ -2,8 +2,8 @@ class TestArrayDestination
2
2
  def initialize(array)
3
3
  @array = array
4
4
  end
5
-
5
+
6
6
  def write(row)
7
7
  @array << row
8
8
  end
9
- end
9
+ end
@@ -0,0 +1,11 @@
1
+ class CloseYieldingTransform
2
+ def initialize(options)
3
+ @yield_on_close = options.fetch(:yield_on_close)
4
+ end
5
+
6
+ def close
7
+ @yield_on_close.each do |item|
8
+ yield item
9
+ end
10
+ end
11
+ end
@@ -1,9 +1,9 @@
1
- require 'csv'
1
+ require "csv"
2
2
 
3
3
  # simple destination, not checking that each row has all the fields
4
4
  class TestCsvDestination
5
5
  def initialize(output_file)
6
- @csv = CSV.open(output_file, 'w')
6
+ @csv = CSV.open(output_file, "w")
7
7
  @headers_written = false
8
8
  end
9
9
 
@@ -1,4 +1,4 @@
1
- require 'csv'
1
+ require "csv"
2
2
 
3
3
  class TestCsvSource
4
4
  def initialize(input_file)
@@ -0,0 +1,12 @@
1
+ class TestDestinationReturningNil
2
+ def initialize(options = {})
3
+ on_init = options[:on_init]
4
+ # A little trick to allow outer references to this instance
5
+ on_init&.call(self)
6
+ end
7
+
8
+ def write(row)
9
+ (@written_rows ||= []) << row
10
+ nil
11
+ end
12
+ end
@@ -6,4 +6,4 @@ class TestDuplicateRowTranform
6
6
  end
7
7
  nil
8
8
  end
9
- end
9
+ end
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestKeywordArgumentsComponent
3
+ def initialize(mandatory:, optional: nil, on_init: nil)
4
+ values = {
5
+ mandatory: mandatory,
6
+ optional: optional
7
+ }
8
+ on_init&.call(values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestMixedArgumentsComponent
3
+ def initialize(some_value, mandatory:, on_init:, optional: nil)
4
+ @values = {}
5
+ @values[:some_value] = some_value
6
+ @values[:mandatory] = mandatory
7
+ @values[:optional] = optional
8
+ on_init&.call(@values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
@@ -0,0 +1,5 @@
1
+ class NonClosingTransform
2
+ def process(row)
3
+ row
4
+ end
5
+ end
@@ -5,4 +5,4 @@ class TestYieldingTransform
5
5
  end
6
6
  {item: "classic-return-value"}
7
7
  end
8
- end
8
+ end
@@ -1,28 +1,33 @@
1
- require_relative 'helper'
1
+ require_relative "helper"
2
2
 
3
- require_relative 'support/test_csv_source'
4
- require_relative 'support/test_csv_destination'
5
- require_relative 'support/test_rename_field_transform'
6
- require_relative 'support/test_enumerable_source'
7
- require_relative 'support/test_source_that_reads_at_instantiation_time'
3
+ require_relative "support/test_csv_source"
4
+ require_relative "support/test_csv_destination"
5
+ require_relative "support/test_rename_field_transform"
6
+ require_relative "support/test_enumerable_source"
7
+ require_relative "support/test_source_that_reads_at_instantiation_time"
8
8
 
9
9
  # End-to-end tests go here
10
10
  class TestIntegration < Kiba::Test
11
- let(:output_file) { 'test/tmp/output.csv' }
12
- let(:input_file) { 'test/tmp/input.csv' }
13
-
14
- let(:sample_csv_data) do
15
- <<CSV
16
- first_name,last_name,sex
17
- John,Doe,M
18
- Mary,Johnson,F
19
- Cindy,Backgammon,F
20
- Patrick,McWire,M
21
- CSV
11
+ def output_file
12
+ "test/tmp/output.csv"
13
+ end
14
+
15
+ def input_file
16
+ "test/tmp/input.csv"
17
+ end
18
+
19
+ def sample_csv_data
20
+ <<~CSV
21
+ first_name,last_name,sex
22
+ John,Doe,M
23
+ Mary,Johnson,F
24
+ Cindy,Backgammon,F
25
+ Patrick,McWire,M
26
+ CSV
22
27
  end
23
28
 
24
29
  def clean
25
- remove_files(*Dir['test/tmp/*.csv'])
30
+ remove_files(*Dir["test/tmp/*.csv"])
26
31
  end
27
32
 
28
33
  def setup
@@ -37,36 +42,36 @@ CSV
37
42
  def test_csv_to_csv
38
43
  # parse the ETL script (this won't run it)
39
44
  control = Kiba.parse do
40
- source TestCsvSource, 'test/tmp/input.csv'
45
+ source TestCsvSource, "test/tmp/input.csv"
41
46
 
42
47
  transform do |row|
43
48
  row[:sex] = case row[:sex]
44
- when 'M' then 'Male'
45
- when 'F' then 'Female'
46
- else 'Unknown'
49
+ when "M" then "Male"
50
+ when "F" then "Female"
51
+ else "Unknown"
47
52
  end
48
53
  row # must be returned
49
54
  end
50
55
 
51
56
  # returning nil dismisses the row
52
57
  transform do |row|
53
- row[:sex] == 'Female' ? row : nil
58
+ row[:sex] == "Female" ? row : nil
54
59
  end
55
60
 
56
61
  transform TestRenameFieldTransform, :sex, :sex_2015
57
62
 
58
- destination TestCsvDestination, 'test/tmp/output.csv'
63
+ destination TestCsvDestination, "test/tmp/output.csv"
59
64
  end
60
65
 
61
66
  # run the parsed ETL script
62
67
  Kiba.run(control)
63
68
 
64
69
  # verify the output
65
- assert_equal <<CSV, IO.read(output_file)
66
- first_name,last_name,sex_2015
67
- Mary,Johnson,Female
68
- Cindy,Backgammon,Female
69
- CSV
70
+ assert_equal <<~CSV, IO.read(output_file)
71
+ first_name,last_name,sex_2015
72
+ Mary,Johnson,Female
73
+ Cindy,Backgammon,Female
74
+ CSV
70
75
  end
71
76
 
72
77
  def test_variable_access
@@ -97,18 +102,18 @@ CSV
97
102
 
98
103
  Kiba.run(control)
99
104
 
100
- assert_equal 'Count is now 103', message
105
+ assert_equal "Count is now 103", message
101
106
  end
102
107
 
103
108
  def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
104
- remove_files('test/tmp/eager.csv')
109
+ remove_files("test/tmp/eager.csv")
105
110
 
106
111
  control = Kiba.parse do
107
112
  pre_process do
108
- IO.write('test/tmp/eager.csv', 'something')
113
+ IO.write("test/tmp/eager.csv", "something")
109
114
  end
110
115
 
111
- source SourceThatReadsAtInstantionTime, 'test/tmp/eager.csv'
116
+ source SourceThatReadsAtInstantionTime, "test/tmp/eager.csv"
112
117
  end
113
118
 
114
119
  Kiba.run(control)