kiba 1.0.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.travis.yml +11 -9
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +28 -0
  6. data/ISSUE_TEMPLATE.md +7 -0
  7. data/LICENSE +7 -0
  8. data/Pro-Changes.md +108 -0
  9. data/README.md +15 -282
  10. data/Rakefile +6 -1
  11. data/appveyor.yml +19 -9
  12. data/bin/kiba +12 -2
  13. data/kiba.gemspec +6 -1
  14. data/lib/kiba.rb +10 -1
  15. data/lib/kiba/context.rb +4 -0
  16. data/lib/kiba/control.rb +4 -0
  17. data/lib/kiba/dsl_extensions/config.rb +9 -0
  18. data/lib/kiba/parser.rb +4 -9
  19. data/lib/kiba/runner.rb +14 -5
  20. data/lib/kiba/streaming_runner.rb +38 -0
  21. data/lib/kiba/version.rb +1 -1
  22. data/test/helper.rb +11 -2
  23. data/test/shared_runner_tests.rb +228 -0
  24. data/test/support/shared_tests.rb +10 -0
  25. data/test/support/test_aggregate_transform.rb +19 -0
  26. data/test/support/test_array_destination.rb +9 -0
  27. data/test/support/test_close_yielding_transform.rb +11 -0
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +9 -0
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +8 -0
  34. data/test/test_integration.rb +3 -3
  35. data/test/test_parser.rb +34 -29
  36. data/test/test_run.rb +12 -0
  37. data/test/test_runner.rb +5 -81
  38. data/test/test_streaming_runner.rb +70 -0
  39. metadata +57 -16
  40. data/lib/kiba/cli.rb +0 -16
  41. data/test/fixtures/bogus.etl +0 -2
  42. data/test/fixtures/valid.etl +0 -1
  43. data/test/test_cli.rb +0 -17
@@ -1,14 +1,9 @@
1
1
  module Kiba
2
2
  module Parser
3
- def parse(source_as_string = nil, source_file = nil, &source_as_block)
4
- control = Control.new
5
- context = Context.new(control)
6
- if source_as_string
7
- # this somewhat weird construct allows to remove a nil source_file
8
- context.instance_eval(*[source_as_string, source_file].compact)
9
- else
10
- context.instance_eval(&source_as_block)
11
- end
3
+ def parse(&source_as_block)
4
+ control = Kiba::Control.new
5
+ context = Kiba::Context.new(control)
6
+ context.instance_eval(&source_as_block)
12
7
  control
13
8
  end
14
9
  end
@@ -1,5 +1,7 @@
1
1
  module Kiba
2
2
  module Runner
3
+ extend self
4
+
3
5
  # allow to handle a block form just like a regular transform
4
6
  class AliasingProc < Proc
5
7
  alias_method :process, :call
@@ -13,8 +15,9 @@ module Kiba
13
15
  process_rows(
14
16
  to_instances(control.sources),
15
17
  to_instances(control.transforms, true),
16
- to_instances(control.destinations)
18
+ destinations = to_instances(control.destinations)
17
19
  )
20
+ close_destinations(destinations)
18
21
  # TODO: when I add post processes as class, I'll have to add a test to
19
22
  # make sure instantiation occurs after the main processing is done (#16)
20
23
  run_post_processes(control)
@@ -28,6 +31,12 @@ module Kiba
28
31
  to_instances(control.post_processes, true, false).each(&:call)
29
32
  end
30
33
 
34
+ def close_destinations(destinations)
35
+ destinations
36
+ .find_all { |d| d.respond_to?(:close) }
37
+ .each(&:close)
38
+ end
39
+
31
40
  def process_rows(sources, transforms, destinations)
32
41
  sources.each do |source|
33
42
  source.each do |row|
@@ -41,7 +50,6 @@ module Kiba
41
50
  end
42
51
  end
43
52
  end
44
- destinations.find_all { |d| d.respond_to?(:close) }.each(&:close)
45
53
  end
46
54
 
47
55
  # not using keyword args because JRuby defaults to 1.9 syntax currently
@@ -55,15 +63,16 @@ module Kiba
55
63
  end
56
64
 
57
65
  def to_instance(klass, args, block, allow_block, allow_class)
58
- if klass
66
+ if klass && block
67
+ fail 'Class and block form cannot be used together at the moment'
68
+ elsif klass
59
69
  fail 'Class form is not allowed here' unless allow_class
60
70
  klass.new(*args)
61
71
  elsif block
62
72
  fail 'Block form is not allowed here' unless allow_block
63
73
  AliasingProc.new(&block)
64
74
  else
65
- # TODO: support block passing to a class form definition?
66
- fail 'Class and block form cannot be used together at the moment'
75
+ fail 'Nil parameters not allowed here'
67
76
  end
68
77
  end
69
78
  end
@@ -0,0 +1,38 @@
1
+ module Kiba
2
+ module StreamingRunner
3
+ include Runner
4
+ extend self
5
+
6
+ def transform_stream(stream, t)
7
+ Enumerator.new do |y|
8
+ stream.each do |input_row|
9
+ returned_row = t.process(input_row) do |yielded_row|
10
+ y << yielded_row
11
+ end
12
+ y << returned_row if returned_row
13
+ end
14
+ if t.respond_to?(:close)
15
+ t.close do |close_row|
16
+ y << close_row
17
+ end
18
+ end
19
+ end
20
+ end
21
+
22
+ def source_stream(sources)
23
+ Enumerator.new do |y|
24
+ sources.each do |source|
25
+ source.each { |r| y << r }
26
+ end
27
+ end
28
+ end
29
+
30
+ def process_rows(sources, transforms, destinations)
31
+ stream = source_stream(sources)
32
+ recurser = lambda { |s,t| transform_stream(s, t) }
33
+ transforms.inject(stream, &recurser).each do |r|
34
+ destinations.each { |d| d.write(r) }
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,3 +1,3 @@
1
1
  module Kiba
2
- VERSION = '1.0.0'
2
+ VERSION = '3.5.0'
3
3
  end
@@ -1,10 +1,13 @@
1
1
  require 'minitest/autorun'
2
2
  require 'minitest/pride'
3
+ require 'minitest/focus'
3
4
  require 'kiba'
4
5
 
5
- class Kiba::Test < Minitest::Test
6
- extend Minitest::Spec::DSL
6
+ if ENV['CI'] == 'true'
7
+ puts "Running with MiniTest version #{MiniTest::VERSION}"
8
+ end
7
9
 
10
+ class Kiba::Test < Minitest::Test
8
11
  def remove_files(*files)
9
12
  files.each do |file|
10
13
  File.delete(file) if File.exist?(file)
@@ -14,4 +17,10 @@ class Kiba::Test < Minitest::Test
14
17
  def fixture(file)
15
18
  File.join(File.dirname(__FILE__), 'fixtures', file)
16
19
  end
20
+
21
+ unless self.method_defined?(:assert_mock)
22
+ def assert_mock(mock)
23
+ mock.verify
24
+ end
25
+ end
17
26
  end
@@ -0,0 +1,228 @@
1
+ require 'minitest/mock'
2
+ require_relative 'support/test_enumerable_source'
3
+ require_relative 'support/test_destination_returning_nil'
4
+
5
+ module SharedRunnerTests
6
+ def rows
7
+ @rows ||= [
8
+ { identifier: 'first-row' },
9
+ { identifier: 'second-row' }
10
+ ]
11
+ end
12
+
13
+ def control
14
+ @control ||= begin
15
+ control = Kiba::Control.new
16
+ # this will yield a single row for testing
17
+ control.sources << {
18
+ klass: TestEnumerableSource,
19
+ args: [rows]
20
+ }
21
+ control
22
+ end
23
+ end
24
+
25
+ def test_block_transform_processing
26
+ # is there a better way to assert a block was called in minitest?
27
+ control.transforms << { block: lambda { |r| @called = true; r } }
28
+ kiba_run(control)
29
+ assert_equal true, @called
30
+ end
31
+
32
+ def test_dismissed_row_not_passed_to_next_transform
33
+ @called = nil
34
+ control.transforms << { block: lambda { |_| nil } }
35
+ control.transforms << { block: lambda { |_| @called = true; nil } }
36
+ kiba_run(control)
37
+ assert_nil @called
38
+ end
39
+
40
+ def test_post_process_runs_once
41
+ assert_equal 2, rows.size
42
+ @called = 0
43
+ control.post_processes << { block: lambda { @called += 1 } }
44
+ kiba_run(control)
45
+ assert_equal 1, @called
46
+ end
47
+
48
+ def test_post_process_not_called_after_row_failure
49
+ @called = nil
50
+ control.transforms << { block: lambda { |_| fail 'FAIL' } }
51
+ control.post_processes << { block: lambda { @called = true } }
52
+ assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
53
+ assert_nil @called
54
+ end
55
+
56
+ def test_pre_process_runs_once
57
+ assert_equal 2, rows.size
58
+ @called = 0
59
+ control.pre_processes << { block: lambda { @called += 1 } }
60
+ kiba_run(control)
61
+ assert_equal 1, @called
62
+ end
63
+
64
+ def test_pre_process_runs_before_source_is_instantiated
65
+ calls = []
66
+
67
+ mock_source_class = MiniTest::Mock.new
68
+ mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
69
+ calls << :source_instantiated
70
+ end
71
+
72
+ control = Kiba::Control.new
73
+ control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
74
+ control.sources << { klass: mock_source_class }
75
+ kiba_run(control)
76
+
77
+ assert_equal [:pre_processor_executed, :source_instantiated], calls
78
+ assert_mock mock_source_class
79
+ end
80
+
81
+ def test_no_error_raised_if_destination_close_not_implemented
82
+ # NOTE: this fake destination does not implement `close`
83
+ destination_instance = MiniTest::Mock.new
84
+
85
+ mock_destination_class = MiniTest::Mock.new
86
+ mock_destination_class.expect(:new, destination_instance)
87
+
88
+ control = Kiba::Control.new
89
+ control.destinations << { klass: mock_destination_class }
90
+ kiba_run(control)
91
+ assert_mock mock_destination_class
92
+ end
93
+
94
+ def test_destination_close_called_if_defined
95
+ destination_instance = MiniTest::Mock.new
96
+ destination_instance.expect(:close, nil)
97
+ mock_destination_class = MiniTest::Mock.new
98
+ mock_destination_class.expect(:new, destination_instance)
99
+
100
+ control = Kiba::Control.new
101
+ control.destinations << { klass: mock_destination_class }
102
+ kiba_run(control)
103
+ assert_mock destination_instance
104
+ assert_mock mock_destination_class
105
+ end
106
+
107
+ def test_use_next_to_exit_early_from_block_transform
108
+ assert_equal 2, rows.size
109
+
110
+ # calling "return row" from a block is forbidden, but you can use "next" instead
111
+ b = lambda do |row|
112
+ if row.fetch(:identifier) == 'first-row'
113
+ # demonstrate how to remove a row from the pipeline via next
114
+ next
115
+ else
116
+ # demonstrate how you can reformat via next
117
+ next({new_identifier: row.fetch(:identifier)})
118
+ end
119
+ fail "This should not be called"
120
+ end
121
+ control.transforms << { block: b }
122
+
123
+ # keep track of the rows
124
+ @remaining_rows = []
125
+ checker = lambda { |row| @remaining_rows << row; row }
126
+ control.transforms << { block: checker }
127
+
128
+ kiba_run(control)
129
+
130
+ # the first row should have been removed
131
+ # and the second row should have been reformatted
132
+ assert_equal [{new_identifier: 'second-row'}], @remaining_rows
133
+ end
134
+
135
+ def test_destination_returning_nil_does_not_remove_row_from_pipeline
136
+ # safeguard to avoid modification on the support code
137
+ assert_nil TestDestinationReturningNil.new.write("FOOBAR")
138
+
139
+ destinations = []
140
+ control = Kiba.parse do
141
+ source TestEnumerableSource, [{key: 'value'}]
142
+ 2.times do
143
+ destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
144
+ end
145
+ end
146
+ kiba_run(control)
147
+ 2.times do |i|
148
+ assert_equal [{key: 'value'}], destinations[i].instance_variable_get(:@written_rows)
149
+ end
150
+ end
151
+
152
+ def test_nil_transform_error_message
153
+ control = Kiba.parse do
154
+ transform
155
+ end
156
+ assert_raises(RuntimeError, 'Nil parameters not allowed here') { kiba_run(control) }
157
+ end
158
+
159
+ def test_ruby_3_source_kwargs
160
+ # NOTE: before Ruby 3 kwargs support, a Ruby warning would
161
+ # be captured here with Ruby 2.7 & ensure we fail,
162
+ # and an error would be raised with Ruby 2.8.0-dev
163
+ # NOTE: only the first warning will be captured, though, but
164
+ # having 3 different tests is still better
165
+ storage = nil
166
+ assert_silent do
167
+ Kiba.run(Kiba.parse do
168
+ source TestKeywordArgumentsComponent,
169
+ mandatory: "first",
170
+ on_init: -> (values) { storage = values }
171
+ end)
172
+ end
173
+ assert_equal({
174
+ mandatory: "first",
175
+ optional: nil
176
+ }, storage)
177
+ end
178
+
179
+ def test_ruby_3_transform_kwargs
180
+ storage = nil
181
+ assert_silent do
182
+ Kiba.run(Kiba.parse do
183
+ transform TestKeywordArgumentsComponent,
184
+ mandatory: "first",
185
+ on_init: -> (values) { storage = values }
186
+ end)
187
+ end
188
+ assert_equal({
189
+ mandatory: "first",
190
+ optional: nil
191
+ }, storage)
192
+ end
193
+
194
+ def test_ruby_3_destination_kwargs
195
+ storage = nil
196
+ assert_silent do
197
+ Kiba.run(Kiba.parse do
198
+ destination TestKeywordArgumentsComponent,
199
+ mandatory: "first",
200
+ on_init: -> (values) { storage = values }
201
+ end)
202
+ end
203
+ assert_equal({
204
+ mandatory: "first",
205
+ optional: nil
206
+ }, storage)
207
+ end
208
+
209
+ def test_positional_plus_keyword_arguments
210
+ storage = nil
211
+ assert_silent do
212
+ Kiba.run(Kiba.parse do
213
+ source TestMixedArgumentsComponent,
214
+ "some positional argument",
215
+ mandatory: "first",
216
+ on_init: -> (values) {
217
+ storage = values
218
+ }
219
+ end)
220
+ end
221
+
222
+ assert_equal({
223
+ some_value: "some positional argument",
224
+ mandatory: "first",
225
+ optional: nil
226
+ }, storage)
227
+ end
228
+ end
@@ -0,0 +1,10 @@
1
+ module SharedTests
2
+ def shared_tests_for(desc, &block)
3
+ @@shared_tests ||= {}
4
+ @@shared_tests[desc] = block
5
+ end
6
+
7
+ def shared_tests(desc, *args)
8
+ self.class_exec(*args, &@@shared_tests.fetch(desc))
9
+ end
10
+ end
@@ -0,0 +1,19 @@
1
+ class AggregateTransform
2
+ def initialize(options)
3
+ @aggregate_size = options.fetch(:aggregate_size)
4
+ end
5
+
6
+ def process(row)
7
+ @buffer ||= []
8
+ @buffer << row
9
+ if @buffer.size == @aggregate_size
10
+ yield @buffer
11
+ @buffer = []
12
+ end
13
+ nil
14
+ end
15
+
16
+ def close
17
+ yield @buffer unless @buffer.empty?
18
+ end
19
+ end
@@ -0,0 +1,9 @@
1
+ class TestArrayDestination
2
+ def initialize(array)
3
+ @array = array
4
+ end
5
+
6
+ def write(row)
7
+ @array << row
8
+ end
9
+ end
@@ -0,0 +1,11 @@
1
+ class CloseYieldingTransform
2
+ def initialize(options)
3
+ @yield_on_close = options.fetch(:yield_on_close)
4
+ end
5
+
6
+ def close
7
+ @yield_on_close.each do |item|
8
+ yield item
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,12 @@
1
+ class TestDestinationReturningNil
2
+ def initialize(options = {})
3
+ on_init = options[:on_init]
4
+ # A little trick to allow outer references to this instance
5
+ on_init.call(self) if on_init
6
+ end
7
+
8
+ def write(row)
9
+ (@written_rows ||= []) << row
10
+ nil
11
+ end
12
+ end