kiba 1.0.0 → 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.travis.yml +11 -9
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +28 -0
  6. data/ISSUE_TEMPLATE.md +7 -0
  7. data/LICENSE +7 -0
  8. data/Pro-Changes.md +108 -0
  9. data/README.md +15 -282
  10. data/Rakefile +6 -1
  11. data/appveyor.yml +19 -9
  12. data/bin/kiba +12 -2
  13. data/kiba.gemspec +6 -1
  14. data/lib/kiba.rb +10 -1
  15. data/lib/kiba/context.rb +4 -0
  16. data/lib/kiba/control.rb +4 -0
  17. data/lib/kiba/dsl_extensions/config.rb +9 -0
  18. data/lib/kiba/parser.rb +4 -9
  19. data/lib/kiba/runner.rb +14 -5
  20. data/lib/kiba/streaming_runner.rb +38 -0
  21. data/lib/kiba/version.rb +1 -1
  22. data/test/helper.rb +11 -2
  23. data/test/shared_runner_tests.rb +228 -0
  24. data/test/support/shared_tests.rb +10 -0
  25. data/test/support/test_aggregate_transform.rb +19 -0
  26. data/test/support/test_array_destination.rb +9 -0
  27. data/test/support/test_close_yielding_transform.rb +11 -0
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +9 -0
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +8 -0
  34. data/test/test_integration.rb +3 -3
  35. data/test/test_parser.rb +34 -29
  36. data/test/test_run.rb +12 -0
  37. data/test/test_runner.rb +5 -81
  38. data/test/test_streaming_runner.rb +70 -0
  39. metadata +57 -16
  40. data/lib/kiba/cli.rb +0 -16
  41. data/test/fixtures/bogus.etl +0 -2
  42. data/test/fixtures/valid.etl +0 -1
  43. data/test/test_cli.rb +0 -17
@@ -1,14 +1,9 @@
1
1
  module Kiba
2
2
  module Parser
3
- def parse(source_as_string = nil, source_file = nil, &source_as_block)
4
- control = Control.new
5
- context = Context.new(control)
6
- if source_as_string
7
- # this somewhat weird construct allows to remove a nil source_file
8
- context.instance_eval(*[source_as_string, source_file].compact)
9
- else
10
- context.instance_eval(&source_as_block)
11
- end
3
+ def parse(&source_as_block)
4
+ control = Kiba::Control.new
5
+ context = Kiba::Context.new(control)
6
+ context.instance_eval(&source_as_block)
12
7
  control
13
8
  end
14
9
  end
@@ -1,5 +1,7 @@
1
1
  module Kiba
2
2
  module Runner
3
+ extend self
4
+
3
5
  # allow to handle a block form just like a regular transform
4
6
  class AliasingProc < Proc
5
7
  alias_method :process, :call
@@ -13,8 +15,9 @@ module Kiba
13
15
  process_rows(
14
16
  to_instances(control.sources),
15
17
  to_instances(control.transforms, true),
16
- to_instances(control.destinations)
18
+ destinations = to_instances(control.destinations)
17
19
  )
20
+ close_destinations(destinations)
18
21
  # TODO: when I add post processes as class, I'll have to add a test to
19
22
  # make sure instantiation occurs after the main processing is done (#16)
20
23
  run_post_processes(control)
@@ -28,6 +31,12 @@ module Kiba
28
31
  to_instances(control.post_processes, true, false).each(&:call)
29
32
  end
30
33
 
34
+ def close_destinations(destinations)
35
+ destinations
36
+ .find_all { |d| d.respond_to?(:close) }
37
+ .each(&:close)
38
+ end
39
+
31
40
  def process_rows(sources, transforms, destinations)
32
41
  sources.each do |source|
33
42
  source.each do |row|
@@ -41,7 +50,6 @@ module Kiba
41
50
  end
42
51
  end
43
52
  end
44
- destinations.find_all { |d| d.respond_to?(:close) }.each(&:close)
45
53
  end
46
54
 
47
55
  # not using keyword args because JRuby defaults to 1.9 syntax currently
@@ -55,15 +63,16 @@ module Kiba
55
63
  end
56
64
 
57
65
  def to_instance(klass, args, block, allow_block, allow_class)
58
- if klass
66
+ if klass && block
67
+ fail 'Class and block form cannot be used together at the moment'
68
+ elsif klass
59
69
  fail 'Class form is not allowed here' unless allow_class
60
70
  klass.new(*args)
61
71
  elsif block
62
72
  fail 'Block form is not allowed here' unless allow_block
63
73
  AliasingProc.new(&block)
64
74
  else
65
- # TODO: support block passing to a class form definition?
66
- fail 'Class and block form cannot be used together at the moment'
75
+ fail 'Nil parameters not allowed here'
67
76
  end
68
77
  end
69
78
  end
@@ -0,0 +1,38 @@
1
+ module Kiba
2
+ module StreamingRunner
3
+ include Runner
4
+ extend self
5
+
6
+ def transform_stream(stream, t)
7
+ Enumerator.new do |y|
8
+ stream.each do |input_row|
9
+ returned_row = t.process(input_row) do |yielded_row|
10
+ y << yielded_row
11
+ end
12
+ y << returned_row if returned_row
13
+ end
14
+ if t.respond_to?(:close)
15
+ t.close do |close_row|
16
+ y << close_row
17
+ end
18
+ end
19
+ end
20
+ end
21
+
22
+ def source_stream(sources)
23
+ Enumerator.new do |y|
24
+ sources.each do |source|
25
+ source.each { |r| y << r }
26
+ end
27
+ end
28
+ end
29
+
30
+ def process_rows(sources, transforms, destinations)
31
+ stream = source_stream(sources)
32
+ recurser = lambda { |s,t| transform_stream(s, t) }
33
+ transforms.inject(stream, &recurser).each do |r|
34
+ destinations.each { |d| d.write(r) }
35
+ end
36
+ end
37
+ end
38
+ end
@@ -1,3 +1,3 @@
1
1
  module Kiba
2
- VERSION = '1.0.0'
2
+ VERSION = '3.5.0'
3
3
  end
@@ -1,10 +1,13 @@
1
1
  require 'minitest/autorun'
2
2
  require 'minitest/pride'
3
+ require 'minitest/focus'
3
4
  require 'kiba'
4
5
 
5
- class Kiba::Test < Minitest::Test
6
- extend Minitest::Spec::DSL
6
+ if ENV['CI'] == 'true'
7
+ puts "Running with MiniTest version #{MiniTest::VERSION}"
8
+ end
7
9
 
10
+ class Kiba::Test < Minitest::Test
8
11
  def remove_files(*files)
9
12
  files.each do |file|
10
13
  File.delete(file) if File.exist?(file)
@@ -14,4 +17,10 @@ class Kiba::Test < Minitest::Test
14
17
  def fixture(file)
15
18
  File.join(File.dirname(__FILE__), 'fixtures', file)
16
19
  end
20
+
21
+ unless self.method_defined?(:assert_mock)
22
+ def assert_mock(mock)
23
+ mock.verify
24
+ end
25
+ end
17
26
  end
@@ -0,0 +1,228 @@
1
+ require 'minitest/mock'
2
+ require_relative 'support/test_enumerable_source'
3
+ require_relative 'support/test_destination_returning_nil'
4
+
5
+ module SharedRunnerTests
6
+ def rows
7
+ @rows ||= [
8
+ { identifier: 'first-row' },
9
+ { identifier: 'second-row' }
10
+ ]
11
+ end
12
+
13
+ def control
14
+ @control ||= begin
15
+ control = Kiba::Control.new
16
+ # this will yield a single row for testing
17
+ control.sources << {
18
+ klass: TestEnumerableSource,
19
+ args: [rows]
20
+ }
21
+ control
22
+ end
23
+ end
24
+
25
+ def test_block_transform_processing
26
+ # is there a better way to assert a block was called in minitest?
27
+ control.transforms << { block: lambda { |r| @called = true; r } }
28
+ kiba_run(control)
29
+ assert_equal true, @called
30
+ end
31
+
32
+ def test_dismissed_row_not_passed_to_next_transform
33
+ @called = nil
34
+ control.transforms << { block: lambda { |_| nil } }
35
+ control.transforms << { block: lambda { |_| @called = true; nil } }
36
+ kiba_run(control)
37
+ assert_nil @called
38
+ end
39
+
40
+ def test_post_process_runs_once
41
+ assert_equal 2, rows.size
42
+ @called = 0
43
+ control.post_processes << { block: lambda { @called += 1 } }
44
+ kiba_run(control)
45
+ assert_equal 1, @called
46
+ end
47
+
48
+ def test_post_process_not_called_after_row_failure
49
+ @called = nil
50
+ control.transforms << { block: lambda { |_| fail 'FAIL' } }
51
+ control.post_processes << { block: lambda { @called = true } }
52
+ assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
53
+ assert_nil @called
54
+ end
55
+
56
+ def test_pre_process_runs_once
57
+ assert_equal 2, rows.size
58
+ @called = 0
59
+ control.pre_processes << { block: lambda { @called += 1 } }
60
+ kiba_run(control)
61
+ assert_equal 1, @called
62
+ end
63
+
64
+ def test_pre_process_runs_before_source_is_instantiated
65
+ calls = []
66
+
67
+ mock_source_class = MiniTest::Mock.new
68
+ mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
69
+ calls << :source_instantiated
70
+ end
71
+
72
+ control = Kiba::Control.new
73
+ control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
74
+ control.sources << { klass: mock_source_class }
75
+ kiba_run(control)
76
+
77
+ assert_equal [:pre_processor_executed, :source_instantiated], calls
78
+ assert_mock mock_source_class
79
+ end
80
+
81
+ def test_no_error_raised_if_destination_close_not_implemented
82
+ # NOTE: this fake destination does not implement `close`
83
+ destination_instance = MiniTest::Mock.new
84
+
85
+ mock_destination_class = MiniTest::Mock.new
86
+ mock_destination_class.expect(:new, destination_instance)
87
+
88
+ control = Kiba::Control.new
89
+ control.destinations << { klass: mock_destination_class }
90
+ kiba_run(control)
91
+ assert_mock mock_destination_class
92
+ end
93
+
94
+ def test_destination_close_called_if_defined
95
+ destination_instance = MiniTest::Mock.new
96
+ destination_instance.expect(:close, nil)
97
+ mock_destination_class = MiniTest::Mock.new
98
+ mock_destination_class.expect(:new, destination_instance)
99
+
100
+ control = Kiba::Control.new
101
+ control.destinations << { klass: mock_destination_class }
102
+ kiba_run(control)
103
+ assert_mock destination_instance
104
+ assert_mock mock_destination_class
105
+ end
106
+
107
+ def test_use_next_to_exit_early_from_block_transform
108
+ assert_equal 2, rows.size
109
+
110
+ # calling "return row" from a block is forbidden, but you can use "next" instead
111
+ b = lambda do |row|
112
+ if row.fetch(:identifier) == 'first-row'
113
+ # demonstrate how to remove a row from the pipeline via next
114
+ next
115
+ else
116
+ # demonstrate how you can reformat via next
117
+ next({new_identifier: row.fetch(:identifier)})
118
+ end
119
+ fail "This should not be called"
120
+ end
121
+ control.transforms << { block: b }
122
+
123
+ # keep track of the rows
124
+ @remaining_rows = []
125
+ checker = lambda { |row| @remaining_rows << row; row }
126
+ control.transforms << { block: checker }
127
+
128
+ kiba_run(control)
129
+
130
+ # the first row should have been removed
131
+ # and the second row should have been reformatted
132
+ assert_equal [{new_identifier: 'second-row'}], @remaining_rows
133
+ end
134
+
135
+ def test_destination_returning_nil_does_not_remove_row_from_pipeline
136
+ # safeguard to avoid modification on the support code
137
+ assert_nil TestDestinationReturningNil.new.write("FOOBAR")
138
+
139
+ destinations = []
140
+ control = Kiba.parse do
141
+ source TestEnumerableSource, [{key: 'value'}]
142
+ 2.times do
143
+ destination TestDestinationReturningNil, on_init: lambda { |d| destinations << d }
144
+ end
145
+ end
146
+ kiba_run(control)
147
+ 2.times do |i|
148
+ assert_equal [{key: 'value'}], destinations[i].instance_variable_get(:@written_rows)
149
+ end
150
+ end
151
+
152
+ def test_nil_transform_error_message
153
+ control = Kiba.parse do
154
+ transform
155
+ end
156
+ assert_raises(RuntimeError, 'Nil parameters not allowed here') { kiba_run(control) }
157
+ end
158
+
159
+ def test_ruby_3_source_kwargs
160
+ # NOTE: before Ruby 3 kwargs support, a Ruby warning would
161
+ # be captured here with Ruby 2.7 & ensure we fail,
162
+ # and an error would be raised with Ruby 2.8.0-dev
163
+ # NOTE: only the first warning will be captured, though, but
164
+ # having 3 different tests is still better
165
+ storage = nil
166
+ assert_silent do
167
+ Kiba.run(Kiba.parse do
168
+ source TestKeywordArgumentsComponent,
169
+ mandatory: "first",
170
+ on_init: -> (values) { storage = values }
171
+ end)
172
+ end
173
+ assert_equal({
174
+ mandatory: "first",
175
+ optional: nil
176
+ }, storage)
177
+ end
178
+
179
+ def test_ruby_3_transform_kwargs
180
+ storage = nil
181
+ assert_silent do
182
+ Kiba.run(Kiba.parse do
183
+ transform TestKeywordArgumentsComponent,
184
+ mandatory: "first",
185
+ on_init: -> (values) { storage = values }
186
+ end)
187
+ end
188
+ assert_equal({
189
+ mandatory: "first",
190
+ optional: nil
191
+ }, storage)
192
+ end
193
+
194
+ def test_ruby_3_destination_kwargs
195
+ storage = nil
196
+ assert_silent do
197
+ Kiba.run(Kiba.parse do
198
+ destination TestKeywordArgumentsComponent,
199
+ mandatory: "first",
200
+ on_init: -> (values) { storage = values }
201
+ end)
202
+ end
203
+ assert_equal({
204
+ mandatory: "first",
205
+ optional: nil
206
+ }, storage)
207
+ end
208
+
209
+ def test_positional_plus_keyword_arguments
210
+ storage = nil
211
+ assert_silent do
212
+ Kiba.run(Kiba.parse do
213
+ source TestMixedArgumentsComponent,
214
+ "some positional argument",
215
+ mandatory: "first",
216
+ on_init: -> (values) {
217
+ storage = values
218
+ }
219
+ end)
220
+ end
221
+
222
+ assert_equal({
223
+ some_value: "some positional argument",
224
+ mandatory: "first",
225
+ optional: nil
226
+ }, storage)
227
+ end
228
+ end
@@ -0,0 +1,10 @@
1
+ module SharedTests
2
+ def shared_tests_for(desc, &block)
3
+ @@shared_tests ||= {}
4
+ @@shared_tests[desc] = block
5
+ end
6
+
7
+ def shared_tests(desc, *args)
8
+ self.class_exec(*args, &@@shared_tests.fetch(desc))
9
+ end
10
+ end
@@ -0,0 +1,19 @@
1
+ class AggregateTransform
2
+ def initialize(options)
3
+ @aggregate_size = options.fetch(:aggregate_size)
4
+ end
5
+
6
+ def process(row)
7
+ @buffer ||= []
8
+ @buffer << row
9
+ if @buffer.size == @aggregate_size
10
+ yield @buffer
11
+ @buffer = []
12
+ end
13
+ nil
14
+ end
15
+
16
+ def close
17
+ yield @buffer unless @buffer.empty?
18
+ end
19
+ end
@@ -0,0 +1,9 @@
1
+ class TestArrayDestination
2
+ def initialize(array)
3
+ @array = array
4
+ end
5
+
6
+ def write(row)
7
+ @array << row
8
+ end
9
+ end
@@ -0,0 +1,11 @@
1
+ class CloseYieldingTransform
2
+ def initialize(options)
3
+ @yield_on_close = options.fetch(:yield_on_close)
4
+ end
5
+
6
+ def close
7
+ @yield_on_close.each do |item|
8
+ yield item
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,12 @@
1
+ class TestDestinationReturningNil
2
+ def initialize(options = {})
3
+ on_init = options[:on_init]
4
+ # A little trick to allow outer references to this instance
5
+ on_init.call(self) if on_init
6
+ end
7
+
8
+ def write(row)
9
+ (@written_rows ||= []) << row
10
+ nil
11
+ end
12
+ end