kiba 1.0.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.travis.yml +11 -9
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +28 -0
  6. data/ISSUE_TEMPLATE.md +7 -0
  7. data/LICENSE +7 -0
  8. data/Pro-Changes.md +108 -0
  9. data/README.md +15 -282
  10. data/Rakefile +6 -1
  11. data/appveyor.yml +19 -9
  12. data/bin/kiba +12 -2
  13. data/kiba.gemspec +6 -1
  14. data/lib/kiba.rb +10 -1
  15. data/lib/kiba/context.rb +4 -0
  16. data/lib/kiba/control.rb +4 -0
  17. data/lib/kiba/dsl_extensions/config.rb +9 -0
  18. data/lib/kiba/parser.rb +4 -9
  19. data/lib/kiba/runner.rb +14 -5
  20. data/lib/kiba/streaming_runner.rb +38 -0
  21. data/lib/kiba/version.rb +1 -1
  22. data/test/helper.rb +11 -2
  23. data/test/shared_runner_tests.rb +228 -0
  24. data/test/support/shared_tests.rb +10 -0
  25. data/test/support/test_aggregate_transform.rb +19 -0
  26. data/test/support/test_array_destination.rb +9 -0
  27. data/test/support/test_close_yielding_transform.rb +11 -0
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +9 -0
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +8 -0
  34. data/test/test_integration.rb +3 -3
  35. data/test/test_parser.rb +34 -29
  36. data/test/test_run.rb +12 -0
  37. data/test/test_runner.rb +5 -81
  38. data/test/test_streaming_runner.rb +70 -0
  39. metadata +57 -16
  40. data/lib/kiba/cli.rb +0 -16
  41. data/test/fixtures/bogus.etl +0 -2
  42. data/test/fixtures/valid.etl +0 -1
  43. data/test/test_cli.rb +0 -17
@@ -0,0 +1,9 @@
1
+ class TestDuplicateRowTranform
2
+ def process(row)
3
+ 2.times do
4
+ # NOTE: it's a good idea to carefully avoid data reuse between rows
5
+ yield({item: row.fetch(:item).dup})
6
+ end
7
+ nil
8
+ end
9
+ end
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestKeywordArgumentsComponent
3
+ def initialize(mandatory:, optional: nil, on_init: nil)
4
+ values = {
5
+ mandatory: mandatory,
6
+ optional: optional
7
+ }
8
+ on_init&.call(values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
@@ -0,0 +1,14 @@
1
+ # a mock component to test Ruby 3 keyword argument support
2
+ class TestMixedArgumentsComponent
3
+ def initialize(some_value, mandatory:, optional: nil, on_init:)
4
+ @values = {}
5
+ @values[:some_value] = some_value
6
+ @values[:mandatory] = mandatory
7
+ @values[:optional] = optional
8
+ on_init&.call(@values)
9
+ end
10
+
11
+ def each
12
+ # no-op
13
+ end
14
+ end
@@ -0,0 +1,5 @@
1
+ class NonClosingTransform
2
+ def process(row)
3
+ row
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ class TestYieldingTransform
2
+ def process(row)
3
+ row.fetch(:tags).each do |value|
4
+ yield({item: value})
5
+ end
6
+ {item: "classic-return-value"}
7
+ end
8
+ end
@@ -8,10 +8,10 @@ require_relative 'support/test_source_that_reads_at_instantiation_time'
8
8
 
9
9
  # End-to-end tests go here
10
10
  class TestIntegration < Kiba::Test
11
- let(:output_file) { 'test/tmp/output.csv' }
12
- let(:input_file) { 'test/tmp/input.csv' }
11
+ def output_file; 'test/tmp/output.csv'; end
12
+ def input_file; 'test/tmp/input.csv'; end
13
13
 
14
- let(:sample_csv_data) do
14
+ def sample_csv_data
15
15
  <<CSV
16
16
  first_name,last_name,sex
17
17
  John,Doe,M
@@ -14,6 +14,17 @@ class TestParser < Kiba::Test
14
14
  assert_equal DummyClass, control.sources[0][:klass]
15
15
  assert_equal %w(has args), control.sources[0][:args]
16
16
  end
17
+
18
+ # NOTE: useful for anything not using the CLI (e.g. sidekiq)
19
+ def test_block_parsing_with_reference_to_outside_variable
20
+ some_variable = Object.new
21
+
22
+ control = Kiba.parse do
23
+ source DummyClass, some_variable
24
+ end
25
+
26
+ assert_equal [some_variable], control.sources[0][:args]
27
+ end
17
28
 
18
29
  def test_block_transform_definition
19
30
  control = Kiba.parse do
@@ -57,36 +68,30 @@ class TestParser < Kiba::Test
57
68
  assert_instance_of Proc, control.pre_processes[0][:block]
58
69
  end
59
70
 
60
- def test_source_as_string_parsing
61
- control = Kiba.parse <<RUBY
62
- source DummyClass, 'from', 'file'
63
- RUBY
64
-
65
- assert_equal 1, control.sources.size
66
- assert_equal DummyClass, control.sources[0][:klass]
67
- assert_equal %w(from file), control.sources[0][:args]
71
+ def test_config
72
+ control = Kiba.parse do
73
+ extend Kiba::DSLExtensions::Config
74
+
75
+ config :context, key: "value", other_key: "other_value"
76
+ end
77
+
78
+ assert_equal({ context: {
79
+ key: "value",
80
+ other_key: "other_value"
81
+ }}, control.config)
68
82
  end
83
+
84
+ def test_config_override
85
+ control = Kiba.parse do
86
+ extend Kiba::DSLExtensions::Config
69
87
 
70
- def test_source_as_file_doing_require
71
- IO.write 'test/tmp/etl-common.rb', <<RUBY
72
- def common_source_declaration
73
- source DummyClass, 'from', 'common'
74
- end
75
- RUBY
76
- IO.write 'test/tmp/etl-main.rb', <<RUBY
77
- require './test/tmp/etl-common.rb'
78
-
79
- source DummyClass, 'from', 'main'
80
- common_source_declaration
81
- RUBY
82
- control = Kiba.parse IO.read('test/tmp/etl-main.rb')
83
-
84
- assert_equal 2, control.sources.size
85
-
86
- assert_equal %w(from main), control.sources[0][:args]
87
- assert_equal %w(from common), control.sources[1][:args]
88
-
89
- ensure
90
- remove_files('test/tmp/etl-common.rb', 'test/tmp/etl-main.rb')
88
+ config :context, key: "value", other_key: "other_value"
89
+ config :context, key: "new_value"
90
+ end
91
+
92
+ assert_equal({ context: {
93
+ key: "new_value",
94
+ other_key: "other_value"
95
+ }}, control.config)
91
96
  end
92
97
  end
@@ -0,0 +1,12 @@
1
+ require_relative 'helper'
2
+ require 'minitest/mock'
3
+
4
+ class TestRun < Kiba::Test
5
+ def test_ensure_kiba_defaults_to_streaming_runner
6
+ cb = -> (job) { "Streaming runner called" }
7
+ Kiba::StreamingRunner.stub(:run, cb) do
8
+ job = Kiba::Control.new
9
+ assert_equal "Streaming runner called", Kiba.run(job)
10
+ end
11
+ end
12
+ end
@@ -1,87 +1,11 @@
1
1
  require_relative 'helper'
2
- require 'minitest/mock'
3
- require_relative 'support/test_enumerable_source'
2
+ require_relative 'shared_runner_tests'
4
3
 
5
4
  class TestRunner < Kiba::Test
6
- let(:rows) do
7
- [
8
- { field: 'value' },
9
- { field: 'other-value' }
10
- ]
5
+ def kiba_run(job)
6
+ job.config[:kiba] = {runner: Kiba::Runner}
7
+ Kiba.run(job)
11
8
  end
12
-
13
- let(:control) do
14
- control = Kiba::Control.new
15
- # this will yield a single row for testing
16
- control.sources << {
17
- klass: TestEnumerableSource,
18
- args: [rows]
19
- }
20
- control
21
- end
22
-
23
- def test_block_transform_processing
24
- # is there a better way to assert a block was called in minitest?
25
- control.transforms << { block: lambda { |r| @called = true; r } }
26
- Kiba.run(control)
27
- assert_equal true, @called
28
- end
29
-
30
- def test_dismissed_row_not_passed_to_next_transform
31
- control.transforms << { block: lambda { |_| nil } }
32
- control.transforms << { block: lambda { |_| @called = true; nil } }
33
- Kiba.run(control)
34
- assert_nil @called
35
- end
36
-
37
- def test_post_process_runs_once
38
- assert_equal 2, rows.size
39
- @called = 0
40
- control.post_processes << { block: lambda { @called += 1 } }
41
- Kiba.run(control)
42
- assert_equal 1, @called
43
- end
44
-
45
- def test_post_process_not_called_after_row_failure
46
- control.transforms << { block: lambda { |_| fail 'FAIL' } }
47
- control.post_processes << { block: lambda { @called = true } }
48
- assert_raises(RuntimeError, 'FAIL') { Kiba.run(control) }
49
- assert_nil @called
50
- end
51
-
52
- def test_pre_process_runs_once
53
- assert_equal 2, rows.size
54
- @called = 0
55
- control.pre_processes << { block: lambda { @called += 1 } }
56
- Kiba.run(control)
57
- assert_equal 1, @called
58
- end
59
-
60
- def test_pre_process_runs_before_source_is_instantiated
61
- calls = []
62
-
63
- mock_source_class = MiniTest::Mock.new
64
- mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
65
- calls << :source_instantiated
66
- end
67
9
 
68
- control = Kiba::Control.new
69
- control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
70
- control.sources << { klass: mock_source_class }
71
- Kiba.run(control)
72
-
73
- assert_equal [:pre_processor_executed, :source_instantiated], calls
74
- end
75
-
76
- def test_no_error_raised_if_destination_close_not_implemented
77
- # NOTE: this fake destination does not implement `close`
78
- destination_instance = MiniTest::Mock.new
79
-
80
- mock_destination_class = MiniTest::Mock.new
81
- mock_destination_class.expect(:new, destination_instance)
82
-
83
- control = Kiba::Control.new
84
- control.destinations << { klass: mock_destination_class }
85
- Kiba.run(control)
86
- end
10
+ include SharedRunnerTests
87
11
  end
@@ -0,0 +1,70 @@
1
+ require_relative 'helper'
2
+ require_relative 'support/test_enumerable_source'
3
+ require_relative 'support/test_array_destination'
4
+ require_relative 'support/test_yielding_transform'
5
+ require_relative 'support/test_duplicate_row_transform'
6
+ require_relative 'support/test_close_yielding_transform'
7
+ require_relative 'support/test_non_closing_transform'
8
+ require_relative 'shared_runner_tests'
9
+ require_relative 'support/test_keyword_arguments_component'
10
+ require_relative 'support/test_mixed_arguments_component'
11
+
12
+ class TestStreamingRunner < Kiba::Test
13
+ def kiba_run(job)
14
+ job.config[:kiba] = {runner: Kiba::StreamingRunner}
15
+ Kiba.run(job)
16
+ end
17
+
18
+ include SharedRunnerTests
19
+
20
+ def test_yielding_class_transform
21
+ input_row = {tags: ["one", "two", "three"]}
22
+ destination_array = []
23
+
24
+ job = Kiba.parse do
25
+ # provide a single row as the input
26
+ source TestEnumerableSource, [input_row]
27
+
28
+ # explode tags in one row each
29
+ transform TestYieldingTransform
30
+
31
+ # generate two rows out of each exploded tags row
32
+ transform TestDuplicateRowTranform
33
+
34
+ destination TestArrayDestination, destination_array
35
+ end
36
+
37
+ kiba_run(job)
38
+
39
+ assert_equal [
40
+ {item: 'one'},
41
+ {item: 'one'},
42
+
43
+ {item: 'two'},
44
+ {item: 'two'},
45
+
46
+ {item: 'three'},
47
+ {item: 'three'},
48
+
49
+ {item: 'classic-return-value'},
50
+ {item: 'classic-return-value'}
51
+ ], destination_array
52
+ end
53
+
54
+ def test_transform_yielding_from_close
55
+ destination_array = []
56
+ job = Kiba.parse do
57
+ transform CloseYieldingTransform, yield_on_close: [1, 2]
58
+ destination TestArrayDestination, destination_array
59
+ end
60
+ Kiba.run(job)
61
+ assert_equal [1, 2], destination_array
62
+ end
63
+
64
+ def test_transform_with_no_close_must_not_raise
65
+ job = Kiba.parse do
66
+ transform NonClosingTransform
67
+ end
68
+ Kiba.run(job)
69
+ end
70
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thibaut Barrère
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-01 00:00:00.000000000 Z
11
+ date: 2020-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -26,6 +26,20 @@ dependencies:
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ">="
@@ -39,7 +53,7 @@ dependencies:
39
53
  - !ruby/object:Gem::Version
40
54
  version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
- name: awesome_print
56
+ name: minitest-focus
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - ">="
@@ -60,40 +74,58 @@ executables:
60
74
  extensions: []
61
75
  extra_rdoc_files: []
62
76
  files:
77
+ - ".github/FUNDING.yml"
63
78
  - ".gitignore"
64
79
  - ".travis.yml"
80
+ - COMM-LICENSE.md
65
81
  - Changes.md
66
82
  - Gemfile
83
+ - ISSUE_TEMPLATE.md
84
+ - LICENSE
85
+ - Pro-Changes.md
67
86
  - README.md
68
87
  - Rakefile
69
88
  - appveyor.yml
70
89
  - bin/kiba
71
90
  - kiba.gemspec
72
91
  - lib/kiba.rb
73
- - lib/kiba/cli.rb
74
92
  - lib/kiba/context.rb
75
93
  - lib/kiba/control.rb
94
+ - lib/kiba/dsl_extensions/config.rb
76
95
  - lib/kiba/parser.rb
77
96
  - lib/kiba/runner.rb
97
+ - lib/kiba/streaming_runner.rb
78
98
  - lib/kiba/version.rb
79
- - test/fixtures/bogus.etl
80
- - test/fixtures/valid.etl
81
99
  - test/helper.rb
100
+ - test/shared_runner_tests.rb
101
+ - test/support/shared_tests.rb
102
+ - test/support/test_aggregate_transform.rb
103
+ - test/support/test_array_destination.rb
104
+ - test/support/test_close_yielding_transform.rb
82
105
  - test/support/test_csv_destination.rb
83
106
  - test/support/test_csv_source.rb
107
+ - test/support/test_destination_returning_nil.rb
108
+ - test/support/test_duplicate_row_transform.rb
84
109
  - test/support/test_enumerable_source.rb
110
+ - test/support/test_keyword_arguments_component.rb
111
+ - test/support/test_mixed_arguments_component.rb
112
+ - test/support/test_non_closing_transform.rb
85
113
  - test/support/test_rename_field_transform.rb
86
114
  - test/support/test_source_that_reads_at_instantiation_time.rb
87
- - test/test_cli.rb
115
+ - test/support/test_yielding_transform.rb
88
116
  - test/test_integration.rb
89
117
  - test/test_parser.rb
118
+ - test/test_run.rb
90
119
  - test/test_runner.rb
120
+ - test/test_streaming_runner.rb
91
121
  - test/tmp/.gitkeep
92
122
  homepage: http://thbar.github.io/kiba/
93
123
  licenses:
94
124
  - LGPL-3.0
95
- metadata: {}
96
- post_install_message:
125
+ metadata:
126
+ source_code_uri: https://github.com/thbar/kiba
127
+ documentation_uri: https://github.com/thbar/kiba/wiki
128
+ post_install_message:
97
129
  rdoc_options: []
98
130
  require_paths:
99
131
  - lib
@@ -108,22 +140,31 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
140
  - !ruby/object:Gem::Version
109
141
  version: '0'
110
142
  requirements: []
111
- rubyforge_project:
112
- rubygems_version: 2.4.8
113
- signing_key:
143
+ rubygems_version: 3.1.2
144
+ signing_key:
114
145
  specification_version: 4
115
146
  summary: Lightweight ETL for Ruby
116
147
  test_files:
117
- - test/fixtures/bogus.etl
118
- - test/fixtures/valid.etl
119
148
  - test/helper.rb
149
+ - test/shared_runner_tests.rb
150
+ - test/support/shared_tests.rb
151
+ - test/support/test_aggregate_transform.rb
152
+ - test/support/test_array_destination.rb
153
+ - test/support/test_close_yielding_transform.rb
120
154
  - test/support/test_csv_destination.rb
121
155
  - test/support/test_csv_source.rb
156
+ - test/support/test_destination_returning_nil.rb
157
+ - test/support/test_duplicate_row_transform.rb
122
158
  - test/support/test_enumerable_source.rb
159
+ - test/support/test_keyword_arguments_component.rb
160
+ - test/support/test_mixed_arguments_component.rb
161
+ - test/support/test_non_closing_transform.rb
123
162
  - test/support/test_rename_field_transform.rb
124
163
  - test/support/test_source_that_reads_at_instantiation_time.rb
125
- - test/test_cli.rb
164
+ - test/support/test_yielding_transform.rb
126
165
  - test/test_integration.rb
127
166
  - test/test_parser.rb
167
+ - test/test_run.rb
128
168
  - test/test_runner.rb
169
+ - test/test_streaming_runner.rb
129
170
  - test/tmp/.gitkeep