kiba 2.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.github/workflows/ci.yml +41 -0
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +38 -2
  6. data/Gemfile +1 -1
  7. data/ISSUE_TEMPLATE.md +7 -0
  8. data/LICENSE +3 -1
  9. data/Pro-Changes.md +82 -5
  10. data/README.md +12 -65
  11. data/Rakefile +8 -3
  12. data/kiba.gemspec +20 -17
  13. data/lib/kiba.rb +14 -11
  14. data/lib/kiba/context.rb +9 -5
  15. data/lib/kiba/control.rb +1 -1
  16. data/lib/kiba/dsl_extensions/config.rb +1 -1
  17. data/lib/kiba/parser.rb +6 -22
  18. data/lib/kiba/streaming_runner.rb +62 -5
  19. data/lib/kiba/version.rb +1 -1
  20. data/test/helper.rb +15 -7
  21. data/test/shared_runner_tests.rb +227 -0
  22. data/test/support/shared_tests.rb +1 -1
  23. data/test/support/test_aggregate_transform.rb +19 -0
  24. data/test/support/test_array_destination.rb +2 -2
  25. data/test/support/test_close_yielding_transform.rb +11 -0
  26. data/test/support/test_csv_destination.rb +2 -2
  27. data/test/support/test_csv_source.rb +1 -1
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +1 -1
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +1 -1
  34. data/test/test_integration.rb +38 -33
  35. data/test/test_parser.rb +16 -50
  36. data/test/test_run.rb +37 -0
  37. data/test/test_streaming_runner.rb +44 -23
  38. metadata +45 -30
  39. data/.travis.yml +0 -15
  40. data/appveyor.yml +0 -26
  41. data/bin/kiba +0 -5
  42. data/lib/kiba/cli.rb +0 -16
  43. data/lib/kiba/runner.rb +0 -78
  44. data/test/common/runner.rb +0 -137
  45. data/test/fixtures/bogus.etl +0 -2
  46. data/test/fixtures/namespace_conflict.etl +0 -9
  47. data/test/fixtures/some_extension.rb +0 -4
  48. data/test/fixtures/valid.etl +0 -1
  49. data/test/test_cli.rb +0 -21
  50. data/test/test_runner.rb +0 -6
data/bin/kiba DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/kiba/cli'
4
-
5
- Kiba::Cli.run(ARGV)
data/lib/kiba/cli.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'kiba'
2
-
3
- module Kiba
4
- class Cli
5
- def self.run(args)
6
- unless args.size == 1
7
- puts 'Syntax: kiba your-script.etl'
8
- exit(-1)
9
- end
10
- filename = args[0]
11
- script_content = IO.read(filename)
12
- job_definition = Kiba.parse(script_content, filename)
13
- Kiba.run(job_definition)
14
- end
15
- end
16
- end
data/lib/kiba/runner.rb DELETED
@@ -1,78 +0,0 @@
1
- module Kiba
2
- module Runner
3
- extend self
4
-
5
- # allow to handle a block form just like a regular transform
6
- class AliasingProc < Proc
7
- alias_method :process, :call
8
- end
9
-
10
- def run(control)
11
- # TODO: add a dry-run (not instantiating mode) to_instances call
12
- # that will validate the job definition from a syntax pov before
13
- # going any further. This could be shared with the parser.
14
- run_pre_processes(control)
15
- process_rows(
16
- to_instances(control.sources),
17
- to_instances(control.transforms, true),
18
- destinations = to_instances(control.destinations)
19
- )
20
- close_destinations(destinations)
21
- # TODO: when I add post processes as class, I'll have to add a test to
22
- # make sure instantiation occurs after the main processing is done (#16)
23
- run_post_processes(control)
24
- end
25
-
26
- def run_pre_processes(control)
27
- to_instances(control.pre_processes, true, false).each(&:call)
28
- end
29
-
30
- def run_post_processes(control)
31
- to_instances(control.post_processes, true, false).each(&:call)
32
- end
33
-
34
- def close_destinations(destinations)
35
- destinations
36
- .find_all { |d| d.respond_to?(:close) }
37
- .each(&:close)
38
- end
39
-
40
- def process_rows(sources, transforms, destinations)
41
- sources.each do |source|
42
- source.each do |row|
43
- transforms.each do |transform|
44
- row = transform.process(row)
45
- break unless row
46
- end
47
- next unless row
48
- destinations.each do |destination|
49
- destination.write(row)
50
- end
51
- end
52
- end
53
- end
54
-
55
- # not using keyword args because JRuby defaults to 1.9 syntax currently
56
- def to_instances(definitions, allow_block = false, allow_class = true)
57
- definitions.map do |definition|
58
- to_instance(
59
- *definition.values_at(:klass, :args, :block),
60
- allow_block, allow_class
61
- )
62
- end
63
- end
64
-
65
- def to_instance(klass, args, block, allow_block, allow_class)
66
- if klass
67
- fail 'Class form is not allowed here' unless allow_class
68
- klass.new(*args)
69
- elsif block
70
- fail 'Block form is not allowed here' unless allow_block
71
- AliasingProc.new(&block)
72
- else
73
- # TODO: support block passing to a class form definition?
74
- fail 'Class and block form cannot be used together at the moment'
75
- end
76
- end
77
- end
78
- end
data/test/common/runner.rb DELETED
@@ -1,137 +0,0 @@
1
- require 'minitest/mock'
2
- require_relative '../support/test_enumerable_source'
3
-
4
- module SharedRunnerTests
5
- def kiba_run(job)
6
- Kiba.run(job)
7
- end
8
-
9
- def rows
10
- @rows ||= [
11
- { identifier: 'first-row' },
12
- { identifier: 'second-row' }
13
- ]
14
- end
15
-
16
- def control
17
- @control ||= begin
18
- control = Kiba::Control.new
19
- # this will yield a single row for testing
20
- control.sources << {
21
- klass: TestEnumerableSource,
22
- args: [rows]
23
- }
24
- control
25
- end
26
- end
27
-
28
- def test_block_transform_processing
29
- # is there a better way to assert a block was called in minitest?
30
- control.transforms << { block: lambda { |r| @called = true; r } }
31
- kiba_run(control)
32
- assert_equal true, @called
33
- end
34
-
35
- def test_dismissed_row_not_passed_to_next_transform
36
- @called = nil
37
- control.transforms << { block: lambda { |_| nil } }
38
- control.transforms << { block: lambda { |_| @called = true; nil } }
39
- kiba_run(control)
40
- assert_nil @called
41
- end
42
-
43
- def test_post_process_runs_once
44
- assert_equal 2, rows.size
45
- @called = 0
46
- control.post_processes << { block: lambda { @called += 1 } }
47
- kiba_run(control)
48
- assert_equal 1, @called
49
- end
50
-
51
- def test_post_process_not_called_after_row_failure
52
- @called = nil
53
- control.transforms << { block: lambda { |_| fail 'FAIL' } }
54
- control.post_processes << { block: lambda { @called = true } }
55
- assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
56
- assert_nil @called
57
- end
58
-
59
- def test_pre_process_runs_once
60
- assert_equal 2, rows.size
61
- @called = 0
62
- control.pre_processes << { block: lambda { @called += 1 } }
63
- kiba_run(control)
64
- assert_equal 1, @called
65
- end
66
-
67
- def test_pre_process_runs_before_source_is_instantiated
68
- calls = []
69
-
70
- mock_source_class = MiniTest::Mock.new
71
- mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
72
- calls << :source_instantiated
73
- end
74
-
75
- control = Kiba::Control.new
76
- control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
77
- control.sources << { klass: mock_source_class }
78
- kiba_run(control)
79
-
80
- assert_equal [:pre_processor_executed, :source_instantiated], calls
81
- assert_mock mock_source_class
82
- end
83
-
84
- def test_no_error_raised_if_destination_close_not_implemented
85
- # NOTE: this fake destination does not implement `close`
86
- destination_instance = MiniTest::Mock.new
87
-
88
- mock_destination_class = MiniTest::Mock.new
89
- mock_destination_class.expect(:new, destination_instance)
90
-
91
- control = Kiba::Control.new
92
- control.destinations << { klass: mock_destination_class }
93
- kiba_run(control)
94
- assert_mock mock_destination_class
95
- end
96
-
97
- def test_destination_close_called_if_defined
98
- destination_instance = MiniTest::Mock.new
99
- destination_instance.expect(:close, nil)
100
- mock_destination_class = MiniTest::Mock.new
101
- mock_destination_class.expect(:new, destination_instance)
102
-
103
- control = Kiba::Control.new
104
- control.destinations << { klass: mock_destination_class }
105
- kiba_run(control)
106
- assert_mock destination_instance
107
- assert_mock mock_destination_class
108
- end
109
-
110
- def test_use_next_to_exit_early_from_block_transform
111
- assert_equal 2, rows.size
112
-
113
- # calling "return row" from a block is forbidden, but you can use "next" instead
114
- b = lambda do |row|
115
- if row.fetch(:identifier) == 'first-row'
116
- # demonstrate how to remove a row from the pipeline via next
117
- next
118
- else
119
- # demonstrate how you can reformat via next
120
- next({new_identifier: row.fetch(:identifier)})
121
- end
122
- fail "This should not be called"
123
- end
124
- control.transforms << { block: b }
125
-
126
- # keep track of the rows
127
- @remaining_rows = []
128
- checker = lambda { |row| @remaining_rows << row; row }
129
- control.transforms << { block: checker }
130
-
131
- kiba_run(control)
132
-
133
- # the first row should have been removed
134
- # and the second row should have been reformatted
135
- assert_equal [{new_identifier: 'second-row'}], @remaining_rows
136
- end
137
- end
data/test/fixtures/bogus.etl DELETED
@@ -1,2 +0,0 @@
1
- # this should fail because we have an unknown class
2
- source UnknownThing
data/test/fixtures/namespace_conflict.etl DELETED
@@ -1,9 +0,0 @@
1
- fail "Context should not be visible without Kiba namespace" if defined?(Context)
2
- fail "Control should not be visible without Kiba namespace" if defined?(Control)
3
- fail "Parser should not be visible without Kiba namespace" if defined?(Parser)
4
- fail "Config should not be visible without Kiba namespace" if defined?(DSLExtensions::Config)
5
-
6
- # verify Kiba config (namespaced under Kiba::DSLExtensions::Config)
7
- # isn't causing troubles to implementers using a top-level DSLExtensions module
8
- require_relative 'some_extension'
9
- extend DSLExtensions::SomeExtension
data/test/fixtures/some_extension.rb DELETED
@@ -1,4 +0,0 @@
1
- module DSLExtensions
2
- module SomeExtension
3
- end
4
- end
data/test/fixtures/valid.etl DELETED
@@ -1 +0,0 @@
1
- # this does nothing
data/test/test_cli.rb DELETED
@@ -1,21 +0,0 @@
1
- require_relative 'helper'
2
- require 'kiba/cli'
3
-
4
- class TestCli < Kiba::Test
5
- def test_cli_launches
6
- Kiba::Cli.run([fixture('valid.etl')])
7
- end
8
-
9
- def test_cli_reports_filename_and_lineno
10
- exception = assert_raises(NameError) do
11
- Kiba::Cli.run([fixture('bogus.etl')])
12
- end
13
-
14
- assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
15
- assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
16
- end
17
-
18
- def test_namespace_conflict
19
- Kiba::Cli.run([fixture('namespace_conflict.etl')])
20
- end
21
- end
data/test/test_runner.rb DELETED
@@ -1,6 +0,0 @@
1
- require_relative 'helper'
2
- require_relative 'common/runner'
3
-
4
- class TestRunner < Kiba::Test
5
- include SharedRunnerTests
6
- end