kiba 2.0.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +5 -5
  2. data/.github/FUNDING.yml +1 -0
  3. data/.github/workflows/ci.yml +41 -0
  4. data/COMM-LICENSE.md +348 -0
  5. data/Changes.md +38 -2
  6. data/Gemfile +1 -1
  7. data/ISSUE_TEMPLATE.md +7 -0
  8. data/LICENSE +3 -1
  9. data/Pro-Changes.md +82 -5
  10. data/README.md +12 -65
  11. data/Rakefile +8 -3
  12. data/kiba.gemspec +20 -17
  13. data/lib/kiba.rb +14 -11
  14. data/lib/kiba/context.rb +9 -5
  15. data/lib/kiba/control.rb +1 -1
  16. data/lib/kiba/dsl_extensions/config.rb +1 -1
  17. data/lib/kiba/parser.rb +6 -22
  18. data/lib/kiba/streaming_runner.rb +62 -5
  19. data/lib/kiba/version.rb +1 -1
  20. data/test/helper.rb +15 -7
  21. data/test/shared_runner_tests.rb +227 -0
  22. data/test/support/shared_tests.rb +1 -1
  23. data/test/support/test_aggregate_transform.rb +19 -0
  24. data/test/support/test_array_destination.rb +2 -2
  25. data/test/support/test_close_yielding_transform.rb +11 -0
  26. data/test/support/test_csv_destination.rb +2 -2
  27. data/test/support/test_csv_source.rb +1 -1
  28. data/test/support/test_destination_returning_nil.rb +12 -0
  29. data/test/support/test_duplicate_row_transform.rb +1 -1
  30. data/test/support/test_keyword_arguments_component.rb +14 -0
  31. data/test/support/test_mixed_arguments_component.rb +14 -0
  32. data/test/support/test_non_closing_transform.rb +5 -0
  33. data/test/support/test_yielding_transform.rb +1 -1
  34. data/test/test_integration.rb +38 -33
  35. data/test/test_parser.rb +16 -50
  36. data/test/test_run.rb +37 -0
  37. data/test/test_streaming_runner.rb +44 -23
  38. metadata +45 -30
  39. data/.travis.yml +0 -15
  40. data/appveyor.yml +0 -26
  41. data/bin/kiba +0 -5
  42. data/lib/kiba/cli.rb +0 -16
  43. data/lib/kiba/runner.rb +0 -78
  44. data/test/common/runner.rb +0 -137
  45. data/test/fixtures/bogus.etl +0 -2
  46. data/test/fixtures/namespace_conflict.etl +0 -9
  47. data/test/fixtures/some_extension.rb +0 -4
  48. data/test/fixtures/valid.etl +0 -1
  49. data/test/test_cli.rb +0 -21
  50. data/test/test_runner.rb +0 -6
data/bin/kiba DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/kiba/cli'
4
-
5
- Kiba::Cli.run(ARGV)
data/lib/kiba/cli.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'kiba'
2
-
3
- module Kiba
4
- class Cli
5
- def self.run(args)
6
- unless args.size == 1
7
- puts 'Syntax: kiba your-script.etl'
8
- exit(-1)
9
- end
10
- filename = args[0]
11
- script_content = IO.read(filename)
12
- job_definition = Kiba.parse(script_content, filename)
13
- Kiba.run(job_definition)
14
- end
15
- end
16
- end
data/lib/kiba/runner.rb DELETED
@@ -1,78 +0,0 @@
1
- module Kiba
2
- module Runner
3
- extend self
4
-
5
- # allow to handle a block form just like a regular transform
6
- class AliasingProc < Proc
7
- alias_method :process, :call
8
- end
9
-
10
- def run(control)
11
- # TODO: add a dry-run (not instantiating mode) to_instances call
12
- # that will validate the job definition from a syntax pov before
13
- # going any further. This could be shared with the parser.
14
- run_pre_processes(control)
15
- process_rows(
16
- to_instances(control.sources),
17
- to_instances(control.transforms, true),
18
- destinations = to_instances(control.destinations)
19
- )
20
- close_destinations(destinations)
21
- # TODO: when I add post processes as class, I'll have to add a test to
22
- # make sure instantiation occurs after the main processing is done (#16)
23
- run_post_processes(control)
24
- end
25
-
26
- def run_pre_processes(control)
27
- to_instances(control.pre_processes, true, false).each(&:call)
28
- end
29
-
30
- def run_post_processes(control)
31
- to_instances(control.post_processes, true, false).each(&:call)
32
- end
33
-
34
- def close_destinations(destinations)
35
- destinations
36
- .find_all { |d| d.respond_to?(:close) }
37
- .each(&:close)
38
- end
39
-
40
- def process_rows(sources, transforms, destinations)
41
- sources.each do |source|
42
- source.each do |row|
43
- transforms.each do |transform|
44
- row = transform.process(row)
45
- break unless row
46
- end
47
- next unless row
48
- destinations.each do |destination|
49
- destination.write(row)
50
- end
51
- end
52
- end
53
- end
54
-
55
- # not using keyword args because JRuby defaults to 1.9 syntax currently
56
- def to_instances(definitions, allow_block = false, allow_class = true)
57
- definitions.map do |definition|
58
- to_instance(
59
- *definition.values_at(:klass, :args, :block),
60
- allow_block, allow_class
61
- )
62
- end
63
- end
64
-
65
- def to_instance(klass, args, block, allow_block, allow_class)
66
- if klass
67
- fail 'Class form is not allowed here' unless allow_class
68
- klass.new(*args)
69
- elsif block
70
- fail 'Block form is not allowed here' unless allow_block
71
- AliasingProc.new(&block)
72
- else
73
- # TODO: support block passing to a class form definition?
74
- fail 'Class and block form cannot be used together at the moment'
75
- end
76
- end
77
- end
78
- end
data/test/common/runner.rb DELETED
@@ -1,137 +0,0 @@
1
- require 'minitest/mock'
2
- require_relative '../support/test_enumerable_source'
3
-
4
- module SharedRunnerTests
5
- def kiba_run(job)
6
- Kiba.run(job)
7
- end
8
-
9
- def rows
10
- @rows ||= [
11
- { identifier: 'first-row' },
12
- { identifier: 'second-row' }
13
- ]
14
- end
15
-
16
- def control
17
- @control ||= begin
18
- control = Kiba::Control.new
19
- # this will yield a single row for testing
20
- control.sources << {
21
- klass: TestEnumerableSource,
22
- args: [rows]
23
- }
24
- control
25
- end
26
- end
27
-
28
- def test_block_transform_processing
29
- # is there a better way to assert a block was called in minitest?
30
- control.transforms << { block: lambda { |r| @called = true; r } }
31
- kiba_run(control)
32
- assert_equal true, @called
33
- end
34
-
35
- def test_dismissed_row_not_passed_to_next_transform
36
- @called = nil
37
- control.transforms << { block: lambda { |_| nil } }
38
- control.transforms << { block: lambda { |_| @called = true; nil } }
39
- kiba_run(control)
40
- assert_nil @called
41
- end
42
-
43
- def test_post_process_runs_once
44
- assert_equal 2, rows.size
45
- @called = 0
46
- control.post_processes << { block: lambda { @called += 1 } }
47
- kiba_run(control)
48
- assert_equal 1, @called
49
- end
50
-
51
- def test_post_process_not_called_after_row_failure
52
- @called = nil
53
- control.transforms << { block: lambda { |_| fail 'FAIL' } }
54
- control.post_processes << { block: lambda { @called = true } }
55
- assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
56
- assert_nil @called
57
- end
58
-
59
- def test_pre_process_runs_once
60
- assert_equal 2, rows.size
61
- @called = 0
62
- control.pre_processes << { block: lambda { @called += 1 } }
63
- kiba_run(control)
64
- assert_equal 1, @called
65
- end
66
-
67
- def test_pre_process_runs_before_source_is_instantiated
68
- calls = []
69
-
70
- mock_source_class = MiniTest::Mock.new
71
- mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
72
- calls << :source_instantiated
73
- end
74
-
75
- control = Kiba::Control.new
76
- control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
77
- control.sources << { klass: mock_source_class }
78
- kiba_run(control)
79
-
80
- assert_equal [:pre_processor_executed, :source_instantiated], calls
81
- assert_mock mock_source_class
82
- end
83
-
84
- def test_no_error_raised_if_destination_close_not_implemented
85
- # NOTE: this fake destination does not implement `close`
86
- destination_instance = MiniTest::Mock.new
87
-
88
- mock_destination_class = MiniTest::Mock.new
89
- mock_destination_class.expect(:new, destination_instance)
90
-
91
- control = Kiba::Control.new
92
- control.destinations << { klass: mock_destination_class }
93
- kiba_run(control)
94
- assert_mock mock_destination_class
95
- end
96
-
97
- def test_destination_close_called_if_defined
98
- destination_instance = MiniTest::Mock.new
99
- destination_instance.expect(:close, nil)
100
- mock_destination_class = MiniTest::Mock.new
101
- mock_destination_class.expect(:new, destination_instance)
102
-
103
- control = Kiba::Control.new
104
- control.destinations << { klass: mock_destination_class }
105
- kiba_run(control)
106
- assert_mock destination_instance
107
- assert_mock mock_destination_class
108
- end
109
-
110
- def test_use_next_to_exit_early_from_block_transform
111
- assert_equal 2, rows.size
112
-
113
- # calling "return row" from a block is forbidden, but you can use "next" instead
114
- b = lambda do |row|
115
- if row.fetch(:identifier) == 'first-row'
116
- # demonstrate how to remove a row from the pipeline via next
117
- next
118
- else
119
- # demonstrate how you can reformat via next
120
- next({new_identifier: row.fetch(:identifier)})
121
- end
122
- fail "This should not be called"
123
- end
124
- control.transforms << { block: b }
125
-
126
- # keep track of the rows
127
- @remaining_rows = []
128
- checker = lambda { |row| @remaining_rows << row; row }
129
- control.transforms << { block: checker }
130
-
131
- kiba_run(control)
132
-
133
- # the first row should have been removed
134
- # and the second row should have been reformatted
135
- assert_equal [{new_identifier: 'second-row'}], @remaining_rows
136
- end
137
- end
data/test/fixtures/bogus.etl DELETED
@@ -1,2 +0,0 @@
1
- # this should fail because we have an unknown class
2
- source UnknownThing
data/test/fixtures/namespace_conflict.etl DELETED
@@ -1,9 +0,0 @@
1
- fail "Context should not be visible without Kiba namespace" if defined?(Context)
2
- fail "Control should not be visible without Kiba namespace" if defined?(Control)
3
- fail "Parser should not be visible without Kiba namespace" if defined?(Parser)
4
- fail "Config should not be visible without Kiba namespace" if defined?(DSLExtensions::Config)
5
-
6
- # verify Kiba config (namespaced under Kiba::DSLExtensions::Config)
7
- # isn't causing troubles to implementers using a top-level DSLExtensions module
8
- require_relative 'some_extension'
9
- extend DSLExtensions::SomeExtension
data/test/fixtures/some_extension.rb DELETED
@@ -1,4 +0,0 @@
1
- module DSLExtensions
2
- module SomeExtension
3
- end
4
- end
data/test/fixtures/valid.etl DELETED
@@ -1 +0,0 @@
1
- # this does nothing
data/test/test_cli.rb DELETED
@@ -1,21 +0,0 @@
1
- require_relative 'helper'
2
- require 'kiba/cli'
3
-
4
- class TestCli < Kiba::Test
5
- def test_cli_launches
6
- Kiba::Cli.run([fixture('valid.etl')])
7
- end
8
-
9
- def test_cli_reports_filename_and_lineno
10
- exception = assert_raises(NameError) do
11
- Kiba::Cli.run([fixture('bogus.etl')])
12
- end
13
-
14
- assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
15
- assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
16
- end
17
-
18
- def test_namespace_conflict
19
- Kiba::Cli.run([fixture('namespace_conflict.etl')])
20
- end
21
- end
data/test/test_runner.rb DELETED
@@ -1,6 +0,0 @@
1
- require_relative 'helper'
2
- require_relative 'common/runner'
3
-
4
- class TestRunner < Kiba::Test
5
- include SharedRunnerTests
6
- end