kiba 2.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +41 -0
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +38 -2
- data/Gemfile +1 -1
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +3 -1
- data/Pro-Changes.md +82 -5
- data/README.md +12 -65
- data/Rakefile +8 -3
- data/kiba.gemspec +20 -17
- data/lib/kiba.rb +14 -11
- data/lib/kiba/context.rb +9 -5
- data/lib/kiba/control.rb +1 -1
- data/lib/kiba/dsl_extensions/config.rb +1 -1
- data/lib/kiba/parser.rb +6 -22
- data/lib/kiba/streaming_runner.rb +62 -5
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +15 -7
- data/test/shared_runner_tests.rb +227 -0
- data/test/support/shared_tests.rb +1 -1
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +2 -2
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_csv_destination.rb +2 -2
- data/test/support/test_csv_source.rb +1 -1
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +1 -1
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +1 -1
- data/test/test_integration.rb +38 -33
- data/test/test_parser.rb +16 -50
- data/test/test_run.rb +37 -0
- data/test/test_streaming_runner.rb +44 -23
- metadata +45 -30
- data/.travis.yml +0 -15
- data/appveyor.yml +0 -26
- data/bin/kiba +0 -5
- data/lib/kiba/cli.rb +0 -16
- data/lib/kiba/runner.rb +0 -78
- data/test/common/runner.rb +0 -137
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/namespace_conflict.etl +0 -9
- data/test/fixtures/some_extension.rb +0 -4
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -21
- data/test/test_runner.rb +0 -6
data/bin/kiba
DELETED
data/lib/kiba/cli.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'kiba'
|
2
|
-
|
3
|
-
module Kiba
|
4
|
-
class Cli
|
5
|
-
def self.run(args)
|
6
|
-
unless args.size == 1
|
7
|
-
puts 'Syntax: kiba your-script.etl'
|
8
|
-
exit(-1)
|
9
|
-
end
|
10
|
-
filename = args[0]
|
11
|
-
script_content = IO.read(filename)
|
12
|
-
job_definition = Kiba.parse(script_content, filename)
|
13
|
-
Kiba.run(job_definition)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/kiba/runner.rb
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
module Kiba
|
2
|
-
module Runner
|
3
|
-
extend self
|
4
|
-
|
5
|
-
# allow to handle a block form just like a regular transform
|
6
|
-
class AliasingProc < Proc
|
7
|
-
alias_method :process, :call
|
8
|
-
end
|
9
|
-
|
10
|
-
def run(control)
|
11
|
-
# TODO: add a dry-run (not instantiating mode) to_instances call
|
12
|
-
# that will validate the job definition from a syntax pov before
|
13
|
-
# going any further. This could be shared with the parser.
|
14
|
-
run_pre_processes(control)
|
15
|
-
process_rows(
|
16
|
-
to_instances(control.sources),
|
17
|
-
to_instances(control.transforms, true),
|
18
|
-
destinations = to_instances(control.destinations)
|
19
|
-
)
|
20
|
-
close_destinations(destinations)
|
21
|
-
# TODO: when I add post processes as class, I'll have to add a test to
|
22
|
-
# make sure instantiation occurs after the main processing is done (#16)
|
23
|
-
run_post_processes(control)
|
24
|
-
end
|
25
|
-
|
26
|
-
def run_pre_processes(control)
|
27
|
-
to_instances(control.pre_processes, true, false).each(&:call)
|
28
|
-
end
|
29
|
-
|
30
|
-
def run_post_processes(control)
|
31
|
-
to_instances(control.post_processes, true, false).each(&:call)
|
32
|
-
end
|
33
|
-
|
34
|
-
def close_destinations(destinations)
|
35
|
-
destinations
|
36
|
-
.find_all { |d| d.respond_to?(:close) }
|
37
|
-
.each(&:close)
|
38
|
-
end
|
39
|
-
|
40
|
-
def process_rows(sources, transforms, destinations)
|
41
|
-
sources.each do |source|
|
42
|
-
source.each do |row|
|
43
|
-
transforms.each do |transform|
|
44
|
-
row = transform.process(row)
|
45
|
-
break unless row
|
46
|
-
end
|
47
|
-
next unless row
|
48
|
-
destinations.each do |destination|
|
49
|
-
destination.write(row)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
56
|
-
def to_instances(definitions, allow_block = false, allow_class = true)
|
57
|
-
definitions.map do |definition|
|
58
|
-
to_instance(
|
59
|
-
*definition.values_at(:klass, :args, :block),
|
60
|
-
allow_block, allow_class
|
61
|
-
)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
def to_instance(klass, args, block, allow_block, allow_class)
|
66
|
-
if klass
|
67
|
-
fail 'Class form is not allowed here' unless allow_class
|
68
|
-
klass.new(*args)
|
69
|
-
elsif block
|
70
|
-
fail 'Block form is not allowed here' unless allow_block
|
71
|
-
AliasingProc.new(&block)
|
72
|
-
else
|
73
|
-
# TODO: support block passing to a class form definition?
|
74
|
-
fail 'Class and block form cannot be used together at the moment'
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
data/test/common/runner.rb
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
require 'minitest/mock'
|
2
|
-
require_relative '../support/test_enumerable_source'
|
3
|
-
|
4
|
-
module SharedRunnerTests
|
5
|
-
def kiba_run(job)
|
6
|
-
Kiba.run(job)
|
7
|
-
end
|
8
|
-
|
9
|
-
def rows
|
10
|
-
@rows ||= [
|
11
|
-
{ identifier: 'first-row' },
|
12
|
-
{ identifier: 'second-row' }
|
13
|
-
]
|
14
|
-
end
|
15
|
-
|
16
|
-
def control
|
17
|
-
@control ||= begin
|
18
|
-
control = Kiba::Control.new
|
19
|
-
# this will yield a single row for testing
|
20
|
-
control.sources << {
|
21
|
-
klass: TestEnumerableSource,
|
22
|
-
args: [rows]
|
23
|
-
}
|
24
|
-
control
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_block_transform_processing
|
29
|
-
# is there a better way to assert a block was called in minitest?
|
30
|
-
control.transforms << { block: lambda { |r| @called = true; r } }
|
31
|
-
kiba_run(control)
|
32
|
-
assert_equal true, @called
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_dismissed_row_not_passed_to_next_transform
|
36
|
-
@called = nil
|
37
|
-
control.transforms << { block: lambda { |_| nil } }
|
38
|
-
control.transforms << { block: lambda { |_| @called = true; nil } }
|
39
|
-
kiba_run(control)
|
40
|
-
assert_nil @called
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_post_process_runs_once
|
44
|
-
assert_equal 2, rows.size
|
45
|
-
@called = 0
|
46
|
-
control.post_processes << { block: lambda { @called += 1 } }
|
47
|
-
kiba_run(control)
|
48
|
-
assert_equal 1, @called
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_post_process_not_called_after_row_failure
|
52
|
-
@called = nil
|
53
|
-
control.transforms << { block: lambda { |_| fail 'FAIL' } }
|
54
|
-
control.post_processes << { block: lambda { @called = true } }
|
55
|
-
assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
|
56
|
-
assert_nil @called
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_pre_process_runs_once
|
60
|
-
assert_equal 2, rows.size
|
61
|
-
@called = 0
|
62
|
-
control.pre_processes << { block: lambda { @called += 1 } }
|
63
|
-
kiba_run(control)
|
64
|
-
assert_equal 1, @called
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_pre_process_runs_before_source_is_instantiated
|
68
|
-
calls = []
|
69
|
-
|
70
|
-
mock_source_class = MiniTest::Mock.new
|
71
|
-
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
72
|
-
calls << :source_instantiated
|
73
|
-
end
|
74
|
-
|
75
|
-
control = Kiba::Control.new
|
76
|
-
control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
|
77
|
-
control.sources << { klass: mock_source_class }
|
78
|
-
kiba_run(control)
|
79
|
-
|
80
|
-
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
81
|
-
assert_mock mock_source_class
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_no_error_raised_if_destination_close_not_implemented
|
85
|
-
# NOTE: this fake destination does not implement `close`
|
86
|
-
destination_instance = MiniTest::Mock.new
|
87
|
-
|
88
|
-
mock_destination_class = MiniTest::Mock.new
|
89
|
-
mock_destination_class.expect(:new, destination_instance)
|
90
|
-
|
91
|
-
control = Kiba::Control.new
|
92
|
-
control.destinations << { klass: mock_destination_class }
|
93
|
-
kiba_run(control)
|
94
|
-
assert_mock mock_destination_class
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_destination_close_called_if_defined
|
98
|
-
destination_instance = MiniTest::Mock.new
|
99
|
-
destination_instance.expect(:close, nil)
|
100
|
-
mock_destination_class = MiniTest::Mock.new
|
101
|
-
mock_destination_class.expect(:new, destination_instance)
|
102
|
-
|
103
|
-
control = Kiba::Control.new
|
104
|
-
control.destinations << { klass: mock_destination_class }
|
105
|
-
kiba_run(control)
|
106
|
-
assert_mock destination_instance
|
107
|
-
assert_mock mock_destination_class
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_use_next_to_exit_early_from_block_transform
|
111
|
-
assert_equal 2, rows.size
|
112
|
-
|
113
|
-
# calling "return row" from a block is forbidden, but you can use "next" instead
|
114
|
-
b = lambda do |row|
|
115
|
-
if row.fetch(:identifier) == 'first-row'
|
116
|
-
# demonstrate how to remove a row from the pipeline via next
|
117
|
-
next
|
118
|
-
else
|
119
|
-
# demonstrate how you can reformat via next
|
120
|
-
next({new_identifier: row.fetch(:identifier)})
|
121
|
-
end
|
122
|
-
fail "This should not be called"
|
123
|
-
end
|
124
|
-
control.transforms << { block: b }
|
125
|
-
|
126
|
-
# keep track of the rows
|
127
|
-
@remaining_rows = []
|
128
|
-
checker = lambda { |row| @remaining_rows << row; row }
|
129
|
-
control.transforms << { block: checker }
|
130
|
-
|
131
|
-
kiba_run(control)
|
132
|
-
|
133
|
-
# the first row should have been removed
|
134
|
-
# and the second row should have been reformatted
|
135
|
-
assert_equal [{new_identifier: 'second-row'}], @remaining_rows
|
136
|
-
end
|
137
|
-
end
|
data/test/fixtures/bogus.etl
DELETED
data/test/fixtures/namespace_conflict.etl
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
fail "Context should not be visible without Kiba namespace" if defined?(Context)
|
2
|
-
fail "Control should not be visible without Kiba namespace" if defined?(Control)
|
3
|
-
fail "Parser should not be visible without Kiba namespace" if defined?(Parser)
|
4
|
-
fail "Config should not be visible without Kiba namespace" if defined?(DSLExtensions::Config)
|
5
|
-
|
6
|
-
# verify Kiba config (namespaced under Kiba::DSLExtensions::Config)
|
7
|
-
# isn't causing troubles to implementers using a top-level DSLExtensions module
|
8
|
-
require_relative 'some_extension'
|
9
|
-
extend DSLExtensions::SomeExtension
|
data/test/fixtures/valid.etl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
# this does nothing
|
data/test/test_cli.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require_relative 'helper'
|
2
|
-
require 'kiba/cli'
|
3
|
-
|
4
|
-
class TestCli < Kiba::Test
|
5
|
-
def test_cli_launches
|
6
|
-
Kiba::Cli.run([fixture('valid.etl')])
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_cli_reports_filename_and_lineno
|
10
|
-
exception = assert_raises(NameError) do
|
11
|
-
Kiba::Cli.run([fixture('bogus.etl')])
|
12
|
-
end
|
13
|
-
|
14
|
-
assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
|
15
|
-
assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_namespace_conflict
|
19
|
-
Kiba::Cli.run([fixture('namespace_conflict.etl')])
|
20
|
-
end
|
21
|
-
end
|