kiba 2.0.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci.yml +41 -0
- data/COMM-LICENSE.md +348 -0
- data/Changes.md +38 -2
- data/Gemfile +1 -1
- data/ISSUE_TEMPLATE.md +7 -0
- data/LICENSE +3 -1
- data/Pro-Changes.md +82 -5
- data/README.md +12 -65
- data/Rakefile +8 -3
- data/kiba.gemspec +20 -17
- data/lib/kiba.rb +14 -11
- data/lib/kiba/context.rb +9 -5
- data/lib/kiba/control.rb +1 -1
- data/lib/kiba/dsl_extensions/config.rb +1 -1
- data/lib/kiba/parser.rb +6 -22
- data/lib/kiba/streaming_runner.rb +62 -5
- data/lib/kiba/version.rb +1 -1
- data/test/helper.rb +15 -7
- data/test/shared_runner_tests.rb +227 -0
- data/test/support/shared_tests.rb +1 -1
- data/test/support/test_aggregate_transform.rb +19 -0
- data/test/support/test_array_destination.rb +2 -2
- data/test/support/test_close_yielding_transform.rb +11 -0
- data/test/support/test_csv_destination.rb +2 -2
- data/test/support/test_csv_source.rb +1 -1
- data/test/support/test_destination_returning_nil.rb +12 -0
- data/test/support/test_duplicate_row_transform.rb +1 -1
- data/test/support/test_keyword_arguments_component.rb +14 -0
- data/test/support/test_mixed_arguments_component.rb +14 -0
- data/test/support/test_non_closing_transform.rb +5 -0
- data/test/support/test_yielding_transform.rb +1 -1
- data/test/test_integration.rb +38 -33
- data/test/test_parser.rb +16 -50
- data/test/test_run.rb +37 -0
- data/test/test_streaming_runner.rb +44 -23
- metadata +45 -30
- data/.travis.yml +0 -15
- data/appveyor.yml +0 -26
- data/bin/kiba +0 -5
- data/lib/kiba/cli.rb +0 -16
- data/lib/kiba/runner.rb +0 -78
- data/test/common/runner.rb +0 -137
- data/test/fixtures/bogus.etl +0 -2
- data/test/fixtures/namespace_conflict.etl +0 -9
- data/test/fixtures/some_extension.rb +0 -4
- data/test/fixtures/valid.etl +0 -1
- data/test/test_cli.rb +0 -21
- data/test/test_runner.rb +0 -6
data/bin/kiba
DELETED
data/lib/kiba/cli.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'kiba'
|
2
|
-
|
3
|
-
module Kiba
|
4
|
-
class Cli
|
5
|
-
def self.run(args)
|
6
|
-
unless args.size == 1
|
7
|
-
puts 'Syntax: kiba your-script.etl'
|
8
|
-
exit(-1)
|
9
|
-
end
|
10
|
-
filename = args[0]
|
11
|
-
script_content = IO.read(filename)
|
12
|
-
job_definition = Kiba.parse(script_content, filename)
|
13
|
-
Kiba.run(job_definition)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/kiba/runner.rb
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
module Kiba
|
2
|
-
module Runner
|
3
|
-
extend self
|
4
|
-
|
5
|
-
# allow to handle a block form just like a regular transform
|
6
|
-
class AliasingProc < Proc
|
7
|
-
alias_method :process, :call
|
8
|
-
end
|
9
|
-
|
10
|
-
def run(control)
|
11
|
-
# TODO: add a dry-run (not instantiating mode) to_instances call
|
12
|
-
# that will validate the job definition from a syntax pov before
|
13
|
-
# going any further. This could be shared with the parser.
|
14
|
-
run_pre_processes(control)
|
15
|
-
process_rows(
|
16
|
-
to_instances(control.sources),
|
17
|
-
to_instances(control.transforms, true),
|
18
|
-
destinations = to_instances(control.destinations)
|
19
|
-
)
|
20
|
-
close_destinations(destinations)
|
21
|
-
# TODO: when I add post processes as class, I'll have to add a test to
|
22
|
-
# make sure instantiation occurs after the main processing is done (#16)
|
23
|
-
run_post_processes(control)
|
24
|
-
end
|
25
|
-
|
26
|
-
def run_pre_processes(control)
|
27
|
-
to_instances(control.pre_processes, true, false).each(&:call)
|
28
|
-
end
|
29
|
-
|
30
|
-
def run_post_processes(control)
|
31
|
-
to_instances(control.post_processes, true, false).each(&:call)
|
32
|
-
end
|
33
|
-
|
34
|
-
def close_destinations(destinations)
|
35
|
-
destinations
|
36
|
-
.find_all { |d| d.respond_to?(:close) }
|
37
|
-
.each(&:close)
|
38
|
-
end
|
39
|
-
|
40
|
-
def process_rows(sources, transforms, destinations)
|
41
|
-
sources.each do |source|
|
42
|
-
source.each do |row|
|
43
|
-
transforms.each do |transform|
|
44
|
-
row = transform.process(row)
|
45
|
-
break unless row
|
46
|
-
end
|
47
|
-
next unless row
|
48
|
-
destinations.each do |destination|
|
49
|
-
destination.write(row)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
# not using keyword args because JRuby defaults to 1.9 syntax currently
|
56
|
-
def to_instances(definitions, allow_block = false, allow_class = true)
|
57
|
-
definitions.map do |definition|
|
58
|
-
to_instance(
|
59
|
-
*definition.values_at(:klass, :args, :block),
|
60
|
-
allow_block, allow_class
|
61
|
-
)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
def to_instance(klass, args, block, allow_block, allow_class)
|
66
|
-
if klass
|
67
|
-
fail 'Class form is not allowed here' unless allow_class
|
68
|
-
klass.new(*args)
|
69
|
-
elsif block
|
70
|
-
fail 'Block form is not allowed here' unless allow_block
|
71
|
-
AliasingProc.new(&block)
|
72
|
-
else
|
73
|
-
# TODO: support block passing to a class form definition?
|
74
|
-
fail 'Class and block form cannot be used together at the moment'
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
data/test/common/runner.rb
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
require 'minitest/mock'
|
2
|
-
require_relative '../support/test_enumerable_source'
|
3
|
-
|
4
|
-
module SharedRunnerTests
|
5
|
-
def kiba_run(job)
|
6
|
-
Kiba.run(job)
|
7
|
-
end
|
8
|
-
|
9
|
-
def rows
|
10
|
-
@rows ||= [
|
11
|
-
{ identifier: 'first-row' },
|
12
|
-
{ identifier: 'second-row' }
|
13
|
-
]
|
14
|
-
end
|
15
|
-
|
16
|
-
def control
|
17
|
-
@control ||= begin
|
18
|
-
control = Kiba::Control.new
|
19
|
-
# this will yield a single row for testing
|
20
|
-
control.sources << {
|
21
|
-
klass: TestEnumerableSource,
|
22
|
-
args: [rows]
|
23
|
-
}
|
24
|
-
control
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_block_transform_processing
|
29
|
-
# is there a better way to assert a block was called in minitest?
|
30
|
-
control.transforms << { block: lambda { |r| @called = true; r } }
|
31
|
-
kiba_run(control)
|
32
|
-
assert_equal true, @called
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_dismissed_row_not_passed_to_next_transform
|
36
|
-
@called = nil
|
37
|
-
control.transforms << { block: lambda { |_| nil } }
|
38
|
-
control.transforms << { block: lambda { |_| @called = true; nil } }
|
39
|
-
kiba_run(control)
|
40
|
-
assert_nil @called
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_post_process_runs_once
|
44
|
-
assert_equal 2, rows.size
|
45
|
-
@called = 0
|
46
|
-
control.post_processes << { block: lambda { @called += 1 } }
|
47
|
-
kiba_run(control)
|
48
|
-
assert_equal 1, @called
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_post_process_not_called_after_row_failure
|
52
|
-
@called = nil
|
53
|
-
control.transforms << { block: lambda { |_| fail 'FAIL' } }
|
54
|
-
control.post_processes << { block: lambda { @called = true } }
|
55
|
-
assert_raises(RuntimeError, 'FAIL') { kiba_run(control) }
|
56
|
-
assert_nil @called
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_pre_process_runs_once
|
60
|
-
assert_equal 2, rows.size
|
61
|
-
@called = 0
|
62
|
-
control.pre_processes << { block: lambda { @called += 1 } }
|
63
|
-
kiba_run(control)
|
64
|
-
assert_equal 1, @called
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_pre_process_runs_before_source_is_instantiated
|
68
|
-
calls = []
|
69
|
-
|
70
|
-
mock_source_class = MiniTest::Mock.new
|
71
|
-
mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
|
72
|
-
calls << :source_instantiated
|
73
|
-
end
|
74
|
-
|
75
|
-
control = Kiba::Control.new
|
76
|
-
control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
|
77
|
-
control.sources << { klass: mock_source_class }
|
78
|
-
kiba_run(control)
|
79
|
-
|
80
|
-
assert_equal [:pre_processor_executed, :source_instantiated], calls
|
81
|
-
assert_mock mock_source_class
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_no_error_raised_if_destination_close_not_implemented
|
85
|
-
# NOTE: this fake destination does not implement `close`
|
86
|
-
destination_instance = MiniTest::Mock.new
|
87
|
-
|
88
|
-
mock_destination_class = MiniTest::Mock.new
|
89
|
-
mock_destination_class.expect(:new, destination_instance)
|
90
|
-
|
91
|
-
control = Kiba::Control.new
|
92
|
-
control.destinations << { klass: mock_destination_class }
|
93
|
-
kiba_run(control)
|
94
|
-
assert_mock mock_destination_class
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_destination_close_called_if_defined
|
98
|
-
destination_instance = MiniTest::Mock.new
|
99
|
-
destination_instance.expect(:close, nil)
|
100
|
-
mock_destination_class = MiniTest::Mock.new
|
101
|
-
mock_destination_class.expect(:new, destination_instance)
|
102
|
-
|
103
|
-
control = Kiba::Control.new
|
104
|
-
control.destinations << { klass: mock_destination_class }
|
105
|
-
kiba_run(control)
|
106
|
-
assert_mock destination_instance
|
107
|
-
assert_mock mock_destination_class
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_use_next_to_exit_early_from_block_transform
|
111
|
-
assert_equal 2, rows.size
|
112
|
-
|
113
|
-
# calling "return row" from a block is forbidden, but you can use "next" instead
|
114
|
-
b = lambda do |row|
|
115
|
-
if row.fetch(:identifier) == 'first-row'
|
116
|
-
# demonstrate how to remove a row from the pipeline via next
|
117
|
-
next
|
118
|
-
else
|
119
|
-
# demonstrate how you can reformat via next
|
120
|
-
next({new_identifier: row.fetch(:identifier)})
|
121
|
-
end
|
122
|
-
fail "This should not be called"
|
123
|
-
end
|
124
|
-
control.transforms << { block: b }
|
125
|
-
|
126
|
-
# keep track of the rows
|
127
|
-
@remaining_rows = []
|
128
|
-
checker = lambda { |row| @remaining_rows << row; row }
|
129
|
-
control.transforms << { block: checker }
|
130
|
-
|
131
|
-
kiba_run(control)
|
132
|
-
|
133
|
-
# the first row should have been removed
|
134
|
-
# and the second row should have been reformatted
|
135
|
-
assert_equal [{new_identifier: 'second-row'}], @remaining_rows
|
136
|
-
end
|
137
|
-
end
|
data/test/fixtures/namespace_conflict.etl
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
fail "Context should not be visible without Kiba namespace" if defined?(Context)
|
2
|
-
fail "Control should not be visible without Kiba namespace" if defined?(Control)
|
3
|
-
fail "Parser should not be visible without Kiba namespace" if defined?(Parser)
|
4
|
-
fail "Config should not be visible without Kiba namespace" if defined?(DSLExtensions::Config)
|
5
|
-
|
6
|
-
# verify Kiba config (namespaced under Kiba::DSLExtensions::Config)
|
7
|
-
# isn't causing troubles to implementers using a top-level DSLExtensions module
|
8
|
-
require_relative 'some_extension'
|
9
|
-
extend DSLExtensions::SomeExtension
|
data/test/fixtures/valid.etl
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
# this does nothing
|
data/test/test_cli.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require_relative 'helper'
|
2
|
-
require 'kiba/cli'
|
3
|
-
|
4
|
-
class TestCli < Kiba::Test
|
5
|
-
def test_cli_launches
|
6
|
-
Kiba::Cli.run([fixture('valid.etl')])
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_cli_reports_filename_and_lineno
|
10
|
-
exception = assert_raises(NameError) do
|
11
|
-
Kiba::Cli.run([fixture('bogus.etl')])
|
12
|
-
end
|
13
|
-
|
14
|
-
assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
|
15
|
-
assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_namespace_conflict
|
19
|
-
Kiba::Cli.run([fixture('namespace_conflict.etl')])
|
20
|
-
end
|
21
|
-
end
|