chicago-etl 0.0.13 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -3
- data/README.rdoc +4 -1
- data/VERSION +1 -1
- data/chicago-etl.gemspec +59 -22
- data/chicago-flow.gemspec +92 -0
- data/lib/chicago/etl/batch.rb +9 -2
- data/lib/chicago/etl/core_extensions.rb +12 -0
- data/lib/chicago/etl/counter.rb +8 -1
- data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
- data/lib/chicago/etl/key_builder.rb +17 -39
- data/lib/chicago/etl/load_dataset_builder.rb +3 -1
- data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
- data/lib/chicago/etl/pipeline.rb +151 -0
- data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
- data/lib/chicago/etl/screens/column_screen.rb +26 -25
- data/lib/chicago/etl/screens/invalid_element.rb +5 -5
- data/lib/chicago/etl/screens/missing_value.rb +4 -2
- data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
- data/lib/chicago/etl/table_builder.rb +4 -2
- data/lib/chicago/etl/task_invocation.rb +0 -1
- data/lib/chicago/etl/transformations.rb +128 -0
- data/lib/chicago/etl.rb +39 -8
- data/lib/chicago/flow/array_sink.rb +35 -0
- data/lib/chicago/flow/array_source.rb +15 -0
- data/lib/chicago/flow/dataset_source.rb +23 -0
- data/lib/chicago/flow/errors.rb +14 -0
- data/lib/chicago/flow/filter.rb +15 -0
- data/lib/chicago/flow/mysql.rb +4 -0
- data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
- data/lib/chicago/flow/mysql_file_sink.rb +68 -0
- data/lib/chicago/flow/null_sink.rb +8 -0
- data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
- data/lib/chicago/flow/pipeline_stage.rb +68 -0
- data/lib/chicago/flow/sink.rb +53 -0
- data/lib/chicago/flow/transformation.rb +169 -0
- data/lib/chicago/flow/transformation_chain.rb +40 -0
- data/spec/etl/batch_spec.rb +2 -1
- data/spec/etl/core_extensions_spec.rb +13 -0
- data/spec/etl/dataset_batch_stage_spec.rb +55 -0
- data/spec/etl/key_builder_spec.rb +25 -83
- data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
- data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
- data/spec/etl/screens/invalid_element_spec.rb +10 -11
- data/spec/etl/screens/missing_value_spec.rb +21 -21
- data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
- data/spec/etl/transformations_spec.rb +109 -0
- data/spec/flow/array_sink_spec.rb +26 -0
- data/spec/flow/array_source_spec.rb +20 -0
- data/spec/flow/dataset_source_spec.rb +15 -0
- data/spec/flow/filter_spec.rb +13 -0
- data/spec/flow/mysql_file_serializer_spec.rb +27 -0
- data/spec/flow/mysql_file_sink_spec.rb +94 -0
- data/spec/flow/mysql_integration_spec.rb +72 -0
- data/spec/flow/pipeline_stage_spec.rb +89 -0
- data/spec/flow/transformation_chain_spec.rb +76 -0
- data/spec/flow/transformation_spec.rb +91 -0
- data/spec/spec_helper.rb +5 -0
- metadata +135 -39
- data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
- data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
- data/lib/chicago/etl/screens/composite_screen.rb +0 -17
- data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
- data/lib/chicago/etl/sink.rb +0 -61
- data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
- data/spec/etl/mysql_dumpfile_spec.rb +0 -42
- data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
- data/spec/etl/screens/composite_screen_spec.rb +0 -25
- data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
- data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
- data/spec/etl/sink_spec.rb +0 -7
- data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::ArraySource: iterating over the wrapped rows with
# #each, and the optional field list given as a second constructor argument.
describe Chicago::Flow::ArraySource do
  it "has an each method that yields rows" do
    rows = [{:a => 1}]

    described_class.new(rows).each { |row| row.should == {:a => 1} }
  end

  it "doesn't know about any fields rows have by default" do
    no_rows = []

    described_class.new(no_rows).fields.should == []
    described_class.new(no_rows).should_not have_defined_fields
  end

  it "can optionally define which fields will be in rows" do
    # Fields may be given either as a list or as a single symbol.
    from_list   = described_class.new([], [:a, :b])
    from_symbol = described_class.new([], :a)

    from_list.fields.should == [:a, :b]
    from_symbol.fields.should == [:a]
    described_class.new([], :a).should have_defined_fields
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::DatasetSource. A test double stands in for the
# dataset so each example can check which message the source sends to it.
describe Chicago::Flow::DatasetSource do
  let(:dataset) { stub(:dataset) }

  it "should delegate each to the dataset" do
    dataset.should_receive(:each)
    described_class.new(dataset).each {|row| }
  end

  it "gets columns from the dataset" do
    # Only the message is verified here; the return value of #fields is not
    # asserted by this example.
    dataset.should_receive(:columns)
    described_class.new(dataset).fields
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::Filter: #process answers nil for a row that is
# filtered out and the row itself when the filter's block accepts it.
describe Chicago::Flow::Filter do
  it "filters all rows by default" do
    row = {:a => 1}

    subject.process(row).should be_nil
  end

  it "filters rows given a block" do
    only_rows_with_a = described_class.new do |row|
      row.has_key?(:a)
    end

    only_rows_with_a.process(:a => 1).should == {:a => 1}
    only_rows_with_a.process(:b => 1).should be_nil
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::MysqlFileSerializer#serialize, covering nil,
# booleans, and the date/time classes.
describe Chicago::Flow::MysqlFileSerializer do
  it "serializes nil into NULL" do
    serialized = subject.serialize(nil)

    serialized.should == "NULL"
  end

  it "serializes true into '1'" do
    serialized = subject.serialize(true)

    serialized.should == "1"
  end

  it "serializes false into '0'" do
    serialized = subject.serialize(false)

    serialized.should == "0"
  end

  it "serializes times into mysql time format" do
    time = Time.local(2011, 1, 2, 10, 30, 50)

    subject.serialize(time).should == "2011-01-02 10:30:50"
  end

  it "serializes datetimes into mysql time format" do
    datetime = DateTime.new(2011, 1, 2, 10, 30, 50)

    subject.serialize(datetime).should == "2011-01-02 10:30:50"
  end

  it "serializes dates into mysql date format" do
    date = Date.new(2011, 1, 2)

    subject.serialize(date).should == "2011-01-02"
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'sequel'
|
3
|
+
|
4
|
+
# Specs for Chicago::Flow::MysqlFileSink. The database, dataset and CSV
# writer are all test doubles, so no file or database is touched; the
# examples check what the sink writes to the CSV double and which messages
# it sends to the dataset when closed or truncated.
describe Chicago::Flow::MysqlFileSink do
  let(:dataset) do
    mock(:dataset).as_null_object
  end

  let(:db) do
    mock(:db, :[] => dataset, :schema => [])
  end

  let(:csv) do
    mock(:csv)
  end

  let(:sink) do
    described_class.new(db, :table, [:foo], :filepath => "test_file")
  end

  before(:each) do
    # Route every CSV.open call to the double.
    CSV.stub(:open).and_return(csv)
    csv.stub(:<<)
    csv.stub(:close).and_return(csv)
    csv.stub(:flush)

    # Report a non-empty file unless an example overrides this stub.
    File.stub(:size?).and_return(true)
  end

  it "has the same name as the table it is loading into" do
    sink.name.should == :table
  end

  it "writes specified columns to rows in a file" do
    row = {:foo => 1, :bar => 2}

    csv.should_receive(:<<).with([1])
    sink << row
  end

  it "serializes values before writing to the file" do
    any_serializer = Chicago::Flow::MysqlFileSerializer.any_instance

    any_serializer.should_receive(:serialize).with(1).and_return(1)
    sink << {:foo => 1}
  end

  it "has defined fields" do
    sink.should have_defined_fields
    sink.fields.should == [:foo]
  end

  it "loads the csv file into the database when closed" do
    dataset.should_receive(:load_csv_infile).with("test_file", [:foo], :set => {})

    sink.close
  end

  it "uses the :set hash to load constant values" do
    returned = sink.set_constant_values(:bar => 1)
    returned.should == sink

    dataset.should_receive(:load_csv_infile).with("test_file", [:foo], :set => {:bar => 1})
    sink.close
  end

  it "does not IGNORE rows by default" do
    dataset.should_not_receive(:insert_ignore)

    sink.close
  end

  it "can specify that INSERT IGNORE should be used" do
    dataset.should_receive(:insert_ignore)

    ignoring_sink = described_class.new(db, :table, [:foo],
                                        :filepath => "test_file",
                                        :ignore => true)
    ignoring_sink.close
  end

  it "writes csv to a tempfile if no explicit filepath is given" do
    tempfile_sink = described_class.new(db, :table, [:foo])

    tempfile_sink.filepath.should match(/table\.\d+\.csv/)
  end

  it "doesn't attempt to load data if the file is empty or does not exist" do
    File.stub(:size?).and_return(false)
    dataset.should_not_receive(:load_csv_infile)

    sink.close
  end

  it "removes the temporary file when closed" do
    File.stub(:exists?).and_return(true)
    File.should_receive(:unlink).with("test_file")

    sink.close
  end

  it "truncates the table by default" do
    dataset.should_receive(:truncate)

    sink.truncate
  end

  it "can have a truncation strategy set" do
    outcome = nil
    sink.truncation_strategy = lambda { outcome = "deleted table" }

    sink.truncate

    outcome.should == "deleted table"
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Integration spec: copies rows from a :source table to a :destination table
# in TEST_DB through a PipelineStage, duplicating every row onto a second
# stream that is captured by an ArraySink.
describe "Mysql -> Mysql through transformation chain" do
  # Transformation that emits each row unchanged plus a copy assigned to the
  # stream named by the :onto option.
  let(:dup_row) {
    Class.new(Chicago::Flow::Transformation) {
      def output_streams
        [:default, @options[:onto]].flatten
      end

      def process_row(row)
        new_row = assign_stream(row.dup, @options[:onto])
        [row, new_row]
      end
    }
  }

  before :all do
    # Both tables share the same layout; create whichever is missing.
    [:source, :destination].each do |table|
      next if TEST_DB.table_exists?(table)

      TEST_DB.create_table(table) do
        primary_key :id
        varchar :foo
        binary :bin, :size => 1
      end
    end
  end

  before :each do
    [:source, :destination].each { |table| TEST_DB[table].truncate }
  end

  after :each do
    [:source, :destination].each { |table| TEST_DB[table].truncate }
  end

  it "copies data from source to destination" do
    TEST_DB[:source].multi_insert([{:foo => nil, :bin => :unhex.sql_function("1F")},
                                   {:foo => "Hello", :bin => :unhex.sql_function("1F")}])

    # The binary column is read back hex-encoded so it can be compared as a
    # plain string below.
    source = Chicago::Flow::DatasetSource.
      new(TEST_DB[:source].
          select(:id, :foo, :hex.sql_function(:bin).as(:bin)))
    sink_1 = Chicago::Flow::MysqlFileSink.
      new(TEST_DB, :destination, [:id, :foo, :bin])
    sink_2 = Chicago::Flow::ArraySink.new([:id, :foo, :bin])

    stage = Chicago::Flow::PipelineStage.
      new(:transformations => [dup_row.new(:onto => :other)])

    # No sinks are registered yet. Expect the library's own error class
    # rather than any exception, so an unrelated failure (e.g. a NameError)
    # cannot satisfy this expectation.
    expect { stage.execute(source) }.to raise_error(Chicago::Flow::Error)

    stage.register_sink(:default, sink_1)
    stage.register_sink(:other, sink_2)

    stage.execute(source)

    expected = [{:id => 1, :foo => nil, :bin => "1F"},
                {:id => 2, :foo => "Hello", :bin => "1F"}]

    sink_2.data.should == expected
    TEST_DB[:destination].select(:id, :foo, :hex.sql_function(:bin).as(:bin)).all.should == expected
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::PipelineStage: registering and looking up sinks,
# moving rows from a source through transformations into sinks, routing
# rows by stream, and validation when sinks have not been registered.
describe Chicago::Flow::PipelineStage do
  # Transformation that increments row[:a].
  let(:transform) {
    Class.new(Chicago::Flow::Transformation) {
      def process_row(row)
        row[:a] += 1
        row
      end
    }
  }

  # Transformation that passes the row through and also emits a row tagged
  # for the :error stream. The extra stream is declared by overriding
  # #output_streams.
  let(:add_error) {
    Class.new(Chicago::Flow::Transformation) {
      def output_streams
        [:default, :error]
      end

      def process_row(row)
        [row, {Chicago::Flow::STREAM => :error, :message => "error"}]
      end
    }
  }

  let(:sink) { Chicago::Flow::ArraySink.new(:test) }
  let(:source) { Chicago::Flow::ArraySource.new([{:a => 1}]) }

  it "returns all sinks" do
    stage = described_class.new.register_sink(:default, sink)
    stage.sinks.should == [sink]
  end

  it "returns a sink by name" do
    stage = described_class.new.register_sink(:default, sink)
    stage.sink(:default).should == sink
  end

  it "reads from source to sink" do
    pipeline = described_class.new.register_sink(:default, sink)
    pipeline.execute(source)
    sink.data.should == [{:a => 1}]
  end

  it "passes rows through transforms" do
    pipeline = described_class.new(:transformations => [transform.new]).
      register_sink(:default, sink)

    pipeline.execute(source)
    sink.data.should == [{:a => 2}]
  end

  it "writes rows to the appropriate sink for their stream, and strips the stream tag" do
    error_sink = Chicago::Flow::ArraySink.new(:test)

    pipeline = described_class.new(:transformations => [add_error.new]).
      register_sink(:default, sink).
      register_sink(:error, error_sink)

    pipeline.execute(source)
    sink.data.should == [{:a => 1}]
    error_sink.data.should == [{:message => "error"}]
  end

  it "calls an error handler if sinks are not registered" do
    error_handler = mock(:error_handler)
    error_handler.should_receive(:unregistered_sinks).
      with([:default, :error])

    pipeline = described_class.new(:transformations => [add_error.new],
                                   :error_handler => error_handler)

    pipeline.validate_pipeline
  end

  it "by default raises an exception if the pipeline is not valid when executed" do
    pipeline = described_class.new(:transformations => [add_error.new])
    expect { pipeline.execute(source) }.to raise_error(Chicago::Flow::Error)
  end

  it "opens sinks before writing and closes them afterwards" do
    sink = mock(:sink)
    pipeline = described_class.new.register_sink(:default, sink)

    # .ordered makes the example fail if the messages arrive in any other
    # sequence. The source holds a single row and no transformations are
    # configured, so one write is expected between open and close.
    sink.should_receive(:open).ordered
    sink.should_receive(:<<).ordered
    sink.should_receive(:close).ordered

    pipeline.execute(source)
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::TransformationChain: composing transformations,
# handling multiple or nil results, stream tagging, and flushing rows that
# transformations have held back.
describe Chicago::Flow::TransformationChain do
  # Transformation that increments row[:a].
  let(:add_1_to_a) do
    Class.new(Chicago::Flow::Transformation) do
      def process_row(row)
        row[:a] = row[:a] + 1
        row
      end
    end
  end

  # Transformation that returns the row plus a copy assigned to the stream
  # given by the :onto option.
  let(:dup_row) do
    Class.new(Chicago::Flow::Transformation) do
      def output_streams
        [:default, @options[:onto]].flatten
      end

      def process_row(row)
        copy = assign_stream(row.dup, @options[:onto])
        [row, copy]
      end
    end
  end

  # Transformation that emits nothing from process_row and hands back every
  # row it has seen when flushed.
  let(:store_until_flush) do
    Class.new(Chicago::Flow::Transformation) do
      def process_row(row)
        (@cache ||= []) << row
        nil
      end

      def flush
        @cache
      end
    end
  end

  it "chains transformations" do
    chain = described_class.new(add_1_to_a.new, add_1_to_a.new)

    chain.process({:a => 1}).should == [{:a => 3}]
  end

  it "can cope with multiple return rows from transformations" do
    chain = described_class.new(add_1_to_a.new, dup_row.new, add_1_to_a.new)

    chain.process({:a => 1}).should == [{:a => 3}, {:a => 3}]
  end

  it "can cope with a filter returning nil" do
    chain = described_class.new(Chicago::Flow::Filter.new,
                                dup_row.new,
                                add_1_to_a.new)

    chain.process({:a => 1}).should == []
  end

  it "can write to different streams" do
    chain = described_class.new(dup_row.new(:onto => :other), add_1_to_a.new)

    chain.process({:a => 1}).
      should == [{:a => 2}, {:a => 1, Chicago::Flow::STREAM => :other}]
  end

  it "knows what streams it writes to as a chain" do
    chain = described_class.new(dup_row.new(:onto => :other), add_1_to_a.new)

    chain.output_streams.should == [:default, :other]
  end

  it "can flush rows held back by transforms" do
    transformations = [store_until_flush.new,
                       add_1_to_a.new,
                       store_until_flush.new,
                       add_1_to_a.new]
    chain = described_class.new(*transformations)

    chain.process({:a => 1}).should == []
    chain.process({:a => 2}).should == []
    chain.flush.should == [{:a => 3}, {:a => 4}]
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Chicago::Flow::Transformation: stream selection, row processing
# via #process_row, flushing, declared added/removed fields, upstream and
# downstream field calculation, and required options.
describe Chicago::Flow::Transformation do
  # Subclass that increments row[:a].
  let(:add_1_to_a) {
    Class.new(described_class) {
      def process_row(row)
        row[:a] += 1
        row
      end
    }
  }

  # Subclass that declares the fields it adds and removes and changes the
  # row to match.
  let(:add_and_remove) {
    Class.new(described_class) {
      adds_fields :b, :c
      removes_fields :a

      def process_row(row)
        row.delete(:a)
        row[:b] = 1
        row[:c] = 2
        row
      end
    }
  }

  it "writes to the :default stream by default" do
    subject.output_streams.should == [:default]
  end

  it "may apply to a particular stream" do
    subject.applies_to_stream?(:default).should be_true
    subject.applies_to_stream?(nil).should be_true
    described_class.new(:other).applies_to_stream?(:default).should be_false
    described_class.new(:other).applies_to_stream?(:other).should be_true
  end

  it "processes a row via #process_row" do
    add_1_to_a.new.process({:a => 1}).should == {:a => 2}
  end

  it "passes through rows not on its stream" do
    add_1_to_a.new(:other).process({:a => 1}).should == {:a => 1}
  end

  it "can apply to all streams using :all" do
    add_1_to_a.new(:all).process({:a => 1}).should == {:a => 2}
    add_1_to_a.new(:all).process({:a => 1, Chicago::Flow::STREAM => :other}).
      should == {:a => 2, Chicago::Flow::STREAM => :other}
  end

  it "can be flushed" do
    subject.flush.should == []
  end

  it "can specify which fields are added" do
    add_and_remove.new.added_fields.should == [:b, :c]
  end

  it "can specify which fields are removed" do
    add_and_remove.new.removed_fields.should == [:a]
  end

  it "can calculate downstream fields" do
    # Compared as sets because the order of the result is not specified here.
    Set.new(add_and_remove.new.downstream_fields([:a, :b, :d])).
      should == Set.new([:b, :c, :d])
  end

  it "can calculate upstream fields" do
    Set.new(add_and_remove.new.upstream_fields([:b, :c, :d])).
      should == Set.new([:a, :d])
  end

  it "has an empty array of added fields by default" do
    subject.added_fields.should == []
  end

  it "has an empty array of removed fields by default" do
    subject.removed_fields.should == []
  end

  it "has an empty array of required options by default" do
    subject.required_options.should == []
  end

  it "can enforce options" do
    klass = Class.new(described_class) { requires_options :foo }
    expect { klass.new }.to raise_error(ArgumentError)
    # A bare negative expectation: naming a class here would let any other
    # exception pass unnoticed.
    expect { klass.new(:foo => :bar) }.to_not raise_error
  end
end
|