chicago-etl 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +16 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +21 -0
- data/Rakefile +42 -0
- data/VERSION +1 -0
- data/chicago-etl.gemspec +117 -0
- data/lib/chicago/etl/batch.rb +110 -0
- data/lib/chicago/etl/buffering_insert_writer.rb +36 -0
- data/lib/chicago/etl/counter.rb +36 -0
- data/lib/chicago/etl/key_builder.rb +198 -0
- data/lib/chicago/etl/load_dataset_builder.rb +75 -0
- data/lib/chicago/etl/mysql_dumpfile.rb +32 -0
- data/lib/chicago/etl/mysql_load_file_value_transformer.rb +24 -0
- data/lib/chicago/etl/screens/column_screen.rb +59 -0
- data/lib/chicago/etl/screens/composite_screen.rb +17 -0
- data/lib/chicago/etl/screens/invalid_element.rb +27 -0
- data/lib/chicago/etl/screens/missing_value.rb +22 -0
- data/lib/chicago/etl/screens/out_of_bounds.rb +33 -0
- data/lib/chicago/etl/sequel/dependant_tables.rb +48 -0
- data/lib/chicago/etl/sequel/filter_to_etl_batch.rb +53 -0
- data/lib/chicago/etl/sequel/load_data_infile.rb +19 -0
- data/lib/chicago/etl/sink.rb +61 -0
- data/lib/chicago/etl/table_builder.rb +45 -0
- data/lib/chicago/etl/task_invocation.rb +32 -0
- data/lib/chicago/etl/tasks.rb +34 -0
- data/lib/chicago/etl/transformations/add_insert_timestamp.rb +16 -0
- data/lib/chicago/etl/transformations/uk_post_code.rb +40 -0
- data/lib/chicago/etl/transformations/uk_post_code_field.rb +59 -0
- data/lib/chicago/etl.rb +35 -0
- data/lib/chicago-etl.rb +0 -0
- data/spec/db_connections.yml.dist +4 -0
- data/spec/etl/batch_spec.rb +86 -0
- data/spec/etl/counter_spec.rb +44 -0
- data/spec/etl/etl_batch_id_dataset_filter.rb +29 -0
- data/spec/etl/key_builder_spec.rb +190 -0
- data/spec/etl/load_dataset_builder_spec.rb +86 -0
- data/spec/etl/mysql_dumpfile_spec.rb +42 -0
- data/spec/etl/mysql_load_file_value_transformer_spec.rb +27 -0
- data/spec/etl/screens/composite_screen_spec.rb +25 -0
- data/spec/etl/screens/invalid_element_spec.rb +27 -0
- data/spec/etl/screens/missing_value_spec.rb +58 -0
- data/spec/etl/screens/out_of_bounds_spec.rb +64 -0
- data/spec/etl/sequel/dependant_tables_spec.rb +41 -0
- data/spec/etl/sequel/filter_to_etl_batch_spec.rb +54 -0
- data/spec/etl/sequel/load_data_infile_spec.rb +37 -0
- data/spec/etl/sink_spec.rb +7 -0
- data/spec/etl/table_builder_spec.rb +22 -0
- data/spec/etl/task_spec.rb +87 -0
- data/spec/etl/transformations/add_insert_timestamp_spec.rb +9 -0
- data/spec/etl/transformations/uk_post_code_field_spec.rb +95 -0
- data/spec/etl/transformations/uk_post_code_spec.rb +102 -0
- data/spec/spec_helper.rb +20 -0
- metadata +245 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::LoadDatasetBuilder do
|
4
|
+
let(:db) { stub(:database).as_null_object }
|
5
|
+
|
6
|
+
before :each do
|
7
|
+
db.stub(:[]).with(:original_users).
|
8
|
+
and_return(TEST_DB[:original_users])
|
9
|
+
db.stub(:[]).with(:original_preferences).
|
10
|
+
and_return(TEST_DB[:original_preferences])
|
11
|
+
db[:original_users].stub(:columns).
|
12
|
+
and_return([:id, :name, :email])
|
13
|
+
db[:original_preferences].stub(:columns).
|
14
|
+
and_return([:id, :spam])
|
15
|
+
end
|
16
|
+
|
17
|
+
it "selects from the specified table" do
|
18
|
+
subject.table(:original_users)
|
19
|
+
subject.build(db, [:name]).opts[:from].should == [:original_users]
|
20
|
+
end
|
21
|
+
|
22
|
+
it "selects the columns from the table" do
|
23
|
+
subject.configure { table(:original_users) }
|
24
|
+
|
25
|
+
subject.build(db, [:id, :name]).opts[:select].should == [:id.qualify(:original_users), :name.qualify(:original_users)]
|
26
|
+
end
|
27
|
+
|
28
|
+
it "can handle column renaming" do
|
29
|
+
subject.configure do
|
30
|
+
table :original_users
|
31
|
+
provide :original_id, :id
|
32
|
+
end
|
33
|
+
|
34
|
+
subject.build(db, [:original_id, :name]).opts[:select].
|
35
|
+
should == [:id.qualify(:original_users).as(:original_id), :name.qualify(:original_users)]
|
36
|
+
end
|
37
|
+
|
38
|
+
it "can provide constructed columns" do
|
39
|
+
subject.configure do
|
40
|
+
table :original_users
|
41
|
+
provide :original_id, :foo.qualify(:bar)
|
42
|
+
end
|
43
|
+
|
44
|
+
subject.build(db, [:original_id, :name]).opts[:select].
|
45
|
+
should == [:foo.qualify(:bar).as(:original_id), :name.qualify(:original_users)]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "left outer joins a denormalized table" do
|
49
|
+
subject.configure do
|
50
|
+
table :original_users
|
51
|
+
denormalize :original_preferences, :id => :id
|
52
|
+
end
|
53
|
+
|
54
|
+
subject.build(db, [:id, :name]).sql.should =~ /LEFT OUTER JOIN `original_preferences` ON \(`original_preferences`.`id` = `original_users`.`id`\)/
|
55
|
+
end
|
56
|
+
|
57
|
+
it "takes columns from the appropriate tables where possible" do
|
58
|
+
subject.configure do
|
59
|
+
table :original_users
|
60
|
+
denormalize :original_preferences, :id => :id
|
61
|
+
end
|
62
|
+
|
63
|
+
subject.build(db, [:id, :name, :spam]).opts[:select].
|
64
|
+
should == [:id.qualify(:original_users),
|
65
|
+
:name.qualify(:original_users),
|
66
|
+
:spam.qualify(:original_preferences)]
|
67
|
+
end
|
68
|
+
|
69
|
+
it "takes renames columns from denormalized tables" do
|
70
|
+
subject.configure do
|
71
|
+
table :original_users
|
72
|
+
denormalize :original_preferences, :id => :id
|
73
|
+
provide :email_allowed, :spam
|
74
|
+
end
|
75
|
+
|
76
|
+
subject.build(db, [:id, :name, :email_allowed]).opts[:select].
|
77
|
+
should include(:spam.qualify(:original_preferences).as(:email_allowed))
|
78
|
+
end
|
79
|
+
|
80
|
+
it "automatically renames ids of denormalized tables" do
|
81
|
+
subject.configure do
|
82
|
+
table :original_users
|
83
|
+
denormalize :original_preferences, :id => :id
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::MysqlDumpfile do
|
4
|
+
before :each do
|
5
|
+
@csv = mock(:csv)
|
6
|
+
end
|
7
|
+
|
8
|
+
it "outputs specified column values in order" do
|
9
|
+
dumpfile = described_class.new(@csv, [:foo, :bar])
|
10
|
+
@csv.should_receive(:<<).with(["1", "2"])
|
11
|
+
|
12
|
+
dumpfile << {:foo => "1", :bar => "2", :baz => "not output"}
|
13
|
+
end
|
14
|
+
|
15
|
+
it "transforms values with a MysqlLoadFileValueTransformer" do
|
16
|
+
transformer = mock(:transformer)
|
17
|
+
Chicago::ETL::MysqlLoadFileValueTransformer.stub(:new).and_return(transformer)
|
18
|
+
|
19
|
+
transformer.should_receive(:transform).with("bar").and_return("baz")
|
20
|
+
@csv.should_receive(:<<).with(["baz"])
|
21
|
+
|
22
|
+
dumpfile = described_class.new(@csv, [:foo])
|
23
|
+
dumpfile << {:foo => "bar"}
|
24
|
+
end
|
25
|
+
|
26
|
+
it "will write a row only once with the same key" do
|
27
|
+
dumpfile = described_class.new(@csv, [:foo], :id)
|
28
|
+
@csv.should_receive(:<<).with(["bar"])
|
29
|
+
|
30
|
+
dumpfile << {:id => 1, :foo => "bar"}
|
31
|
+
dumpfile << {:id => 1, :foo => "baz"}
|
32
|
+
end
|
33
|
+
|
34
|
+
it "will write a row multiple times if no key is specified" do
|
35
|
+
dumpfile = described_class.new(@csv, [:foo])
|
36
|
+
@csv.should_receive(:<<).with(["bar"])
|
37
|
+
@csv.should_receive(:<<).with(["baz"])
|
38
|
+
|
39
|
+
dumpfile << {:id => 1, :foo => "bar"}
|
40
|
+
dumpfile << {:id => 1, :foo => "baz"}
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::MysqlLoadFileValueTransformer do
|
4
|
+
it "transforms nil into \\N" do
|
5
|
+
subject.transform(nil).should == "\\N"
|
6
|
+
end
|
7
|
+
|
8
|
+
it "transforms true into '1'" do
|
9
|
+
subject.transform(true).should == "1"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "transforms false into '0'" do
|
13
|
+
subject.transform(false).should == "0"
|
14
|
+
end
|
15
|
+
|
16
|
+
it "transforms times into mysql time format" do
|
17
|
+
subject.transform(Time.local(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
|
18
|
+
end
|
19
|
+
|
20
|
+
it "transforms datetimes into mysql time format" do
|
21
|
+
subject.transform(DateTime.new(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
|
22
|
+
end
|
23
|
+
|
24
|
+
it "transforms dates into mysql date format" do
|
25
|
+
subject.transform(Date.new(2011,01,02)).should == "2011-01-02"
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::Screens::CompositeScreen do
|
4
|
+
let(:screen) do
|
5
|
+
i = 0
|
6
|
+
|
7
|
+
lambda {|row, errors|
|
8
|
+
i += 1
|
9
|
+
errors << i
|
10
|
+
[row, errors]
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
14
|
+
it "calls all child screens" do
|
15
|
+
row, errors = described_class.new([screen, screen]).call({:a => 1}, [])
|
16
|
+
row.should == {:a => 1}
|
17
|
+
errors.should == [1,2]
|
18
|
+
end
|
19
|
+
|
20
|
+
it "supports variable arguments in the constructor" do
|
21
|
+
row, errors = described_class.new(screen, screen).call({:a => 1}, [])
|
22
|
+
row.should == {:a => 1}
|
23
|
+
errors.should == [1,2]
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::Screens::InvalidElement do
|
4
|
+
let(:enum_col) {
|
5
|
+
Chicago::Schema::Column.new(:enum, :string, :elements => ["Foo", "Unknown"], :default => "Unknown", :optional => true)
|
6
|
+
}
|
7
|
+
|
8
|
+
it "has a severity of 3" do
|
9
|
+
described_class.new(:dimension_foo, enum_col).severity.should == 3
|
10
|
+
end
|
11
|
+
|
12
|
+
it "reports invalid element for enum columns" do
|
13
|
+
row, errors = described_class.new(:dimension_foo, enum_col).
|
14
|
+
call({:enum => "Bar"})
|
15
|
+
row.should == {:enum => 'Unknown'}
|
16
|
+
|
17
|
+
errors.first[:error].should == "Invalid Element"
|
18
|
+
end
|
19
|
+
|
20
|
+
it "does not report a valid element" do
|
21
|
+
row, errors = described_class.new(:dimension_foo, enum_col).
|
22
|
+
call({:enum => "foo"})
|
23
|
+
row.should == {:enum => 'foo'}
|
24
|
+
|
25
|
+
errors.should be_empty
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::Screens::MissingValue do
|
4
|
+
let(:string_col) { Chicago::Schema::Column.new(:str, :string) }
|
5
|
+
let(:int_col) { Chicago::Schema::Column.new(:int, :integer) }
|
6
|
+
let(:bool_col) { Chicago::Schema::Column.new(:bool, :boolean) }
|
7
|
+
|
8
|
+
let(:descriptive_col) {
|
9
|
+
Chicago::Schema::Column.new(:str, :string, :descriptive => true)
|
10
|
+
}
|
11
|
+
|
12
|
+
let(:optional_col) {
|
13
|
+
Chicago::Schema::Column.new(:str, :string, :optional => true)
|
14
|
+
}
|
15
|
+
|
16
|
+
it "reports nil in an expected column as a missing value, with severity 2" do
|
17
|
+
row, errors = described_class.new(:dimension_foo, string_col).call({})
|
18
|
+
|
19
|
+
errors.first[:table].should == "dimension_foo"
|
20
|
+
errors.first[:column].should == "str"
|
21
|
+
errors.first[:error].should == "Missing Value"
|
22
|
+
errors.first[:severity].should == 2
|
23
|
+
end
|
24
|
+
|
25
|
+
it "reports an empty string value in an expected column as a missing value" do
|
26
|
+
row, errors = described_class.new(:dimension_foo, string_col).
|
27
|
+
call({:str => " "})
|
28
|
+
|
29
|
+
errors.first[:error].should == "Missing Value"
|
30
|
+
end
|
31
|
+
|
32
|
+
it "does not report 0 as a missing value" do
|
33
|
+
row, errors = described_class.new(:dimension_foo, int_col).
|
34
|
+
call({:int => 0})
|
35
|
+
|
36
|
+
errors.should be_empty
|
37
|
+
end
|
38
|
+
|
39
|
+
it "reports missing values with severity 1 if the column is descriptive" do
|
40
|
+
row, errors = described_class.new(:dimension_foo, descriptive_col).call({})
|
41
|
+
errors.first[:severity].should == 1
|
42
|
+
end
|
43
|
+
|
44
|
+
it "does not report boolean values as missing" do
|
45
|
+
row, errors = described_class.new(:dimension_foo, bool_col).call({})
|
46
|
+
errors.should be_empty
|
47
|
+
end
|
48
|
+
|
49
|
+
it "does not report optional columns as missing values" do
|
50
|
+
row, errors = described_class.new(:dimension_foo, optional_col).call({})
|
51
|
+
errors.should be_empty
|
52
|
+
end
|
53
|
+
|
54
|
+
it "fills in a default value for missing values" do
|
55
|
+
row, errors = described_class.new(:dimension_foo, optional_col).call({})
|
56
|
+
row.should == {:str => ''}
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::Screens::OutOfBounds do
|
4
|
+
let(:int_col) {
|
5
|
+
Chicago::Schema::Column.new(:int, :integer, :min => 0, :max => 100)
|
6
|
+
}
|
7
|
+
|
8
|
+
let(:str_col) {
|
9
|
+
Chicago::Schema::Column.new(:str, :string, :min => 2, :max => 5)
|
10
|
+
}
|
11
|
+
|
12
|
+
it "applies to numeric columns when the value is lower than the minimum" do
|
13
|
+
row, errors = described_class.new(:dimension_foo, int_col).
|
14
|
+
call(:int => -1)
|
15
|
+
|
16
|
+
errors.first[:error].should == "Out Of Bounds"
|
17
|
+
end
|
18
|
+
|
19
|
+
it "applies to numeric columns when the value is above the minimum" do
|
20
|
+
row, errors = described_class.new(:dimension_foo, int_col).
|
21
|
+
call(:int => 101)
|
22
|
+
|
23
|
+
errors.first[:error].should == "Out Of Bounds"
|
24
|
+
end
|
25
|
+
|
26
|
+
it "applies to string columns when the number of chars is below minimum" do
|
27
|
+
row, errors = described_class.new(:dimension_foo, str_col).
|
28
|
+
call(:str => "a")
|
29
|
+
|
30
|
+
errors.first[:error].should == "Out Of Bounds"
|
31
|
+
end
|
32
|
+
|
33
|
+
it "applies to string columns when the number of chars is above maximum" do
|
34
|
+
row, errors = described_class.new(:dimension_foo, str_col).
|
35
|
+
call(:str => "abcdef")
|
36
|
+
|
37
|
+
errors.first[:error].should == "Out Of Bounds"
|
38
|
+
end
|
39
|
+
|
40
|
+
it "does not apply to string values in range" do
|
41
|
+
row, errors = described_class.new(:dimension_foo, str_col).
|
42
|
+
call(:str => "abcde")
|
43
|
+
|
44
|
+
errors.should be_empty
|
45
|
+
end
|
46
|
+
|
47
|
+
it "does not apply to numeric values in range" do
|
48
|
+
row, errors = described_class.new(:dimension_foo, int_col).
|
49
|
+
call(:int => 0)
|
50
|
+
|
51
|
+
errors.should be_empty
|
52
|
+
end
|
53
|
+
|
54
|
+
it "has severity 2" do
|
55
|
+
described_class.new(:dimension_foo, int_col).severity.should == 2
|
56
|
+
end
|
57
|
+
|
58
|
+
it "does not replace values with default" do
|
59
|
+
row, errors = described_class.new(:dimension_foo, str_col).
|
60
|
+
call(:str => "a")
|
61
|
+
|
62
|
+
row.should == {:str => "a"}
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::SequelExtensions::DependantTables do
|
4
|
+
it "returns the table in the from clause" do
|
5
|
+
TEST_DB[:foo].dependant_tables.should == [:foo]
|
6
|
+
end
|
7
|
+
|
8
|
+
it "returns tables from join clauses" do
|
9
|
+
TEST_DB[:foo].join(:bar).join(:baz).dependant_tables.
|
10
|
+
should == [:foo, :bar, :baz]
|
11
|
+
end
|
12
|
+
|
13
|
+
it "returns unique real tables from join clauses when aliased" do
|
14
|
+
TEST_DB[:foo].join(:bar).join(:bar.as(:baz)).dependant_tables.
|
15
|
+
should == [:foo, :bar]
|
16
|
+
end
|
17
|
+
|
18
|
+
it "returns real tables from 'from' clauses when aliased" do
|
19
|
+
TEST_DB[:foo.as(:bar)].join(:bar).join(:bar.as(:baz)).
|
20
|
+
dependant_tables.should == [:foo, :bar]
|
21
|
+
end
|
22
|
+
|
23
|
+
it "returns tables from nested datasets in the from clause" do
|
24
|
+
TEST_DB[TEST_DB[:foo].as(:bar)].dependant_tables.should == [:foo]
|
25
|
+
end
|
26
|
+
|
27
|
+
it "returns tables from nested datasets in the join clause" do
|
28
|
+
TEST_DB[:foo].join(TEST_DB[:bar].as(:baz)).dependant_tables.
|
29
|
+
should == [:foo, :bar]
|
30
|
+
end
|
31
|
+
|
32
|
+
it "handles unioned datasets" do
|
33
|
+
TEST_DB[:foo].union(TEST_DB[:bar]).union(TEST_DB[:baz]).
|
34
|
+
dependant_tables.should == [:foo, :bar, :baz]
|
35
|
+
end
|
36
|
+
|
37
|
+
it "handles unioned datasets where from_self is false" do
|
38
|
+
TEST_DB[:foo].union(TEST_DB[:bar], :from_self => false).
|
39
|
+
dependant_tables.should == [:foo, :bar]
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::SequelExtensions::FilterToEtlBatch do
|
4
|
+
let(:batch) { stub(:batch, :id => 42) }
|
5
|
+
|
6
|
+
it "should do nothing to a table without an etl_batch_id column" do
|
7
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([])
|
8
|
+
TEST_DB[:foo].filter_to_etl_batch(batch).should == TEST_DB[:foo]
|
9
|
+
end
|
10
|
+
|
11
|
+
it "filters a table with an ETL batch id column" do
|
12
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([[:etl_batch_id, {}]])
|
13
|
+
TEST_DB[:foo].filter_to_etl_batch(batch).sql.
|
14
|
+
should include("\(`foo`.`etl_batch_id` = 42\)")
|
15
|
+
end
|
16
|
+
|
17
|
+
it "filters an aliased table with an ETL batch id column" do
|
18
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([[:etl_batch_id, {}]])
|
19
|
+
TEST_DB[:foo.as(:bar)].filter_to_etl_batch(batch).sql.
|
20
|
+
should include("\(`bar`.`etl_batch_id` = 42\)")
|
21
|
+
end
|
22
|
+
|
23
|
+
it "doesn't attempt to look for etl columns in nested queries" do
|
24
|
+
TEST_DB[TEST_DB[:foo].as(:bar)].filter_to_etl_batch(batch).sql.
|
25
|
+
should_not include("`bar`.`etl_batch_id` = 42")
|
26
|
+
end
|
27
|
+
|
28
|
+
it "filters based on joins" do
|
29
|
+
TEST_DB.should_receive(:schema).with(:baz).and_return([[:etl_batch_id, {}]])
|
30
|
+
TEST_DB.should_receive(:schema).with(:bar).and_return([])
|
31
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([])
|
32
|
+
|
33
|
+
sql = TEST_DB[:foo].join_table(:left_outer, :bar, :id => :id).join(:baz).filter_to_etl_batch(batch).sql
|
34
|
+
sql.should include("\(`baz`.`etl_batch_id` = 42\)")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "filters based on joined aliases" do
|
38
|
+
TEST_DB.should_receive(:schema).with(:bar).and_return([[:etl_batch_id, {}]])
|
39
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([])
|
40
|
+
|
41
|
+
TEST_DB[:foo].join(:bar.as(:baz)).filter_to_etl_batch(batch).sql.
|
42
|
+
should include("\(`baz`.`etl_batch_id` = 42\)")
|
43
|
+
end
|
44
|
+
|
45
|
+
it "applies filters to each unioned dataset" do
|
46
|
+
TEST_DB.should_receive(:schema).with(:bar).and_return([[:etl_batch_id, {}]])
|
47
|
+
TEST_DB.should_receive(:schema).with(:foo).and_return([[:etl_batch_id, {}]])
|
48
|
+
|
49
|
+
sql = TEST_DB[:foo].union(TEST_DB[:bar], :from_self => false).filter_to_etl_batch(batch).sql
|
50
|
+
|
51
|
+
sql.should include("\(`foo`.`etl_batch_id` = 42\)")
|
52
|
+
sql.should include("\(`bar`.`etl_batch_id` = 42\)")
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::SequelExtensions::LoadDataInfile do
|
4
|
+
before :each do
|
5
|
+
@sql = TEST_DB[:foo].load_csv_infile_sql("bar.csv", [:bar, :baz])
|
6
|
+
end
|
7
|
+
|
8
|
+
it "loads the data in the file" do
|
9
|
+
@sql.should include("LOAD DATA INFILE 'bar.csv'")
|
10
|
+
end
|
11
|
+
|
12
|
+
it "replaces rows currently in the table" do
|
13
|
+
@sql.should include("REPLACE INTO TABLE `foo`")
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should be in the UTF 8 character set" do
|
17
|
+
@sql.should include("CHARACTER SET 'utf8'")
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should escape with the \" character" do
|
21
|
+
@sql.should include("ESCAPED BY '\"'")
|
22
|
+
end
|
23
|
+
|
24
|
+
it "supports standard csv, with optional quoting" do
|
25
|
+
@sql.should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'")
|
26
|
+
end
|
27
|
+
|
28
|
+
it "loads into the columns specified" do
|
29
|
+
@sql.should include("(`bar`,`baz`);")
|
30
|
+
end
|
31
|
+
|
32
|
+
it "can ignore instead of replacing rows" do
|
33
|
+
@sql = TEST_DB[:foo].insert_ignore.
|
34
|
+
load_csv_infile_sql("bar.csv", [:bar, :baz])
|
35
|
+
@sql.should include("IGNORE INTO TABLE `foo`")
|
36
|
+
end
|
37
|
+
end
|