chicago-etl 0.0.13 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -3
- data/README.rdoc +4 -1
- data/VERSION +1 -1
- data/chicago-etl.gemspec +59 -22
- data/chicago-flow.gemspec +92 -0
- data/lib/chicago/etl/batch.rb +9 -2
- data/lib/chicago/etl/core_extensions.rb +12 -0
- data/lib/chicago/etl/counter.rb +8 -1
- data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
- data/lib/chicago/etl/key_builder.rb +17 -39
- data/lib/chicago/etl/load_dataset_builder.rb +3 -1
- data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
- data/lib/chicago/etl/pipeline.rb +151 -0
- data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
- data/lib/chicago/etl/screens/column_screen.rb +26 -25
- data/lib/chicago/etl/screens/invalid_element.rb +5 -5
- data/lib/chicago/etl/screens/missing_value.rb +4 -2
- data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
- data/lib/chicago/etl/table_builder.rb +4 -2
- data/lib/chicago/etl/task_invocation.rb +0 -1
- data/lib/chicago/etl/transformations.rb +128 -0
- data/lib/chicago/etl.rb +39 -8
- data/lib/chicago/flow/array_sink.rb +35 -0
- data/lib/chicago/flow/array_source.rb +15 -0
- data/lib/chicago/flow/dataset_source.rb +23 -0
- data/lib/chicago/flow/errors.rb +14 -0
- data/lib/chicago/flow/filter.rb +15 -0
- data/lib/chicago/flow/mysql.rb +4 -0
- data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
- data/lib/chicago/flow/mysql_file_sink.rb +68 -0
- data/lib/chicago/flow/null_sink.rb +8 -0
- data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
- data/lib/chicago/flow/pipeline_stage.rb +68 -0
- data/lib/chicago/flow/sink.rb +53 -0
- data/lib/chicago/flow/transformation.rb +169 -0
- data/lib/chicago/flow/transformation_chain.rb +40 -0
- data/spec/etl/batch_spec.rb +2 -1
- data/spec/etl/core_extensions_spec.rb +13 -0
- data/spec/etl/dataset_batch_stage_spec.rb +55 -0
- data/spec/etl/key_builder_spec.rb +25 -83
- data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
- data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
- data/spec/etl/screens/invalid_element_spec.rb +10 -11
- data/spec/etl/screens/missing_value_spec.rb +21 -21
- data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
- data/spec/etl/transformations_spec.rb +109 -0
- data/spec/flow/array_sink_spec.rb +26 -0
- data/spec/flow/array_source_spec.rb +20 -0
- data/spec/flow/dataset_source_spec.rb +15 -0
- data/spec/flow/filter_spec.rb +13 -0
- data/spec/flow/mysql_file_serializer_spec.rb +27 -0
- data/spec/flow/mysql_file_sink_spec.rb +94 -0
- data/spec/flow/mysql_integration_spec.rb +72 -0
- data/spec/flow/pipeline_stage_spec.rb +89 -0
- data/spec/flow/transformation_chain_spec.rb +76 -0
- data/spec/flow/transformation_spec.rb +91 -0
- data/spec/spec_helper.rb +5 -0
- metadata +135 -39
- data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
- data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
- data/lib/chicago/etl/screens/composite_screen.rb +0 -17
- data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
- data/lib/chicago/etl/sink.rb +0 -61
- data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
- data/spec/etl/mysql_dumpfile_spec.rb +0 -42
- data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
- data/spec/etl/screens/composite_screen_spec.rb +0 -25
- data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
- data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
- data/spec/etl/sink_spec.rb +0 -7
- data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
@@ -1,60 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
|
4
|
-
it "loads the data in the file into the table" do
|
5
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
6
|
-
to_sql(TEST_DB).should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
|
7
|
-
end
|
8
|
-
|
9
|
-
it "loads the data with replacment" do
|
10
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'],
|
11
|
-
:update => :replace).
|
12
|
-
to_sql(TEST_DB).should include("REPLACE INTO TABLE")
|
13
|
-
end
|
14
|
-
|
15
|
-
it "loads the data ignoring rows" do
|
16
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :update => :ignore).
|
17
|
-
to_sql(TEST_DB).should include("IGNORE INTO TABLE")
|
18
|
-
end
|
19
|
-
|
20
|
-
it "should be in UTF-8 character set by default" do
|
21
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
22
|
-
to_sql(TEST_DB).should include("CHARACTER SET 'utf8'")
|
23
|
-
end
|
24
|
-
|
25
|
-
it "may be in other character sets" do
|
26
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :character_set => "ascii").
|
27
|
-
to_sql(TEST_DB).should include("CHARACTER SET 'ascii'")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should load columns" do
|
31
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
32
|
-
to_sql(TEST_DB).should include("(`bar`,`quux`)")
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should load into variables if column begins with @" do
|
36
|
-
described_class.new("bar.csv", :foo, ['@bar', 'quux']).
|
37
|
-
to_sql(TEST_DB).should include("(@bar,`quux`)")
|
38
|
-
end
|
39
|
-
|
40
|
-
it "can ignore lines" do
|
41
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :ignore => 2).
|
42
|
-
to_sql(TEST_DB).should include("IGNORE 2 LINES")
|
43
|
-
end
|
44
|
-
|
45
|
-
it "can be in csv format" do
|
46
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :format => :csv).
|
47
|
-
to_sql(TEST_DB).should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
|
48
|
-
end
|
49
|
-
|
50
|
-
it "can set column values" do
|
51
|
-
sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
52
|
-
:set => {:bar => :unhex.sql_function("@bar".lit),
|
53
|
-
:etl_batch_id => 3}).
|
54
|
-
to_sql(TEST_DB)
|
55
|
-
|
56
|
-
sql.should include("SET")
|
57
|
-
sql.should include("`etl_batch_id` = 3")
|
58
|
-
sql.should include("`bar` = unhex(@bar)")
|
59
|
-
end
|
60
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::SequelExtensions::LoadDataInfile do
|
4
|
-
before :each do
|
5
|
-
@sql = TEST_DB[:foo].load_csv_infile_sql("bar.csv", [:bar, :baz])
|
6
|
-
end
|
7
|
-
|
8
|
-
it "loads the data in the file" do
|
9
|
-
@sql.should include("LOAD DATA INFILE 'bar.csv'")
|
10
|
-
end
|
11
|
-
|
12
|
-
it "replaces rows currently in the table" do
|
13
|
-
@sql.should include("REPLACE INTO TABLE `foo`")
|
14
|
-
end
|
15
|
-
|
16
|
-
it "should be in the UTF 8 character set" do
|
17
|
-
@sql.should include("CHARACTER SET 'utf8'")
|
18
|
-
end
|
19
|
-
|
20
|
-
it "should escape with the \" character" do
|
21
|
-
@sql.should include("ESCAPED BY '\"'")
|
22
|
-
end
|
23
|
-
|
24
|
-
it "supports standard csv, with optional quoting" do
|
25
|
-
@sql.should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'")
|
26
|
-
end
|
27
|
-
|
28
|
-
it "loads into the columns specified" do
|
29
|
-
@sql.should include("(`bar`,`baz`)")
|
30
|
-
end
|
31
|
-
|
32
|
-
it "can ignore instead of replacing rows" do
|
33
|
-
@sql = TEST_DB[:foo].insert_ignore.
|
34
|
-
load_csv_infile_sql("bar.csv", [:bar, :baz])
|
35
|
-
@sql.should include("IGNORE INTO TABLE `foo`")
|
36
|
-
end
|
37
|
-
end
|
data/spec/etl/sink_spec.rb
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::Transformations::AddInsertTimestamp do
|
4
|
-
it "adds a timestamp in UTC in the _inserted_at field" do
|
5
|
-
time = subject.call({}).first[:_inserted_at]
|
6
|
-
time.should be_kind_of(Time)
|
7
|
-
time.zone.should == "UTC"
|
8
|
-
end
|
9
|
-
end
|