chicago-etl 0.0.13 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -3
- data/README.rdoc +4 -1
- data/VERSION +1 -1
- data/chicago-etl.gemspec +59 -22
- data/chicago-flow.gemspec +92 -0
- data/lib/chicago/etl/batch.rb +9 -2
- data/lib/chicago/etl/core_extensions.rb +12 -0
- data/lib/chicago/etl/counter.rb +8 -1
- data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
- data/lib/chicago/etl/key_builder.rb +17 -39
- data/lib/chicago/etl/load_dataset_builder.rb +3 -1
- data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
- data/lib/chicago/etl/pipeline.rb +151 -0
- data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
- data/lib/chicago/etl/screens/column_screen.rb +26 -25
- data/lib/chicago/etl/screens/invalid_element.rb +5 -5
- data/lib/chicago/etl/screens/missing_value.rb +4 -2
- data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
- data/lib/chicago/etl/table_builder.rb +4 -2
- data/lib/chicago/etl/task_invocation.rb +0 -1
- data/lib/chicago/etl/transformations.rb +128 -0
- data/lib/chicago/etl.rb +39 -8
- data/lib/chicago/flow/array_sink.rb +35 -0
- data/lib/chicago/flow/array_source.rb +15 -0
- data/lib/chicago/flow/dataset_source.rb +23 -0
- data/lib/chicago/flow/errors.rb +14 -0
- data/lib/chicago/flow/filter.rb +15 -0
- data/lib/chicago/flow/mysql.rb +4 -0
- data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
- data/lib/chicago/flow/mysql_file_sink.rb +68 -0
- data/lib/chicago/flow/null_sink.rb +8 -0
- data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
- data/lib/chicago/flow/pipeline_stage.rb +68 -0
- data/lib/chicago/flow/sink.rb +53 -0
- data/lib/chicago/flow/transformation.rb +169 -0
- data/lib/chicago/flow/transformation_chain.rb +40 -0
- data/spec/etl/batch_spec.rb +2 -1
- data/spec/etl/core_extensions_spec.rb +13 -0
- data/spec/etl/dataset_batch_stage_spec.rb +55 -0
- data/spec/etl/key_builder_spec.rb +25 -83
- data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
- data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
- data/spec/etl/screens/invalid_element_spec.rb +10 -11
- data/spec/etl/screens/missing_value_spec.rb +21 -21
- data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
- data/spec/etl/transformations_spec.rb +109 -0
- data/spec/flow/array_sink_spec.rb +26 -0
- data/spec/flow/array_source_spec.rb +20 -0
- data/spec/flow/dataset_source_spec.rb +15 -0
- data/spec/flow/filter_spec.rb +13 -0
- data/spec/flow/mysql_file_serializer_spec.rb +27 -0
- data/spec/flow/mysql_file_sink_spec.rb +94 -0
- data/spec/flow/mysql_integration_spec.rb +72 -0
- data/spec/flow/pipeline_stage_spec.rb +89 -0
- data/spec/flow/transformation_chain_spec.rb +76 -0
- data/spec/flow/transformation_spec.rb +91 -0
- data/spec/spec_helper.rb +5 -0
- metadata +135 -39
- data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
- data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
- data/lib/chicago/etl/screens/composite_screen.rb +0 -17
- data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
- data/lib/chicago/etl/sink.rb +0 -61
- data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
- data/spec/etl/mysql_dumpfile_spec.rb +0 -42
- data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
- data/spec/etl/screens/composite_screen_spec.rb +0 -25
- data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
- data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
- data/spec/etl/sink_spec.rb +0 -7
- data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
data/spec/etl/sequel/load_data_infile_expression_spec.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::SequelExtensions::LoadDataInfileExpression do
|
4
|
-
it "loads the data in the file into the table" do
|
5
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
6
|
-
to_sql(TEST_DB).should include("LOAD DATA INFILE 'bar.csv' INTO TABLE `foo`")
|
7
|
-
end
|
8
|
-
|
9
|
-
it "loads the data with replacment" do
|
10
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'],
|
11
|
-
:update => :replace).
|
12
|
-
to_sql(TEST_DB).should include("REPLACE INTO TABLE")
|
13
|
-
end
|
14
|
-
|
15
|
-
it "loads the data ignoring rows" do
|
16
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :update => :ignore).
|
17
|
-
to_sql(TEST_DB).should include("IGNORE INTO TABLE")
|
18
|
-
end
|
19
|
-
|
20
|
-
it "should be in UTF-8 character set by default" do
|
21
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
22
|
-
to_sql(TEST_DB).should include("CHARACTER SET 'utf8'")
|
23
|
-
end
|
24
|
-
|
25
|
-
it "may be in other character sets" do
|
26
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :character_set => "ascii").
|
27
|
-
to_sql(TEST_DB).should include("CHARACTER SET 'ascii'")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should load columns" do
|
31
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux']).
|
32
|
-
to_sql(TEST_DB).should include("(`bar`,`quux`)")
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should load into variables if column begins with @" do
|
36
|
-
described_class.new("bar.csv", :foo, ['@bar', 'quux']).
|
37
|
-
to_sql(TEST_DB).should include("(@bar,`quux`)")
|
38
|
-
end
|
39
|
-
|
40
|
-
it "can ignore lines" do
|
41
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :ignore => 2).
|
42
|
-
to_sql(TEST_DB).should include("IGNORE 2 LINES")
|
43
|
-
end
|
44
|
-
|
45
|
-
it "can be in csv format" do
|
46
|
-
described_class.new("bar.csv", :foo, ['bar', 'quux'], :format => :csv).
|
47
|
-
to_sql(TEST_DB).should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'")
|
48
|
-
end
|
49
|
-
|
50
|
-
it "can set column values" do
|
51
|
-
sql = described_class.new("bar.csv", :foo, ['@bar', 'quux'],
|
52
|
-
:set => {:bar => :unhex.sql_function("@bar".lit),
|
53
|
-
:etl_batch_id => 3}).
|
54
|
-
to_sql(TEST_DB)
|
55
|
-
|
56
|
-
sql.should include("SET")
|
57
|
-
sql.should include("`etl_batch_id` = 3")
|
58
|
-
sql.should include("`bar` = unhex(@bar)")
|
59
|
-
end
|
60
|
-
end
|
data/spec/etl/sequel/load_data_infile_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::SequelExtensions::LoadDataInfile do
|
4
|
-
before :each do
|
5
|
-
@sql = TEST_DB[:foo].load_csv_infile_sql("bar.csv", [:bar, :baz])
|
6
|
-
end
|
7
|
-
|
8
|
-
it "loads the data in the file" do
|
9
|
-
@sql.should include("LOAD DATA INFILE 'bar.csv'")
|
10
|
-
end
|
11
|
-
|
12
|
-
it "replaces rows currently in the table" do
|
13
|
-
@sql.should include("REPLACE INTO TABLE `foo`")
|
14
|
-
end
|
15
|
-
|
16
|
-
it "should be in the UTF 8 character set" do
|
17
|
-
@sql.should include("CHARACTER SET 'utf8'")
|
18
|
-
end
|
19
|
-
|
20
|
-
it "should escape with the \" character" do
|
21
|
-
@sql.should include("ESCAPED BY '\"'")
|
22
|
-
end
|
23
|
-
|
24
|
-
it "supports standard csv, with optional quoting" do
|
25
|
-
@sql.should include("FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'")
|
26
|
-
end
|
27
|
-
|
28
|
-
it "loads into the columns specified" do
|
29
|
-
@sql.should include("(`bar`,`baz`)")
|
30
|
-
end
|
31
|
-
|
32
|
-
it "can ignore instead of replacing rows" do
|
33
|
-
@sql = TEST_DB[:foo].insert_ignore.
|
34
|
-
load_csv_infile_sql("bar.csv", [:bar, :baz])
|
35
|
-
@sql.should include("IGNORE INTO TABLE `foo`")
|
36
|
-
end
|
37
|
-
end
|
data/spec/etl/sink_spec.rb
DELETED
data/spec/etl/transformations/add_insert_timestamp_spec.rb
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Chicago::ETL::Transformations::AddInsertTimestamp do
|
4
|
-
it "adds a timestamp in UTC in the _inserted_at field" do
|
5
|
-
time = subject.call({}).first[:_inserted_at]
|
6
|
-
time.should be_kind_of(Time)
|
7
|
-
time.zone.should == "UTC"
|
8
|
-
end
|
9
|
-
end
|