chicago-etl 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +24 -24
- data/lib/chicago/{flow → etl}/array_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/array_source.rb +1 -1
- data/lib/chicago/etl/dataset_batch_stage.rb +0 -1
- data/lib/chicago/{flow → etl}/dataset_source.rb +1 -1
- data/lib/chicago/{flow → etl}/errors.rb +1 -1
- data/lib/chicago/{flow → etl}/filter.rb +1 -1
- data/lib/chicago/etl/mysql.rb +4 -0
- data/lib/chicago/{flow → etl}/mysql_file_serializer.rb +1 -1
- data/lib/chicago/{flow → etl}/mysql_file_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/null_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/pipeline_endpoint.rb +1 -1
- data/lib/chicago/etl/schema_sinks_and_transformations_builder.rb +1 -1
- data/lib/chicago/etl/schema_table_sink_factory.rb +11 -11
- data/lib/chicago/etl/screens/column_screen.rb +1 -1
- data/lib/chicago/{flow → etl}/sink.rb +1 -1
- data/lib/chicago/etl/stage.rb +2 -3
- data/lib/chicago/{flow → etl}/transformation.rb +1 -1
- data/lib/chicago/{flow → etl}/transformation_chain.rb +1 -1
- data/lib/chicago/etl/transformations/deduplicate_rows.rb +1 -1
- data/lib/chicago/etl/transformations.rb +5 -5
- data/lib/chicago/etl.rb +14 -11
- data/spec/{flow → etl}/array_sink_spec.rb +1 -1
- data/spec/{flow → etl}/array_source_spec.rb +1 -1
- data/spec/{flow → etl}/dataset_source_spec.rb +1 -1
- data/spec/etl/define_stage_spec.rb +6 -6
- data/spec/{flow → etl}/filter_spec.rb +1 -1
- data/spec/{flow → etl}/mysql_file_serializer_spec.rb +1 -1
- data/spec/{flow → etl}/mysql_file_sink_spec.rb +2 -2
- data/spec/{flow → etl}/mysql_integration_spec.rb +4 -4
- data/spec/etl/schema_table_sink_factory_spec.rb +1 -1
- data/spec/{flow → etl}/transformation_chain_spec.rb +6 -6
- data/spec/{flow → etl}/transformation_spec.rb +3 -3
- data/spec/etl/transformations_spec.rb +1 -1
- metadata +27 -27
- data/lib/chicago/flow/mysql.rb +0 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-11-
|
12
|
+
s.date = "2013-11-18"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -28,14 +28,24 @@ Gem::Specification.new do |s|
|
|
28
28
|
"chicago-flow.gemspec",
|
29
29
|
"lib/chicago-etl.rb",
|
30
30
|
"lib/chicago/etl.rb",
|
31
|
+
"lib/chicago/etl/array_sink.rb",
|
32
|
+
"lib/chicago/etl/array_source.rb",
|
31
33
|
"lib/chicago/etl/batch.rb",
|
32
34
|
"lib/chicago/etl/core_extensions.rb",
|
33
35
|
"lib/chicago/etl/counter.rb",
|
34
36
|
"lib/chicago/etl/dataset_batch_stage.rb",
|
35
37
|
"lib/chicago/etl/dataset_builder.rb",
|
38
|
+
"lib/chicago/etl/dataset_source.rb",
|
39
|
+
"lib/chicago/etl/errors.rb",
|
40
|
+
"lib/chicago/etl/filter.rb",
|
36
41
|
"lib/chicago/etl/key_builder.rb",
|
37
42
|
"lib/chicago/etl/load_dataset_builder.rb",
|
43
|
+
"lib/chicago/etl/mysql.rb",
|
44
|
+
"lib/chicago/etl/mysql_file_serializer.rb",
|
45
|
+
"lib/chicago/etl/mysql_file_sink.rb",
|
46
|
+
"lib/chicago/etl/null_sink.rb",
|
38
47
|
"lib/chicago/etl/pipeline.rb",
|
48
|
+
"lib/chicago/etl/pipeline_endpoint.rb",
|
39
49
|
"lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
|
40
50
|
"lib/chicago/etl/schema_table_sink_factory.rb",
|
41
51
|
"lib/chicago/etl/screens/column_screen.rb",
|
@@ -44,37 +54,34 @@ Gem::Specification.new do |s|
|
|
44
54
|
"lib/chicago/etl/screens/out_of_bounds.rb",
|
45
55
|
"lib/chicago/etl/sequel/dependant_tables.rb",
|
46
56
|
"lib/chicago/etl/sequel/filter_to_etl_batch.rb",
|
57
|
+
"lib/chicago/etl/sink.rb",
|
47
58
|
"lib/chicago/etl/stage.rb",
|
48
59
|
"lib/chicago/etl/stage_builder.rb",
|
49
60
|
"lib/chicago/etl/table_builder.rb",
|
50
61
|
"lib/chicago/etl/task_invocation.rb",
|
51
62
|
"lib/chicago/etl/tasks.rb",
|
63
|
+
"lib/chicago/etl/transformation.rb",
|
64
|
+
"lib/chicago/etl/transformation_chain.rb",
|
52
65
|
"lib/chicago/etl/transformations.rb",
|
53
66
|
"lib/chicago/etl/transformations/deduplicate_rows.rb",
|
54
67
|
"lib/chicago/etl/transformations/uk_post_code.rb",
|
55
68
|
"lib/chicago/etl/transformations/uk_post_code_field.rb",
|
56
|
-
"lib/chicago/flow/array_sink.rb",
|
57
|
-
"lib/chicago/flow/array_source.rb",
|
58
|
-
"lib/chicago/flow/dataset_source.rb",
|
59
|
-
"lib/chicago/flow/errors.rb",
|
60
|
-
"lib/chicago/flow/filter.rb",
|
61
|
-
"lib/chicago/flow/mysql.rb",
|
62
|
-
"lib/chicago/flow/mysql_file_serializer.rb",
|
63
|
-
"lib/chicago/flow/mysql_file_sink.rb",
|
64
|
-
"lib/chicago/flow/null_sink.rb",
|
65
|
-
"lib/chicago/flow/pipeline_endpoint.rb",
|
66
|
-
"lib/chicago/flow/sink.rb",
|
67
|
-
"lib/chicago/flow/transformation.rb",
|
68
|
-
"lib/chicago/flow/transformation_chain.rb",
|
69
69
|
"spec/db_connections.yml.dist",
|
70
|
+
"spec/etl/array_sink_spec.rb",
|
71
|
+
"spec/etl/array_source_spec.rb",
|
70
72
|
"spec/etl/batch_spec.rb",
|
71
73
|
"spec/etl/core_extensions_spec.rb",
|
72
74
|
"spec/etl/counter_spec.rb",
|
75
|
+
"spec/etl/dataset_source_spec.rb",
|
73
76
|
"spec/etl/define_dimension_stage_spec.rb",
|
74
77
|
"spec/etl/define_stage_spec.rb",
|
75
78
|
"spec/etl/etl_batch_id_dataset_filter.rb",
|
79
|
+
"spec/etl/filter_spec.rb",
|
76
80
|
"spec/etl/key_builder_spec.rb",
|
77
81
|
"spec/etl/load_dataset_builder_spec.rb",
|
82
|
+
"spec/etl/mysql_file_serializer_spec.rb",
|
83
|
+
"spec/etl/mysql_file_sink_spec.rb",
|
84
|
+
"spec/etl/mysql_integration_spec.rb",
|
78
85
|
"spec/etl/pipeline_stage_builder_spec.rb",
|
79
86
|
"spec/etl/schema_table_sink_factory_spec.rb",
|
80
87
|
"spec/etl/screens/invalid_element_spec.rb",
|
@@ -85,19 +92,12 @@ Gem::Specification.new do |s|
|
|
85
92
|
"spec/etl/stage_spec.rb",
|
86
93
|
"spec/etl/table_builder_spec.rb",
|
87
94
|
"spec/etl/task_spec.rb",
|
95
|
+
"spec/etl/transformation_chain_spec.rb",
|
96
|
+
"spec/etl/transformation_spec.rb",
|
88
97
|
"spec/etl/transformations/deduplicate_rows_spec.rb",
|
89
98
|
"spec/etl/transformations/uk_post_code_field_spec.rb",
|
90
99
|
"spec/etl/transformations/uk_post_code_spec.rb",
|
91
100
|
"spec/etl/transformations_spec.rb",
|
92
|
-
"spec/flow/array_sink_spec.rb",
|
93
|
-
"spec/flow/array_source_spec.rb",
|
94
|
-
"spec/flow/dataset_source_spec.rb",
|
95
|
-
"spec/flow/filter_spec.rb",
|
96
|
-
"spec/flow/mysql_file_serializer_spec.rb",
|
97
|
-
"spec/flow/mysql_file_sink_spec.rb",
|
98
|
-
"spec/flow/mysql_integration_spec.rb",
|
99
|
-
"spec/flow/transformation_chain_spec.rb",
|
100
|
-
"spec/flow/transformation_spec.rb",
|
101
101
|
"spec/spec_helper.rb"
|
102
102
|
]
|
103
103
|
s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
|
@@ -13,10 +13,10 @@ module Chicago
|
|
13
13
|
# Pass an :exclude option if you don't want all columns of the
|
14
14
|
# schema table to be loaded via this sink.
|
15
15
|
def sink(options={})
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
MysqlFileSink.new(@db,
|
17
|
+
@schema_table.table_name,
|
18
|
+
load_columns(options[:exclude]),
|
19
|
+
mysql_options(options))
|
20
20
|
end
|
21
21
|
|
22
22
|
# Returns a sink to load data into the MySQL table backing the
|
@@ -26,20 +26,20 @@ module Chicago
|
|
26
26
|
# schema table's key table name will be used otherwise.
|
27
27
|
def key_sink(options={})
|
28
28
|
table = options.delete(:table) || @schema_table.key_table_name
|
29
|
-
sink =
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
sink = MysqlFileSink.new(@db,
|
30
|
+
table,
|
31
|
+
[:original_id, :dimension_id],
|
32
|
+
mysql_options(options))
|
33
33
|
sink.truncation_strategy = lambda do
|
34
34
|
# No Op - we want to maintain keys to avoid having to sort
|
35
35
|
# out fact tables.
|
36
36
|
end
|
37
37
|
sink
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
# Returns a sink to load errors generated in the ETL process.
|
41
41
|
def error_sink(options={})
|
42
|
-
sink =
|
42
|
+
sink = MysqlFileSink.
|
43
43
|
new(@db, :etl_error_log,
|
44
44
|
[:column, :row_id, :error, :severity, :error_detail], mysql_options(options)).
|
45
45
|
set_constant_values(:table => @schema_table.table_name.to_s,
|
@@ -53,7 +53,7 @@ module Chicago
|
|
53
53
|
end
|
54
54
|
sink
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
private
|
58
58
|
|
59
59
|
def load_columns(exclude=nil)
|
data/lib/chicago/etl/stage.rb
CHANGED
@@ -39,7 +39,7 @@ module Chicago
|
|
39
39
|
filtered_dataset = reextract ? source :
|
40
40
|
@filter_strategy.call(source, etl_batch)
|
41
41
|
|
42
|
-
|
42
|
+
DatasetSource.new(filtered_dataset)
|
43
43
|
end
|
44
44
|
|
45
45
|
private
|
@@ -58,8 +58,7 @@ module Chicago
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def transformation_chain
|
61
|
-
@transformation_chain ||=
|
62
|
-
new(*@transformations)
|
61
|
+
@transformation_chain ||= TransformationChain.new(*@transformations)
|
63
62
|
end
|
64
63
|
|
65
64
|
def process_row(row)
|
@@ -2,7 +2,7 @@ module Chicago
|
|
2
2
|
module ETL
|
3
3
|
module Transformations
|
4
4
|
# Filters rows so they only get output once, based on a :key.
|
5
|
-
class WrittenRowFilter <
|
5
|
+
class WrittenRowFilter < Transformation
|
6
6
|
requires_options :key
|
7
7
|
|
8
8
|
def initialize(*args)
|
@@ -30,7 +30,7 @@ module Chicago
|
|
30
30
|
# :_errors field.
|
31
31
|
#
|
32
32
|
# Pass the :key_builder option to set the KeyBuilder.
|
33
|
-
class AddKey <
|
33
|
+
class AddKey < Transformation
|
34
34
|
requires_options :key_builder
|
35
35
|
adds_fields :id
|
36
36
|
|
@@ -57,7 +57,7 @@ module Chicago
|
|
57
57
|
end
|
58
58
|
|
59
59
|
# Removes embedded :_errors and puts them on the error stream.
|
60
|
-
class DemultiplexErrors <
|
60
|
+
class DemultiplexErrors < Transformation
|
61
61
|
def output_streams
|
62
62
|
[:default, :error]
|
63
63
|
end
|
@@ -73,7 +73,7 @@ module Chicago
|
|
73
73
|
|
74
74
|
# Removes a field from the row, and creates a row on a
|
75
75
|
# designated key stream
|
76
|
-
class DimensionKeyMapping <
|
76
|
+
class DimensionKeyMapping < Transformation
|
77
77
|
requires_options :original_key, :key_table
|
78
78
|
|
79
79
|
def removed_fields
|
@@ -103,7 +103,7 @@ module Chicago
|
|
103
103
|
end
|
104
104
|
|
105
105
|
# Adds a hash of the specified columns as a field in the row.
|
106
|
-
class HashColumns <
|
106
|
+
class HashColumns < Transformation
|
107
107
|
requires_options :columns
|
108
108
|
|
109
109
|
def process_row(row)
|
data/lib/chicago/etl.rb
CHANGED
@@ -6,17 +6,17 @@ else
|
|
6
6
|
end
|
7
7
|
|
8
8
|
require 'sequel'
|
9
|
-
require 'chicago/
|
10
|
-
require 'chicago/
|
11
|
-
require 'chicago/
|
12
|
-
require 'chicago/
|
13
|
-
require 'chicago/
|
14
|
-
require 'chicago/
|
15
|
-
require 'chicago/
|
16
|
-
require 'chicago/
|
17
|
-
require 'chicago/
|
18
|
-
require 'chicago/
|
19
|
-
require 'chicago/
|
9
|
+
require 'chicago/etl/errors'
|
10
|
+
require 'chicago/etl/transformation'
|
11
|
+
require 'chicago/etl/filter'
|
12
|
+
require 'chicago/etl/transformation_chain'
|
13
|
+
require 'chicago/etl/pipeline_endpoint'
|
14
|
+
require 'chicago/etl/array_source'
|
15
|
+
require 'chicago/etl/dataset_source'
|
16
|
+
require 'chicago/etl/sink'
|
17
|
+
require 'chicago/etl/array_sink'
|
18
|
+
require 'chicago/etl/null_sink'
|
19
|
+
require 'chicago/etl/mysql'
|
20
20
|
|
21
21
|
require 'chicago/etl/core_extensions'
|
22
22
|
require 'chicago/etl/counter'
|
@@ -66,4 +66,7 @@ module Chicago
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
end
|
69
|
+
|
70
|
+
# Deprecated, allows clients to transition when they like.
|
71
|
+
Flow = ETL
|
69
72
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
class TestTransformation < Chicago::
|
3
|
+
class TestTransformation < Chicago::ETL::Transformation
|
4
4
|
def output_streams
|
5
5
|
[:another_stream]
|
6
6
|
end
|
@@ -23,8 +23,8 @@ describe "defining and executing a stage" do
|
|
23
23
|
end
|
24
24
|
|
25
25
|
sinks do
|
26
|
-
add Chicago::
|
27
|
-
add Chicago::
|
26
|
+
add Chicago::ETL::ArraySink.new(:test)
|
27
|
+
add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -50,8 +50,8 @@ describe "defining and executing a stage" do
|
|
50
50
|
end
|
51
51
|
|
52
52
|
sinks do
|
53
|
-
add Chicago::
|
54
|
-
add Chicago::
|
53
|
+
add Chicago::ETL::ArraySink.new(:test)
|
54
|
+
add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
@@ -78,7 +78,7 @@ describe "defining and executing a stage" do
|
|
78
78
|
end
|
79
79
|
|
80
80
|
sinks do
|
81
|
-
add Chicago::
|
81
|
+
add Chicago::ETL::ArraySink.new(:test)
|
82
82
|
end
|
83
83
|
|
84
84
|
filter_strategy do |source, etl_batch|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'sequel'
|
3
3
|
|
4
|
-
describe Chicago::
|
4
|
+
describe Chicago::ETL::MysqlFileSink do
|
5
5
|
let(:dataset) { mock(:dataset).as_null_object }
|
6
6
|
let(:db) { mock(:db, :[] => dataset, :schema => []) }
|
7
7
|
let(:csv) { mock(:csv) }
|
@@ -29,7 +29,7 @@ describe Chicago::Flow::MysqlFileSink do
|
|
29
29
|
end
|
30
30
|
|
31
31
|
it "serializes values before writing to the file" do
|
32
|
-
Chicago::
|
32
|
+
Chicago::ETL::MysqlFileSerializer.any_instance.
|
33
33
|
should_receive(:serialize).with(1).and_return(1)
|
34
34
|
sink << {:foo => 1}
|
35
35
|
end
|
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe "Mysql -> Mysql through transformation chain" do
|
4
4
|
let(:dup_row) {
|
5
|
-
Class.new(Chicago::
|
5
|
+
Class.new(Chicago::ETL::Transformation) {
|
6
6
|
def output_streams
|
7
7
|
[:default, @options[:onto]].flatten
|
8
8
|
end
|
@@ -46,15 +46,15 @@ describe "Mysql -> Mysql through transformation chain" do
|
|
46
46
|
TEST_DB[:source].multi_insert([{:foo => nil, :bin => :unhex.sql_function("1F")},
|
47
47
|
{:foo => "Hello", :bin => :unhex.sql_function("1F")}])
|
48
48
|
|
49
|
-
source = Chicago::
|
49
|
+
source = Chicago::ETL::DatasetSource.
|
50
50
|
new(TEST_DB[:source].
|
51
51
|
select(:id, :foo, :hex.sql_function(:bin).as(:bin)))
|
52
52
|
|
53
53
|
transformations = [dup_row.new(:onto => :other)]
|
54
54
|
|
55
|
-
sink_1 = Chicago::
|
55
|
+
sink_1 = Chicago::ETL::MysqlFileSink.
|
56
56
|
new(TEST_DB, :destination, [:id, :foo, :bin])
|
57
|
-
sink_2 = Chicago::
|
57
|
+
sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
|
58
58
|
|
59
59
|
stage = Chicago::ETL::Stage.new(:test,
|
60
60
|
:source => source,
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::
|
3
|
+
describe Chicago::ETL::TransformationChain do
|
4
4
|
let(:add_1_to_a) {
|
5
|
-
Class.new(Chicago::
|
5
|
+
Class.new(Chicago::ETL::Transformation) {
|
6
6
|
def process_row(row)
|
7
7
|
row[:a] += 1
|
8
8
|
row
|
@@ -11,7 +11,7 @@ describe Chicago::Flow::TransformationChain do
|
|
11
11
|
}
|
12
12
|
|
13
13
|
let(:dup_row) {
|
14
|
-
Class.new(Chicago::
|
14
|
+
Class.new(Chicago::ETL::Transformation) {
|
15
15
|
def output_streams
|
16
16
|
[:default, @options[:onto]].flatten
|
17
17
|
end
|
@@ -24,7 +24,7 @@ describe Chicago::Flow::TransformationChain do
|
|
24
24
|
}
|
25
25
|
|
26
26
|
let(:store_until_flush) {
|
27
|
-
Class.new(Chicago::
|
27
|
+
Class.new(Chicago::ETL::Transformation) {
|
28
28
|
def process_row(row)
|
29
29
|
@cache ||= []
|
30
30
|
@cache << row
|
@@ -48,7 +48,7 @@ describe Chicago::Flow::TransformationChain do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "can cope with a filter returning nil" do
|
51
|
-
described_class.new(Chicago::
|
51
|
+
described_class.new(Chicago::ETL::Filter.new,
|
52
52
|
dup_row.new, add_1_to_a.new).process({:a => 1}).
|
53
53
|
should == []
|
54
54
|
end
|
@@ -56,7 +56,7 @@ describe Chicago::Flow::TransformationChain do
|
|
56
56
|
it "can write to different streams" do
|
57
57
|
described_class.new(dup_row.new(:onto => :other),
|
58
58
|
add_1_to_a.new).process({:a => 1}).
|
59
|
-
should == [{:a => 2}, {:a => 1, Chicago::
|
59
|
+
should == [{:a => 2}, {:a => 1, Chicago::ETL::STREAM => :other}]
|
60
60
|
end
|
61
61
|
|
62
62
|
it "knows what streams it writes to as a chain" do
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::
|
3
|
+
describe Chicago::ETL::Transformation do
|
4
4
|
let(:add_1_to_a) {
|
5
5
|
Class.new(described_class) {
|
6
6
|
def process_row(row)
|
@@ -45,8 +45,8 @@ describe Chicago::Flow::Transformation do
|
|
45
45
|
|
46
46
|
it "can apply to all streams using :all" do
|
47
47
|
add_1_to_a.new(:all).process({:a => 1}).should == {:a => 2}
|
48
|
-
add_1_to_a.new(:all).process({:a => 1, Chicago::
|
49
|
-
should == {:a => 2, Chicago::
|
48
|
+
add_1_to_a.new(:all).process({:a => 1, Chicago::ETL::STREAM => :other}).
|
49
|
+
should == {:a => 2, Chicago::ETL::STREAM => :other}
|
50
50
|
end
|
51
51
|
|
52
52
|
it "can be flushed" do
|
@@ -16,7 +16,7 @@ describe Chicago::ETL::Transformations::DemultiplexErrors do
|
|
16
16
|
it "adds the errors onto the error stream" do
|
17
17
|
subject.process(:_errors => [{:error => 1}]).last.should == {
|
18
18
|
:error => 1,
|
19
|
-
Chicago::
|
19
|
+
Chicago::ETL::STREAM => :error
|
20
20
|
}
|
21
21
|
end
|
22
22
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-11-
|
18
|
+
date: 2013-11-18 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -239,14 +239,24 @@ files:
|
|
239
239
|
- chicago-flow.gemspec
|
240
240
|
- lib/chicago-etl.rb
|
241
241
|
- lib/chicago/etl.rb
|
242
|
+
- lib/chicago/etl/array_sink.rb
|
243
|
+
- lib/chicago/etl/array_source.rb
|
242
244
|
- lib/chicago/etl/batch.rb
|
243
245
|
- lib/chicago/etl/core_extensions.rb
|
244
246
|
- lib/chicago/etl/counter.rb
|
245
247
|
- lib/chicago/etl/dataset_batch_stage.rb
|
246
248
|
- lib/chicago/etl/dataset_builder.rb
|
249
|
+
- lib/chicago/etl/dataset_source.rb
|
250
|
+
- lib/chicago/etl/errors.rb
|
251
|
+
- lib/chicago/etl/filter.rb
|
247
252
|
- lib/chicago/etl/key_builder.rb
|
248
253
|
- lib/chicago/etl/load_dataset_builder.rb
|
254
|
+
- lib/chicago/etl/mysql.rb
|
255
|
+
- lib/chicago/etl/mysql_file_serializer.rb
|
256
|
+
- lib/chicago/etl/mysql_file_sink.rb
|
257
|
+
- lib/chicago/etl/null_sink.rb
|
249
258
|
- lib/chicago/etl/pipeline.rb
|
259
|
+
- lib/chicago/etl/pipeline_endpoint.rb
|
250
260
|
- lib/chicago/etl/schema_sinks_and_transformations_builder.rb
|
251
261
|
- lib/chicago/etl/schema_table_sink_factory.rb
|
252
262
|
- lib/chicago/etl/screens/column_screen.rb
|
@@ -255,37 +265,34 @@ files:
|
|
255
265
|
- lib/chicago/etl/screens/out_of_bounds.rb
|
256
266
|
- lib/chicago/etl/sequel/dependant_tables.rb
|
257
267
|
- lib/chicago/etl/sequel/filter_to_etl_batch.rb
|
268
|
+
- lib/chicago/etl/sink.rb
|
258
269
|
- lib/chicago/etl/stage.rb
|
259
270
|
- lib/chicago/etl/stage_builder.rb
|
260
271
|
- lib/chicago/etl/table_builder.rb
|
261
272
|
- lib/chicago/etl/task_invocation.rb
|
262
273
|
- lib/chicago/etl/tasks.rb
|
274
|
+
- lib/chicago/etl/transformation.rb
|
275
|
+
- lib/chicago/etl/transformation_chain.rb
|
263
276
|
- lib/chicago/etl/transformations.rb
|
264
277
|
- lib/chicago/etl/transformations/deduplicate_rows.rb
|
265
278
|
- lib/chicago/etl/transformations/uk_post_code.rb
|
266
279
|
- lib/chicago/etl/transformations/uk_post_code_field.rb
|
267
|
-
- lib/chicago/flow/array_sink.rb
|
268
|
-
- lib/chicago/flow/array_source.rb
|
269
|
-
- lib/chicago/flow/dataset_source.rb
|
270
|
-
- lib/chicago/flow/errors.rb
|
271
|
-
- lib/chicago/flow/filter.rb
|
272
|
-
- lib/chicago/flow/mysql.rb
|
273
|
-
- lib/chicago/flow/mysql_file_serializer.rb
|
274
|
-
- lib/chicago/flow/mysql_file_sink.rb
|
275
|
-
- lib/chicago/flow/null_sink.rb
|
276
|
-
- lib/chicago/flow/pipeline_endpoint.rb
|
277
|
-
- lib/chicago/flow/sink.rb
|
278
|
-
- lib/chicago/flow/transformation.rb
|
279
|
-
- lib/chicago/flow/transformation_chain.rb
|
280
280
|
- spec/db_connections.yml.dist
|
281
|
+
- spec/etl/array_sink_spec.rb
|
282
|
+
- spec/etl/array_source_spec.rb
|
281
283
|
- spec/etl/batch_spec.rb
|
282
284
|
- spec/etl/core_extensions_spec.rb
|
283
285
|
- spec/etl/counter_spec.rb
|
286
|
+
- spec/etl/dataset_source_spec.rb
|
284
287
|
- spec/etl/define_dimension_stage_spec.rb
|
285
288
|
- spec/etl/define_stage_spec.rb
|
286
289
|
- spec/etl/etl_batch_id_dataset_filter.rb
|
290
|
+
- spec/etl/filter_spec.rb
|
287
291
|
- spec/etl/key_builder_spec.rb
|
288
292
|
- spec/etl/load_dataset_builder_spec.rb
|
293
|
+
- spec/etl/mysql_file_serializer_spec.rb
|
294
|
+
- spec/etl/mysql_file_sink_spec.rb
|
295
|
+
- spec/etl/mysql_integration_spec.rb
|
289
296
|
- spec/etl/pipeline_stage_builder_spec.rb
|
290
297
|
- spec/etl/schema_table_sink_factory_spec.rb
|
291
298
|
- spec/etl/screens/invalid_element_spec.rb
|
@@ -296,19 +303,12 @@ files:
|
|
296
303
|
- spec/etl/stage_spec.rb
|
297
304
|
- spec/etl/table_builder_spec.rb
|
298
305
|
- spec/etl/task_spec.rb
|
306
|
+
- spec/etl/transformation_chain_spec.rb
|
307
|
+
- spec/etl/transformation_spec.rb
|
299
308
|
- spec/etl/transformations/deduplicate_rows_spec.rb
|
300
309
|
- spec/etl/transformations/uk_post_code_field_spec.rb
|
301
310
|
- spec/etl/transformations/uk_post_code_spec.rb
|
302
311
|
- spec/etl/transformations_spec.rb
|
303
|
-
- spec/flow/array_sink_spec.rb
|
304
|
-
- spec/flow/array_source_spec.rb
|
305
|
-
- spec/flow/dataset_source_spec.rb
|
306
|
-
- spec/flow/filter_spec.rb
|
307
|
-
- spec/flow/mysql_file_serializer_spec.rb
|
308
|
-
- spec/flow/mysql_file_sink_spec.rb
|
309
|
-
- spec/flow/mysql_integration_spec.rb
|
310
|
-
- spec/flow/transformation_chain_spec.rb
|
311
|
-
- spec/flow/transformation_spec.rb
|
312
312
|
- spec/spec_helper.rb
|
313
313
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
314
314
|
licenses:
|
data/lib/chicago/flow/mysql.rb
DELETED