chicago-etl 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +24 -24
- data/lib/chicago/{flow → etl}/array_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/array_source.rb +1 -1
- data/lib/chicago/etl/dataset_batch_stage.rb +0 -1
- data/lib/chicago/{flow → etl}/dataset_source.rb +1 -1
- data/lib/chicago/{flow → etl}/errors.rb +1 -1
- data/lib/chicago/{flow → etl}/filter.rb +1 -1
- data/lib/chicago/etl/mysql.rb +4 -0
- data/lib/chicago/{flow → etl}/mysql_file_serializer.rb +1 -1
- data/lib/chicago/{flow → etl}/mysql_file_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/null_sink.rb +1 -1
- data/lib/chicago/{flow → etl}/pipeline_endpoint.rb +1 -1
- data/lib/chicago/etl/schema_sinks_and_transformations_builder.rb +1 -1
- data/lib/chicago/etl/schema_table_sink_factory.rb +11 -11
- data/lib/chicago/etl/screens/column_screen.rb +1 -1
- data/lib/chicago/{flow → etl}/sink.rb +1 -1
- data/lib/chicago/etl/stage.rb +2 -3
- data/lib/chicago/{flow → etl}/transformation.rb +1 -1
- data/lib/chicago/{flow → etl}/transformation_chain.rb +1 -1
- data/lib/chicago/etl/transformations/deduplicate_rows.rb +1 -1
- data/lib/chicago/etl/transformations.rb +5 -5
- data/lib/chicago/etl.rb +14 -11
- data/spec/{flow → etl}/array_sink_spec.rb +1 -1
- data/spec/{flow → etl}/array_source_spec.rb +1 -1
- data/spec/{flow → etl}/dataset_source_spec.rb +1 -1
- data/spec/etl/define_stage_spec.rb +6 -6
- data/spec/{flow → etl}/filter_spec.rb +1 -1
- data/spec/{flow → etl}/mysql_file_serializer_spec.rb +1 -1
- data/spec/{flow → etl}/mysql_file_sink_spec.rb +2 -2
- data/spec/{flow → etl}/mysql_integration_spec.rb +4 -4
- data/spec/etl/schema_table_sink_factory_spec.rb +1 -1
- data/spec/{flow → etl}/transformation_chain_spec.rb +6 -6
- data/spec/{flow → etl}/transformation_spec.rb +3 -3
- data/spec/etl/transformations_spec.rb +1 -1
- metadata +27 -27
- data/lib/chicago/flow/mysql.rb +0 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.4
|
1
|
+
0.2.0
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.1.4"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-11-
|
12
|
+
s.date = "2013-11-18"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -28,14 +28,24 @@ Gem::Specification.new do |s|
|
|
28
28
|
"chicago-flow.gemspec",
|
29
29
|
"lib/chicago-etl.rb",
|
30
30
|
"lib/chicago/etl.rb",
|
31
|
+
"lib/chicago/etl/array_sink.rb",
|
32
|
+
"lib/chicago/etl/array_source.rb",
|
31
33
|
"lib/chicago/etl/batch.rb",
|
32
34
|
"lib/chicago/etl/core_extensions.rb",
|
33
35
|
"lib/chicago/etl/counter.rb",
|
34
36
|
"lib/chicago/etl/dataset_batch_stage.rb",
|
35
37
|
"lib/chicago/etl/dataset_builder.rb",
|
38
|
+
"lib/chicago/etl/dataset_source.rb",
|
39
|
+
"lib/chicago/etl/errors.rb",
|
40
|
+
"lib/chicago/etl/filter.rb",
|
36
41
|
"lib/chicago/etl/key_builder.rb",
|
37
42
|
"lib/chicago/etl/load_dataset_builder.rb",
|
43
|
+
"lib/chicago/etl/mysql.rb",
|
44
|
+
"lib/chicago/etl/mysql_file_serializer.rb",
|
45
|
+
"lib/chicago/etl/mysql_file_sink.rb",
|
46
|
+
"lib/chicago/etl/null_sink.rb",
|
38
47
|
"lib/chicago/etl/pipeline.rb",
|
48
|
+
"lib/chicago/etl/pipeline_endpoint.rb",
|
39
49
|
"lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
|
40
50
|
"lib/chicago/etl/schema_table_sink_factory.rb",
|
41
51
|
"lib/chicago/etl/screens/column_screen.rb",
|
@@ -44,37 +54,34 @@ Gem::Specification.new do |s|
|
|
44
54
|
"lib/chicago/etl/screens/out_of_bounds.rb",
|
45
55
|
"lib/chicago/etl/sequel/dependant_tables.rb",
|
46
56
|
"lib/chicago/etl/sequel/filter_to_etl_batch.rb",
|
57
|
+
"lib/chicago/etl/sink.rb",
|
47
58
|
"lib/chicago/etl/stage.rb",
|
48
59
|
"lib/chicago/etl/stage_builder.rb",
|
49
60
|
"lib/chicago/etl/table_builder.rb",
|
50
61
|
"lib/chicago/etl/task_invocation.rb",
|
51
62
|
"lib/chicago/etl/tasks.rb",
|
63
|
+
"lib/chicago/etl/transformation.rb",
|
64
|
+
"lib/chicago/etl/transformation_chain.rb",
|
52
65
|
"lib/chicago/etl/transformations.rb",
|
53
66
|
"lib/chicago/etl/transformations/deduplicate_rows.rb",
|
54
67
|
"lib/chicago/etl/transformations/uk_post_code.rb",
|
55
68
|
"lib/chicago/etl/transformations/uk_post_code_field.rb",
|
56
|
-
"lib/chicago/flow/array_sink.rb",
|
57
|
-
"lib/chicago/flow/array_source.rb",
|
58
|
-
"lib/chicago/flow/dataset_source.rb",
|
59
|
-
"lib/chicago/flow/errors.rb",
|
60
|
-
"lib/chicago/flow/filter.rb",
|
61
|
-
"lib/chicago/flow/mysql.rb",
|
62
|
-
"lib/chicago/flow/mysql_file_serializer.rb",
|
63
|
-
"lib/chicago/flow/mysql_file_sink.rb",
|
64
|
-
"lib/chicago/flow/null_sink.rb",
|
65
|
-
"lib/chicago/flow/pipeline_endpoint.rb",
|
66
|
-
"lib/chicago/flow/sink.rb",
|
67
|
-
"lib/chicago/flow/transformation.rb",
|
68
|
-
"lib/chicago/flow/transformation_chain.rb",
|
69
69
|
"spec/db_connections.yml.dist",
|
70
|
+
"spec/etl/array_sink_spec.rb",
|
71
|
+
"spec/etl/array_source_spec.rb",
|
70
72
|
"spec/etl/batch_spec.rb",
|
71
73
|
"spec/etl/core_extensions_spec.rb",
|
72
74
|
"spec/etl/counter_spec.rb",
|
75
|
+
"spec/etl/dataset_source_spec.rb",
|
73
76
|
"spec/etl/define_dimension_stage_spec.rb",
|
74
77
|
"spec/etl/define_stage_spec.rb",
|
75
78
|
"spec/etl/etl_batch_id_dataset_filter.rb",
|
79
|
+
"spec/etl/filter_spec.rb",
|
76
80
|
"spec/etl/key_builder_spec.rb",
|
77
81
|
"spec/etl/load_dataset_builder_spec.rb",
|
82
|
+
"spec/etl/mysql_file_serializer_spec.rb",
|
83
|
+
"spec/etl/mysql_file_sink_spec.rb",
|
84
|
+
"spec/etl/mysql_integration_spec.rb",
|
78
85
|
"spec/etl/pipeline_stage_builder_spec.rb",
|
79
86
|
"spec/etl/schema_table_sink_factory_spec.rb",
|
80
87
|
"spec/etl/screens/invalid_element_spec.rb",
|
@@ -85,19 +92,12 @@ Gem::Specification.new do |s|
|
|
85
92
|
"spec/etl/stage_spec.rb",
|
86
93
|
"spec/etl/table_builder_spec.rb",
|
87
94
|
"spec/etl/task_spec.rb",
|
95
|
+
"spec/etl/transformation_chain_spec.rb",
|
96
|
+
"spec/etl/transformation_spec.rb",
|
88
97
|
"spec/etl/transformations/deduplicate_rows_spec.rb",
|
89
98
|
"spec/etl/transformations/uk_post_code_field_spec.rb",
|
90
99
|
"spec/etl/transformations/uk_post_code_spec.rb",
|
91
100
|
"spec/etl/transformations_spec.rb",
|
92
|
-
"spec/flow/array_sink_spec.rb",
|
93
|
-
"spec/flow/array_source_spec.rb",
|
94
|
-
"spec/flow/dataset_source_spec.rb",
|
95
|
-
"spec/flow/filter_spec.rb",
|
96
|
-
"spec/flow/mysql_file_serializer_spec.rb",
|
97
|
-
"spec/flow/mysql_file_sink_spec.rb",
|
98
|
-
"spec/flow/mysql_integration_spec.rb",
|
99
|
-
"spec/flow/transformation_chain_spec.rb",
|
100
|
-
"spec/flow/transformation_spec.rb",
|
101
101
|
"spec/spec_helper.rb"
|
102
102
|
]
|
103
103
|
s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
|
@@ -13,10 +13,10 @@ module Chicago
|
|
13
13
|
# Pass an :exclude option if you don't want all columns of the
|
14
14
|
# schema table to be loaded via this sink.
|
15
15
|
def sink(options={})
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
MysqlFileSink.new(@db,
|
17
|
+
@schema_table.table_name,
|
18
|
+
load_columns(options[:exclude]),
|
19
|
+
mysql_options(options))
|
20
20
|
end
|
21
21
|
|
22
22
|
# Returns a sink to load data into the MySQL table backing the
|
@@ -26,20 +26,20 @@ module Chicago
|
|
26
26
|
# schema table's key table name will be used otherwise.
|
27
27
|
def key_sink(options={})
|
28
28
|
table = options.delete(:table) || @schema_table.key_table_name
|
29
|
-
sink =
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
sink = MysqlFileSink.new(@db,
|
30
|
+
table,
|
31
|
+
[:original_id, :dimension_id],
|
32
|
+
mysql_options(options))
|
33
33
|
sink.truncation_strategy = lambda do
|
34
34
|
# No Op - we want to maintain keys to avoid having to sort
|
35
35
|
# out fact tables.
|
36
36
|
end
|
37
37
|
sink
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
# Returns a sink to load errors generated in the ETL process.
|
41
41
|
def error_sink(options={})
|
42
|
-
sink =
|
42
|
+
sink = MysqlFileSink.
|
43
43
|
new(@db, :etl_error_log,
|
44
44
|
[:column, :row_id, :error, :severity, :error_detail], mysql_options(options)).
|
45
45
|
set_constant_values(:table => @schema_table.table_name.to_s,
|
@@ -53,7 +53,7 @@ module Chicago
|
|
53
53
|
end
|
54
54
|
sink
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
private
|
58
58
|
|
59
59
|
def load_columns(exclude=nil)
|
data/lib/chicago/etl/stage.rb
CHANGED
@@ -39,7 +39,7 @@ module Chicago
|
|
39
39
|
filtered_dataset = reextract ? source :
|
40
40
|
@filter_strategy.call(source, etl_batch)
|
41
41
|
|
42
|
-
|
42
|
+
DatasetSource.new(filtered_dataset)
|
43
43
|
end
|
44
44
|
|
45
45
|
private
|
@@ -58,8 +58,7 @@ module Chicago
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def transformation_chain
|
61
|
-
@transformation_chain ||=
|
62
|
-
new(*@transformations)
|
61
|
+
@transformation_chain ||= TransformationChain.new(*@transformations)
|
63
62
|
end
|
64
63
|
|
65
64
|
def process_row(row)
|
@@ -2,7 +2,7 @@ module Chicago
|
|
2
2
|
module ETL
|
3
3
|
module Transformations
|
4
4
|
# Filters rows so they only get output once, based on a :key.
|
5
|
-
class WrittenRowFilter <
|
5
|
+
class WrittenRowFilter < Transformation
|
6
6
|
requires_options :key
|
7
7
|
|
8
8
|
def initialize(*args)
|
@@ -30,7 +30,7 @@ module Chicago
|
|
30
30
|
# :_errors field.
|
31
31
|
#
|
32
32
|
# Pass the :key_builder option to set the KeyBuilder.
|
33
|
-
class AddKey <
|
33
|
+
class AddKey < Transformation
|
34
34
|
requires_options :key_builder
|
35
35
|
adds_fields :id
|
36
36
|
|
@@ -57,7 +57,7 @@ module Chicago
|
|
57
57
|
end
|
58
58
|
|
59
59
|
# Removes embedded :_errors and puts them on the error stream.
|
60
|
-
class DemultiplexErrors <
|
60
|
+
class DemultiplexErrors < Transformation
|
61
61
|
def output_streams
|
62
62
|
[:default, :error]
|
63
63
|
end
|
@@ -73,7 +73,7 @@ module Chicago
|
|
73
73
|
|
74
74
|
# Removes a field from the row, and creates a row on a
|
75
75
|
# designated key stream
|
76
|
-
class DimensionKeyMapping <
|
76
|
+
class DimensionKeyMapping < Transformation
|
77
77
|
requires_options :original_key, :key_table
|
78
78
|
|
79
79
|
def removed_fields
|
@@ -103,7 +103,7 @@ module Chicago
|
|
103
103
|
end
|
104
104
|
|
105
105
|
# Adds a hash of the specified columns as a field in the row.
|
106
|
-
class HashColumns <
|
106
|
+
class HashColumns < Transformation
|
107
107
|
requires_options :columns
|
108
108
|
|
109
109
|
def process_row(row)
|
data/lib/chicago/etl.rb
CHANGED
@@ -6,17 +6,17 @@ else
|
|
6
6
|
end
|
7
7
|
|
8
8
|
require 'sequel'
|
9
|
-
require 'chicago/
|
10
|
-
require 'chicago/
|
11
|
-
require 'chicago/
|
12
|
-
require 'chicago/
|
13
|
-
require 'chicago/
|
14
|
-
require 'chicago/
|
15
|
-
require 'chicago/
|
16
|
-
require 'chicago/
|
17
|
-
require 'chicago/
|
18
|
-
require 'chicago/
|
19
|
-
require 'chicago/
|
9
|
+
require 'chicago/etl/errors'
|
10
|
+
require 'chicago/etl/transformation'
|
11
|
+
require 'chicago/etl/filter'
|
12
|
+
require 'chicago/etl/transformation_chain'
|
13
|
+
require 'chicago/etl/pipeline_endpoint'
|
14
|
+
require 'chicago/etl/array_source'
|
15
|
+
require 'chicago/etl/dataset_source'
|
16
|
+
require 'chicago/etl/sink'
|
17
|
+
require 'chicago/etl/array_sink'
|
18
|
+
require 'chicago/etl/null_sink'
|
19
|
+
require 'chicago/etl/mysql'
|
20
20
|
|
21
21
|
require 'chicago/etl/core_extensions'
|
22
22
|
require 'chicago/etl/counter'
|
@@ -66,4 +66,7 @@ module Chicago
|
|
66
66
|
end
|
67
67
|
end
|
68
68
|
end
|
69
|
+
|
70
|
+
# Deprecated, allows clients to transition when they like.
|
71
|
+
Flow = ETL
|
69
72
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
class TestTransformation < Chicago::
|
3
|
+
class TestTransformation < Chicago::ETL::Transformation
|
4
4
|
def output_streams
|
5
5
|
[:another_stream]
|
6
6
|
end
|
@@ -23,8 +23,8 @@ describe "defining and executing a stage" do
|
|
23
23
|
end
|
24
24
|
|
25
25
|
sinks do
|
26
|
-
add Chicago::
|
27
|
-
add Chicago::
|
26
|
+
add Chicago::ETL::ArraySink.new(:test)
|
27
|
+
add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -50,8 +50,8 @@ describe "defining and executing a stage" do
|
|
50
50
|
end
|
51
51
|
|
52
52
|
sinks do
|
53
|
-
add Chicago::
|
54
|
-
add Chicago::
|
53
|
+
add Chicago::ETL::ArraySink.new(:test)
|
54
|
+
add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
@@ -78,7 +78,7 @@ describe "defining and executing a stage" do
|
|
78
78
|
end
|
79
79
|
|
80
80
|
sinks do
|
81
|
-
add Chicago::
|
81
|
+
add Chicago::ETL::ArraySink.new(:test)
|
82
82
|
end
|
83
83
|
|
84
84
|
filter_strategy do |source, etl_batch|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'sequel'
|
3
3
|
|
4
|
-
describe Chicago::
|
4
|
+
describe Chicago::ETL::MysqlFileSink do
|
5
5
|
let(:dataset) { mock(:dataset).as_null_object }
|
6
6
|
let(:db) { mock(:db, :[] => dataset, :schema => []) }
|
7
7
|
let(:csv) { mock(:csv) }
|
@@ -29,7 +29,7 @@ describe Chicago::Flow::MysqlFileSink do
|
|
29
29
|
end
|
30
30
|
|
31
31
|
it "serializes values before writing to the file" do
|
32
|
-
Chicago::
|
32
|
+
Chicago::ETL::MysqlFileSerializer.any_instance.
|
33
33
|
should_receive(:serialize).with(1).and_return(1)
|
34
34
|
sink << {:foo => 1}
|
35
35
|
end
|
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe "Mysql -> Mysql through transformation chain" do
|
4
4
|
let(:dup_row) {
|
5
|
-
Class.new(Chicago::
|
5
|
+
Class.new(Chicago::ETL::Transformation) {
|
6
6
|
def output_streams
|
7
7
|
[:default, @options[:onto]].flatten
|
8
8
|
end
|
@@ -46,15 +46,15 @@ describe "Mysql -> Mysql through transformation chain" do
|
|
46
46
|
TEST_DB[:source].multi_insert([{:foo => nil, :bin => :unhex.sql_function("1F")},
|
47
47
|
{:foo => "Hello", :bin => :unhex.sql_function("1F")}])
|
48
48
|
|
49
|
-
source = Chicago::
|
49
|
+
source = Chicago::ETL::DatasetSource.
|
50
50
|
new(TEST_DB[:source].
|
51
51
|
select(:id, :foo, :hex.sql_function(:bin).as(:bin)))
|
52
52
|
|
53
53
|
transformations = [dup_row.new(:onto => :other)]
|
54
54
|
|
55
|
-
sink_1 = Chicago::
|
55
|
+
sink_1 = Chicago::ETL::MysqlFileSink.
|
56
56
|
new(TEST_DB, :destination, [:id, :foo, :bin])
|
57
|
-
sink_2 = Chicago::
|
57
|
+
sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
|
58
58
|
|
59
59
|
stage = Chicago::ETL::Stage.new(:test,
|
60
60
|
:source => source,
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::
|
3
|
+
describe Chicago::ETL::TransformationChain do
|
4
4
|
let(:add_1_to_a) {
|
5
|
-
Class.new(Chicago::
|
5
|
+
Class.new(Chicago::ETL::Transformation) {
|
6
6
|
def process_row(row)
|
7
7
|
row[:a] += 1
|
8
8
|
row
|
@@ -11,7 +11,7 @@ describe Chicago::Flow::TransformationChain do
|
|
11
11
|
}
|
12
12
|
|
13
13
|
let(:dup_row) {
|
14
|
-
Class.new(Chicago::
|
14
|
+
Class.new(Chicago::ETL::Transformation) {
|
15
15
|
def output_streams
|
16
16
|
[:default, @options[:onto]].flatten
|
17
17
|
end
|
@@ -24,7 +24,7 @@ describe Chicago::Flow::TransformationChain do
|
|
24
24
|
}
|
25
25
|
|
26
26
|
let(:store_until_flush) {
|
27
|
-
Class.new(Chicago::
|
27
|
+
Class.new(Chicago::ETL::Transformation) {
|
28
28
|
def process_row(row)
|
29
29
|
@cache ||= []
|
30
30
|
@cache << row
|
@@ -48,7 +48,7 @@ describe Chicago::Flow::TransformationChain do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "can cope with a filter returning nil" do
|
51
|
-
described_class.new(Chicago::
|
51
|
+
described_class.new(Chicago::ETL::Filter.new,
|
52
52
|
dup_row.new, add_1_to_a.new).process({:a => 1}).
|
53
53
|
should == []
|
54
54
|
end
|
@@ -56,7 +56,7 @@ describe Chicago::Flow::TransformationChain do
|
|
56
56
|
it "can write to different streams" do
|
57
57
|
described_class.new(dup_row.new(:onto => :other),
|
58
58
|
add_1_to_a.new).process({:a => 1}).
|
59
|
-
should == [{:a => 2}, {:a => 1, Chicago::
|
59
|
+
should == [{:a => 2}, {:a => 1, Chicago::ETL::STREAM => :other}]
|
60
60
|
end
|
61
61
|
|
62
62
|
it "knows what streams it writes to as a chain" do
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::
|
3
|
+
describe Chicago::ETL::Transformation do
|
4
4
|
let(:add_1_to_a) {
|
5
5
|
Class.new(described_class) {
|
6
6
|
def process_row(row)
|
@@ -45,8 +45,8 @@ describe Chicago::Flow::Transformation do
|
|
45
45
|
|
46
46
|
it "can apply to all streams using :all" do
|
47
47
|
add_1_to_a.new(:all).process({:a => 1}).should == {:a => 2}
|
48
|
-
add_1_to_a.new(:all).process({:a => 1, Chicago::
|
49
|
-
should == {:a => 2, Chicago::
|
48
|
+
add_1_to_a.new(:all).process({:a => 1, Chicago::ETL::STREAM => :other}).
|
49
|
+
should == {:a => 2, Chicago::ETL::STREAM => :other}
|
50
50
|
end
|
51
51
|
|
52
52
|
it "can be flushed" do
|
@@ -16,7 +16,7 @@ describe Chicago::ETL::Transformations::DemultiplexErrors do
|
|
16
16
|
it "adds the errors onto the error stream" do
|
17
17
|
subject.process(:_errors => [{:error => 1}]).last.should == {
|
18
18
|
:error => 1,
|
19
|
-
Chicago::
|
19
|
+
Chicago::ETL::STREAM => :error
|
20
20
|
}
|
21
21
|
end
|
22
22
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
- 4
|
10
|
-
version: 0.1.4
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-11-
|
18
|
+
date: 2013-11-18 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
@@ -239,14 +239,24 @@ files:
|
|
239
239
|
- chicago-flow.gemspec
|
240
240
|
- lib/chicago-etl.rb
|
241
241
|
- lib/chicago/etl.rb
|
242
|
+
- lib/chicago/etl/array_sink.rb
|
243
|
+
- lib/chicago/etl/array_source.rb
|
242
244
|
- lib/chicago/etl/batch.rb
|
243
245
|
- lib/chicago/etl/core_extensions.rb
|
244
246
|
- lib/chicago/etl/counter.rb
|
245
247
|
- lib/chicago/etl/dataset_batch_stage.rb
|
246
248
|
- lib/chicago/etl/dataset_builder.rb
|
249
|
+
- lib/chicago/etl/dataset_source.rb
|
250
|
+
- lib/chicago/etl/errors.rb
|
251
|
+
- lib/chicago/etl/filter.rb
|
247
252
|
- lib/chicago/etl/key_builder.rb
|
248
253
|
- lib/chicago/etl/load_dataset_builder.rb
|
254
|
+
- lib/chicago/etl/mysql.rb
|
255
|
+
- lib/chicago/etl/mysql_file_serializer.rb
|
256
|
+
- lib/chicago/etl/mysql_file_sink.rb
|
257
|
+
- lib/chicago/etl/null_sink.rb
|
249
258
|
- lib/chicago/etl/pipeline.rb
|
259
|
+
- lib/chicago/etl/pipeline_endpoint.rb
|
250
260
|
- lib/chicago/etl/schema_sinks_and_transformations_builder.rb
|
251
261
|
- lib/chicago/etl/schema_table_sink_factory.rb
|
252
262
|
- lib/chicago/etl/screens/column_screen.rb
|
@@ -255,37 +265,34 @@ files:
|
|
255
265
|
- lib/chicago/etl/screens/out_of_bounds.rb
|
256
266
|
- lib/chicago/etl/sequel/dependant_tables.rb
|
257
267
|
- lib/chicago/etl/sequel/filter_to_etl_batch.rb
|
268
|
+
- lib/chicago/etl/sink.rb
|
258
269
|
- lib/chicago/etl/stage.rb
|
259
270
|
- lib/chicago/etl/stage_builder.rb
|
260
271
|
- lib/chicago/etl/table_builder.rb
|
261
272
|
- lib/chicago/etl/task_invocation.rb
|
262
273
|
- lib/chicago/etl/tasks.rb
|
274
|
+
- lib/chicago/etl/transformation.rb
|
275
|
+
- lib/chicago/etl/transformation_chain.rb
|
263
276
|
- lib/chicago/etl/transformations.rb
|
264
277
|
- lib/chicago/etl/transformations/deduplicate_rows.rb
|
265
278
|
- lib/chicago/etl/transformations/uk_post_code.rb
|
266
279
|
- lib/chicago/etl/transformations/uk_post_code_field.rb
|
267
|
-
- lib/chicago/flow/array_sink.rb
|
268
|
-
- lib/chicago/flow/array_source.rb
|
269
|
-
- lib/chicago/flow/dataset_source.rb
|
270
|
-
- lib/chicago/flow/errors.rb
|
271
|
-
- lib/chicago/flow/filter.rb
|
272
|
-
- lib/chicago/flow/mysql.rb
|
273
|
-
- lib/chicago/flow/mysql_file_serializer.rb
|
274
|
-
- lib/chicago/flow/mysql_file_sink.rb
|
275
|
-
- lib/chicago/flow/null_sink.rb
|
276
|
-
- lib/chicago/flow/pipeline_endpoint.rb
|
277
|
-
- lib/chicago/flow/sink.rb
|
278
|
-
- lib/chicago/flow/transformation.rb
|
279
|
-
- lib/chicago/flow/transformation_chain.rb
|
280
280
|
- spec/db_connections.yml.dist
|
281
|
+
- spec/etl/array_sink_spec.rb
|
282
|
+
- spec/etl/array_source_spec.rb
|
281
283
|
- spec/etl/batch_spec.rb
|
282
284
|
- spec/etl/core_extensions_spec.rb
|
283
285
|
- spec/etl/counter_spec.rb
|
286
|
+
- spec/etl/dataset_source_spec.rb
|
284
287
|
- spec/etl/define_dimension_stage_spec.rb
|
285
288
|
- spec/etl/define_stage_spec.rb
|
286
289
|
- spec/etl/etl_batch_id_dataset_filter.rb
|
290
|
+
- spec/etl/filter_spec.rb
|
287
291
|
- spec/etl/key_builder_spec.rb
|
288
292
|
- spec/etl/load_dataset_builder_spec.rb
|
293
|
+
- spec/etl/mysql_file_serializer_spec.rb
|
294
|
+
- spec/etl/mysql_file_sink_spec.rb
|
295
|
+
- spec/etl/mysql_integration_spec.rb
|
289
296
|
- spec/etl/pipeline_stage_builder_spec.rb
|
290
297
|
- spec/etl/schema_table_sink_factory_spec.rb
|
291
298
|
- spec/etl/screens/invalid_element_spec.rb
|
@@ -296,19 +303,12 @@ files:
|
|
296
303
|
- spec/etl/stage_spec.rb
|
297
304
|
- spec/etl/table_builder_spec.rb
|
298
305
|
- spec/etl/task_spec.rb
|
306
|
+
- spec/etl/transformation_chain_spec.rb
|
307
|
+
- spec/etl/transformation_spec.rb
|
299
308
|
- spec/etl/transformations/deduplicate_rows_spec.rb
|
300
309
|
- spec/etl/transformations/uk_post_code_field_spec.rb
|
301
310
|
- spec/etl/transformations/uk_post_code_spec.rb
|
302
311
|
- spec/etl/transformations_spec.rb
|
303
|
-
- spec/flow/array_sink_spec.rb
|
304
|
-
- spec/flow/array_source_spec.rb
|
305
|
-
- spec/flow/dataset_source_spec.rb
|
306
|
-
- spec/flow/filter_spec.rb
|
307
|
-
- spec/flow/mysql_file_serializer_spec.rb
|
308
|
-
- spec/flow/mysql_file_sink_spec.rb
|
309
|
-
- spec/flow/mysql_integration_spec.rb
|
310
|
-
- spec/flow/transformation_chain_spec.rb
|
311
|
-
- spec/flow/transformation_spec.rb
|
312
312
|
- spec/spec_helper.rb
|
313
313
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
314
314
|
licenses:
|
data/lib/chicago/flow/mysql.rb
DELETED