chicago-etl 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/Gemfile +0 -1
- data/VERSION +1 -1
- data/chicago-etl.gemspec +8 -9
- data/lib/chicago/etl.rb +1 -0
- data/lib/chicago/etl/pipeline.rb +16 -19
- data/lib/chicago/etl/row_transformation_stage.rb +78 -0
- data/lib/chicago/etl/schema_table_stage_builder.rb +18 -4
- data/lib/chicago/etl/stage.rb +26 -67
- data/lib/chicago/etl/stage_builder.rb +10 -9
- data/spec/etl/define_dimension_stage_spec.rb +6 -2
- data/spec/etl/mysql_integration_spec.rb +7 -7
- data/spec/etl/{stage_spec.rb → row_transformation_stage_spec.rb} +5 -9
- metadata +166 -202
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTdkYTQ0NmIxNGQ0ODU2NTAzMWU4NThjYzhjZWMwOTVjNmU4NjRhNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NDA3N2Y0Y2Y3ZTk1ZGQ2Mzg0NDdkMjA4YzRjODMwMDlmZTRjOWZkMw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MTJiYzg5MGFlMjcwNTBlOTc5N2RkOGM3ZmVmMzIwNTMyN2FmYWQ2ODgwMmYz
|
10
|
+
YTI2MGYwZGQ5ZGNkZWU0MzNlOWNmNTlhODg0NTRkNzU5NDhlYTZjNzcxOWRi
|
11
|
+
ODE3NGNiNWZjMjg3NzJkNzc4OWRhYThlOWUwMjBkNzkzMmYwMWQ=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MDg3ZDQ0ZjhjZTYyZjMzYmFhNWUwOWJlNTAzMzBkOTdjNGFlMzU2NTZkMzA5
|
14
|
+
ZjcwNDBmYTg0YTUyZDE5MThhY2IxYzQwYzM1NzE1MzcwYWEyM2FhYTllYWU3
|
15
|
+
OTkwZmJlMDEwMTQzNjhjMmNjMTAxYWRjYmY3ZjNjZGY5ZDIwYjg=
|
data/Gemfile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.5
|
data/chicago-etl.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: chicago-etl 0.2.5 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.2.
|
9
|
+
s.version = "0.2.5"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2014-
|
14
|
+
s.date = "2014-08-07"
|
13
15
|
s.description = "ETL tools for Chicago"
|
14
16
|
s.email = "roland.swingler@gmail.com"
|
15
17
|
s.extra_rdoc_files = [
|
@@ -45,6 +47,7 @@ Gem::Specification.new do |s|
|
|
45
47
|
"lib/chicago/etl/null_sink.rb",
|
46
48
|
"lib/chicago/etl/pipeline.rb",
|
47
49
|
"lib/chicago/etl/pipeline_endpoint.rb",
|
50
|
+
"lib/chicago/etl/row_transformation_stage.rb",
|
48
51
|
"lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
|
49
52
|
"lib/chicago/etl/schema_table_sink_factory.rb",
|
50
53
|
"lib/chicago/etl/schema_table_stage_builder.rb",
|
@@ -85,6 +88,7 @@ Gem::Specification.new do |s|
|
|
85
88
|
"spec/etl/mysql_file_sink_spec.rb",
|
86
89
|
"spec/etl/mysql_integration_spec.rb",
|
87
90
|
"spec/etl/pipeline_stage_builder_spec.rb",
|
91
|
+
"spec/etl/row_transformation_stage_spec.rb",
|
88
92
|
"spec/etl/schema_table_sink_factory_spec.rb",
|
89
93
|
"spec/etl/screens/invalid_element_spec.rb",
|
90
94
|
"spec/etl/screens/missing_value_spec.rb",
|
@@ -92,7 +96,6 @@ Gem::Specification.new do |s|
|
|
92
96
|
"spec/etl/sequel/dependant_tables_spec.rb",
|
93
97
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
94
98
|
"spec/etl/stage_name_spec.rb",
|
95
|
-
"spec/etl/stage_spec.rb",
|
96
99
|
"spec/etl/table_builder_spec.rb",
|
97
100
|
"spec/etl/task_spec.rb",
|
98
101
|
"spec/etl/transformation_chain_spec.rb",
|
@@ -105,12 +108,11 @@ Gem::Specification.new do |s|
|
|
105
108
|
]
|
106
109
|
s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
|
107
110
|
s.licenses = ["MIT"]
|
108
|
-
s.
|
109
|
-
s.rubygems_version = "1.8.25"
|
111
|
+
s.rubygems_version = "2.2.1"
|
110
112
|
s.summary = "Chicago ETL"
|
111
113
|
|
112
114
|
if s.respond_to? :specification_version then
|
113
|
-
s.specification_version =
|
115
|
+
s.specification_version = 4
|
114
116
|
|
115
117
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
116
118
|
s.add_runtime_dependency(%q<chicagowarehouse>, [">= 0.4.6", "~> 0.4"])
|
@@ -122,7 +124,6 @@ Gem::Specification.new do |s|
|
|
122
124
|
s.add_development_dependency(%q<yard>, [">= 0"])
|
123
125
|
s.add_development_dependency(%q<flog>, [">= 0"])
|
124
126
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
125
|
-
s.add_development_dependency(%q<ZenTest>, [">= 0"])
|
126
127
|
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
127
128
|
s.add_development_dependency(%q<bundler>, ["~> 1"])
|
128
129
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
@@ -136,7 +137,6 @@ Gem::Specification.new do |s|
|
|
136
137
|
s.add_dependency(%q<yard>, [">= 0"])
|
137
138
|
s.add_dependency(%q<flog>, [">= 0"])
|
138
139
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
139
|
-
s.add_dependency(%q<ZenTest>, [">= 0"])
|
140
140
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
141
141
|
s.add_dependency(%q<bundler>, ["~> 1"])
|
142
142
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
@@ -151,7 +151,6 @@ Gem::Specification.new do |s|
|
|
151
151
|
s.add_dependency(%q<yard>, [">= 0"])
|
152
152
|
s.add_dependency(%q<flog>, [">= 0"])
|
153
153
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
154
|
-
s.add_dependency(%q<ZenTest>, [">= 0"])
|
155
154
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
156
155
|
s.add_dependency(%q<bundler>, ["~> 1"])
|
157
156
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
data/lib/chicago/etl.rb
CHANGED
@@ -28,6 +28,7 @@ require 'chicago/etl/transformations'
|
|
28
28
|
require 'chicago/etl/load_dataset_builder'
|
29
29
|
require 'chicago/etl/dataset_builder'
|
30
30
|
require 'chicago/etl/stage'
|
31
|
+
require 'chicago/etl/row_transformation_stage'
|
31
32
|
require 'chicago/etl/stage_builder'
|
32
33
|
require 'chicago/etl/schema_sinks_and_transformations_builder'
|
33
34
|
require 'chicago/etl/pipeline'
|
data/lib/chicago/etl/pipeline.rb
CHANGED
@@ -6,40 +6,37 @@ module Chicago
|
|
6
6
|
attr_reader :stages
|
7
7
|
|
8
8
|
# Creates a pipeline for a Schema.
|
9
|
-
def initialize(db, schema)
|
9
|
+
def initialize(db, schema, &block)
|
10
10
|
@schema, @db = schema, db
|
11
11
|
@stages = Chicago::Schema::NamedElementCollection.new
|
12
|
+
@builder_class_factory = block || lambda {|name, options| StageBuilder }
|
12
13
|
end
|
13
14
|
|
14
15
|
# Defines a generic stage in the pipeline.
|
15
16
|
def define_stage(*args, &block)
|
16
17
|
options = args.last.kind_of?(Hash) ? args.pop : {}
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
@stages << build_dimension_load_stage(name, options, &block)
|
22
|
-
elsif name =~ [:load, :facts]
|
23
|
-
@stages << build_fact_load_stage(name, options, &block)
|
19
|
+
if args.last.kind_of?(Stage)
|
20
|
+
stage = args.pop
|
21
|
+
name = StageName.new(args)
|
24
22
|
else
|
25
|
-
|
23
|
+
name = StageName.new(args)
|
24
|
+
stage = build_stage(name, options, &block)
|
26
25
|
end
|
27
|
-
end
|
28
26
|
|
29
|
-
|
30
|
-
SchemaTableStageBuilder.new(@db, schema_table).build(name, &block)
|
31
|
-
end
|
27
|
+
stage.name = StageName.new(args)
|
32
28
|
|
33
|
-
|
29
|
+
@stages << stage
|
30
|
+
end
|
34
31
|
|
35
|
-
def
|
36
|
-
|
37
|
-
build_stage(name, @schema.dimension(dimension_name), &block)
|
32
|
+
def build_stage(name, options, &block)
|
33
|
+
builder(name, options).build(name, options, &block)
|
38
34
|
end
|
39
35
|
|
40
|
-
|
41
|
-
|
42
|
-
|
36
|
+
private
|
37
|
+
|
38
|
+
def builder(name, options)
|
39
|
+
@builder_class_factory.call(name, options).new(@db, @schema)
|
43
40
|
end
|
44
41
|
end
|
45
42
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Chicago
|
2
|
+
module ETL
|
3
|
+
# A Stage that passes source rows through a transformation chain.
|
4
|
+
#
|
5
|
+
# All rows are read into Ruby and then written to sinks after
|
6
|
+
# passing through 0 or more Transformations.
|
7
|
+
class RowTransformationStage < Stage
|
8
|
+
# Returns the source for this stage.
|
9
|
+
attr_reader :source
|
10
|
+
|
11
|
+
def initialize(options={})
|
12
|
+
super
|
13
|
+
@source = options[:source]
|
14
|
+
@sinks = options[:sinks]
|
15
|
+
@transformations = options[:transformations] || []
|
16
|
+
@filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
|
17
|
+
|
18
|
+
validate_arguments
|
19
|
+
end
|
20
|
+
|
21
|
+
# Executes this stage in the context of an ETL::Batch
|
22
|
+
def perform_execution(etl_batch)
|
23
|
+
transform_and_load filtered_source(etl_batch)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Returns the named sink, if it exists
|
27
|
+
def sink(name)
|
28
|
+
@sinks[name.to_sym]
|
29
|
+
end
|
30
|
+
|
31
|
+
def sinks
|
32
|
+
@sinks.values
|
33
|
+
end
|
34
|
+
|
35
|
+
# @api private
|
36
|
+
def filtered_source(etl_batch)
|
37
|
+
filtered_dataset = etl_batch.reextracting? ? source :
|
38
|
+
@filter_strategy.call(source, etl_batch)
|
39
|
+
|
40
|
+
DatasetSource.new(filtered_dataset)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def transform_and_load(source)
|
46
|
+
sinks.each(&:open)
|
47
|
+
pipe_rows_to_sinks_from(source)
|
48
|
+
sinks.each(&:close)
|
49
|
+
end
|
50
|
+
|
51
|
+
def pipe_rows_to_sinks_from(source)
|
52
|
+
source.each do |row|
|
53
|
+
transformation_chain.process(row).each {|row| process_row(row) }
|
54
|
+
end
|
55
|
+
transformation_chain.flush.each {|row| process_row(row) }
|
56
|
+
end
|
57
|
+
|
58
|
+
def transformation_chain
|
59
|
+
@transformation_chain ||= TransformationChain.new(*@transformations)
|
60
|
+
end
|
61
|
+
|
62
|
+
def process_row(row)
|
63
|
+
stream = row.delete(:_stream) || :default
|
64
|
+
@sinks[stream] << row
|
65
|
+
end
|
66
|
+
|
67
|
+
def validate_arguments
|
68
|
+
if @source.nil?
|
69
|
+
raise ArgumentError, "Stage #{@name} requires a source"
|
70
|
+
end
|
71
|
+
|
72
|
+
if @sinks.blank?
|
73
|
+
raise ArgumentError, "Stage #{@name} requires at least one sink"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -7,11 +7,11 @@ module Chicago
|
|
7
7
|
# Clients shouldn't need to instantiate this directly, but instead
|
8
8
|
# call the protected methods in the context of defining a Pipeline
|
9
9
|
class SchemaTableStageBuilder < StageBuilder
|
10
|
-
|
11
|
-
def initialize(db, schema_table)
|
12
|
-
super(db)
|
10
|
+
def build(name, options, &block)
|
13
11
|
@wrapped_builder = SchemaSinksAndTransformationsBuilder.
|
14
|
-
new(@db, schema_table)
|
12
|
+
new(@db, schema_table(name, options))
|
13
|
+
|
14
|
+
super
|
15
15
|
end
|
16
16
|
|
17
17
|
protected
|
@@ -47,5 +47,19 @@ module Chicago
|
|
47
47
|
}
|
48
48
|
end
|
49
49
|
end
|
50
|
+
|
51
|
+
class LoadDimensionStageBuilder < SchemaTableStageBuilder
|
52
|
+
def schema_table(name, options)
|
53
|
+
dimension_name = options[:dimension] || name.name
|
54
|
+
@schema.dimension(dimension_name)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class LoadFactStageBuilder < SchemaTableStageBuilder
|
59
|
+
def schema_table(name, options)
|
60
|
+
fact_name = options[:dimension] || name.name
|
61
|
+
@schema.fact(fact_name)
|
62
|
+
end
|
63
|
+
end
|
50
64
|
end
|
51
65
|
end
|
data/lib/chicago/etl/stage.rb
CHANGED
@@ -1,30 +1,24 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
|
-
# A Stage in the ETL
|
3
|
+
# A Stage in the ETL Pipeline.
|
4
4
|
#
|
5
|
-
#
|
6
|
-
#
|
5
|
+
# Stage subclasses vary in how they perform their execution - some
|
6
|
+
# stages may pipe rows from a source to sinks, others may perform
|
7
|
+
# direct in-database updates.
|
8
|
+
#
|
9
|
+
# @abstract
|
7
10
|
class Stage
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
# Returns the name of this stage.
|
12
|
-
attr_reader :name
|
11
|
+
# The name of this stage.
|
12
|
+
attr_accessor :name
|
13
13
|
|
14
|
-
def initialize(
|
15
|
-
@name = name
|
16
|
-
@source = options[:source]
|
17
|
-
@sinks = options[:sinks]
|
18
|
-
@transformations = options[:transformations] || []
|
19
|
-
@filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
|
20
|
-
@pre_execution_strategies = options[:pre_execution_strategies] || []
|
14
|
+
def initialize(options={})
|
21
15
|
@executable = options.has_key?(:executable) ? options[:executable] : true
|
22
|
-
|
23
|
-
validate_arguments
|
16
|
+
@pre_execution_strategies = options[:pre_execution_strategies] || []
|
24
17
|
end
|
25
|
-
|
18
|
+
|
26
19
|
# Returns the unqualified name of this stage.
|
27
20
|
def task_name
|
21
|
+
raise "This Stage has not been bound to a name" if @name.nil?
|
28
22
|
name.name
|
29
23
|
end
|
30
24
|
|
@@ -32,28 +26,25 @@ module Chicago
|
|
32
26
|
def executable?
|
33
27
|
@executable
|
34
28
|
end
|
35
|
-
|
36
|
-
# Executes this stage in the context of an ETL::Batch
|
29
|
+
|
30
|
+
# Executes this stage in the context of an ETL::Batch.
|
31
|
+
#
|
32
|
+
# This should not be overridden by subclasses; perform_execution
|
33
|
+
# should be changed instead.
|
37
34
|
def execute(etl_batch)
|
38
35
|
prepare_stage(etl_batch)
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
# Returns the named sink, if it exists
|
43
|
-
def sink(name)
|
44
|
-
@sinks[name.to_sym]
|
36
|
+
perform_execution(etl_batch)
|
45
37
|
end
|
46
38
|
|
47
|
-
|
48
|
-
@sinks.values
|
49
|
-
end
|
50
|
-
|
51
|
-
# @api private
|
52
|
-
def filtered_source(etl_batch)
|
53
|
-
filtered_dataset = etl_batch.reextracting? ? source :
|
54
|
-
@filter_strategy.call(source, etl_batch)
|
39
|
+
protected
|
55
40
|
|
56
|
-
|
41
|
+
# Does the actual work involved in executing this stage.
|
42
|
+
#
|
43
|
+
# This should be overridden by subclasses.
|
44
|
+
#
|
45
|
+
# @abstract
|
46
|
+
def perform_execution(etl_batch)
|
47
|
+
raise "perform_execution method has not been overridden."
|
57
48
|
end
|
58
49
|
|
59
50
|
private
|
@@ -63,38 +54,6 @@ module Chicago
|
|
63
54
|
strategy.call(self, etl_batch)
|
64
55
|
end
|
65
56
|
end
|
66
|
-
|
67
|
-
def transform_and_load(source)
|
68
|
-
sinks.each(&:open)
|
69
|
-
pipe_rows_to_sinks_from(source)
|
70
|
-
sinks.each(&:close)
|
71
|
-
end
|
72
|
-
|
73
|
-
def pipe_rows_to_sinks_from(source)
|
74
|
-
source.each do |row|
|
75
|
-
transformation_chain.process(row).each {|row| process_row(row) }
|
76
|
-
end
|
77
|
-
transformation_chain.flush.each {|row| process_row(row) }
|
78
|
-
end
|
79
|
-
|
80
|
-
def transformation_chain
|
81
|
-
@transformation_chain ||= TransformationChain.new(*@transformations)
|
82
|
-
end
|
83
|
-
|
84
|
-
def process_row(row)
|
85
|
-
stream = row.delete(:_stream) || :default
|
86
|
-
@sinks[stream] << row
|
87
|
-
end
|
88
|
-
|
89
|
-
def validate_arguments
|
90
|
-
if @source.nil?
|
91
|
-
raise ArgumentError, "Stage #{@name} requires a source"
|
92
|
-
end
|
93
|
-
|
94
|
-
if @sinks.blank?
|
95
|
-
raise ArgumentError, "Stage #{@name} requires at least one sink"
|
96
|
-
end
|
97
|
-
end
|
98
57
|
end
|
99
58
|
end
|
100
59
|
end
|
@@ -1,24 +1,25 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
3
|
class StageBuilder
|
4
|
-
def initialize(db)
|
4
|
+
def initialize(db, schema)
|
5
5
|
@db = db
|
6
|
+
@schema = schema
|
6
7
|
end
|
7
8
|
|
8
|
-
def build(name, &block)
|
9
|
+
def build(name, options, &block)
|
9
10
|
@pre_execution_strategies = []
|
10
11
|
@executable = true
|
11
12
|
|
12
13
|
instance_eval &block
|
13
14
|
set_default_stage_values
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
RowTransformationStage.
|
17
|
+
new(:source => @dataset,
|
18
|
+
:sinks => @sinks,
|
19
|
+
:transformations => @transformations,
|
20
|
+
:filter_strategy => @filter_strategy,
|
21
|
+
:pre_execution_strategies => @pre_execution_strategies,
|
22
|
+
:executable => @executable)
|
22
23
|
end
|
23
24
|
|
24
25
|
protected
|
@@ -15,8 +15,12 @@ describe "creating and running a dimension stage" do
|
|
15
15
|
schema
|
16
16
|
}
|
17
17
|
|
18
|
-
let(:pipeline) {
|
19
|
-
|
18
|
+
let(:pipeline) {
|
19
|
+
Chicago::ETL::Pipeline.new(db, schema) do |name, options|
|
20
|
+
Chicago::ETL::LoadDimensionStageBuilder
|
21
|
+
end
|
22
|
+
}
|
23
|
+
|
20
24
|
it "glues the source, transformations, and sink correctly" do
|
21
25
|
pipeline.define_stage(:load, :dimensions, :test) do
|
22
26
|
source do
|
@@ -56,13 +56,13 @@ describe "Mysql -> Mysql through transformation chain" do
|
|
56
56
|
new(TEST_DB, :destination, [:id, :foo, :bin])
|
57
57
|
sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
|
58
58
|
|
59
|
-
stage = Chicago::ETL::
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
stage = Chicago::ETL::RowTransformationStage.
|
60
|
+
new(:source => source,
|
61
|
+
:transformations => transformations,
|
62
|
+
:sinks => {
|
63
|
+
:default => sink_1,
|
64
|
+
:other => sink_2
|
65
|
+
})
|
66
66
|
|
67
67
|
stage.execute(double(:etl_batch, :reextracting? => true))
|
68
68
|
|
@@ -1,27 +1,24 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::ETL::
|
3
|
+
describe Chicago::ETL::RowTransformationStage do
|
4
4
|
let(:etl_batch) { double(:etl_batch, :reextracting? => true) }
|
5
5
|
|
6
6
|
it "requires a source" do
|
7
7
|
expect {
|
8
|
-
described_class.new(:
|
9
|
-
:source => nil,
|
8
|
+
described_class.new(:source => nil,
|
10
9
|
:sinks => {:default => double(:sink)})
|
11
10
|
}.to raise_error(ArgumentError)
|
12
11
|
end
|
13
12
|
|
14
13
|
it "requires sinks" do
|
15
14
|
expect {
|
16
|
-
described_class.new(:
|
17
|
-
:source => double(:source),
|
15
|
+
described_class.new(:source => double(:source),
|
18
16
|
:sinks => nil)
|
19
17
|
}.to raise_error(ArgumentError)
|
20
18
|
end
|
21
19
|
|
22
20
|
it "does not filter the dataset if re-extracting" do
|
23
|
-
stage = described_class.new(:
|
24
|
-
:source => double(:source),
|
21
|
+
stage = described_class.new(:source => double(:source),
|
25
22
|
:sinks => {:default => double(:sink)},
|
26
23
|
:filter_strategy => lambda { fail })
|
27
24
|
|
@@ -33,8 +30,7 @@ describe Chicago::ETL::Stage do
|
|
33
30
|
sink.should_receive(:open)
|
34
31
|
sink.should_receive(:close)
|
35
32
|
|
36
|
-
stage = described_class.new(:
|
37
|
-
:source => [],
|
33
|
+
stage = described_class.new(:source => [],
|
38
34
|
:sinks => {:default => sink})
|
39
35
|
|
40
36
|
stage.execute(etl_batch)
|
metadata
CHANGED
@@ -1,225 +1,197 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 4
|
10
|
-
version: 0.2.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.5
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Roland Swingler
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
hash: 3
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
- 4
|
30
|
-
- 6
|
11
|
+
date: 2014-08-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: chicagowarehouse
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
31
19
|
version: 0.4.6
|
32
20
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
|
35
|
-
segments:
|
36
|
-
- 0
|
37
|
-
- 4
|
38
|
-
version: "0.4"
|
39
|
-
requirement: *id001
|
40
|
-
type: :runtime
|
41
|
-
prerelease: false
|
42
|
-
name: chicagowarehouse
|
43
|
-
- !ruby/object:Gem::Dependency
|
44
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
|
-
requirements:
|
47
|
-
- - ">="
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
hash: 3
|
50
|
-
segments:
|
51
|
-
- 0
|
52
|
-
version: "0"
|
53
|
-
requirement: *id002
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0.4'
|
54
23
|
type: :runtime
|
55
24
|
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.4.6
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0.4'
|
33
|
+
- !ruby/object:Gem::Dependency
|
56
34
|
name: fastercsv
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
hash: 27
|
64
|
-
segments:
|
65
|
-
- 0
|
66
|
-
- 0
|
67
|
-
- 2
|
68
|
-
version: 0.0.2
|
69
|
-
requirement: *id003
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
70
40
|
type: :runtime
|
71
41
|
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
72
48
|
name: sequel_load_data_infile
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
hash: 3
|
80
|
-
segments:
|
81
|
-
- 0
|
82
|
-
version: "0"
|
83
|
-
requirement: *id004
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.0.2
|
84
54
|
type: :runtime
|
85
55
|
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 0.0.2
|
61
|
+
- !ruby/object:Gem::Dependency
|
86
62
|
name: sequel_fast_columns
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
hash: 7
|
94
|
-
segments:
|
95
|
-
- 2
|
96
|
-
version: "2"
|
97
|
-
requirement: *id005
|
98
|
-
type: :development
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
type: :runtime
|
99
69
|
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
100
76
|
name: rspec
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
hash: 3
|
108
|
-
segments:
|
109
|
-
- 0
|
110
|
-
version: "0"
|
111
|
-
requirement: *id006
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ~>
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '2'
|
112
82
|
type: :development
|
113
83
|
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ~>
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '2'
|
89
|
+
- !ruby/object:Gem::Dependency
|
114
90
|
name: timecop
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
- !ruby/object:Gem::Version
|
121
|
-
hash: 3
|
122
|
-
segments:
|
123
|
-
- 0
|
124
|
-
version: "0"
|
125
|
-
requirement: *id007
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ! '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
126
96
|
type: :development
|
127
97
|
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
- !ruby/object:Gem::Dependency
|
128
104
|
name: yard
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
- !ruby/object:Gem::Version
|
135
|
-
hash: 3
|
136
|
-
segments:
|
137
|
-
- 0
|
138
|
-
version: "0"
|
139
|
-
requirement: *id008
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
140
110
|
type: :development
|
141
111
|
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ! '>='
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
142
118
|
name: flog
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
- !ruby/object:Gem::Version
|
149
|
-
hash: 3
|
150
|
-
segments:
|
151
|
-
- 0
|
152
|
-
version: "0"
|
153
|
-
requirement: *id009
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ! '>='
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
154
124
|
type: :development
|
155
125
|
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ! '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
- !ruby/object:Gem::Dependency
|
156
132
|
name: simplecov
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
- !ruby/object:Gem::Version
|
163
|
-
hash: 3
|
164
|
-
segments:
|
165
|
-
- 0
|
166
|
-
version: "0"
|
167
|
-
requirement: *id010
|
168
|
-
type: :development
|
169
|
-
prerelease: false
|
170
|
-
name: ZenTest
|
171
|
-
- !ruby/object:Gem::Dependency
|
172
|
-
version_requirements: &id011 !ruby/object:Gem::Requirement
|
173
|
-
none: false
|
174
|
-
requirements:
|
175
|
-
- - ">="
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
hash: 3
|
178
|
-
segments:
|
179
|
-
- 0
|
180
|
-
version: "0"
|
181
|
-
requirement: *id011
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ! '>='
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
182
138
|
type: :development
|
183
139
|
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ! '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
- !ruby/object:Gem::Dependency
|
184
146
|
name: mysql2
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
- !ruby/object:Gem::Version
|
191
|
-
hash: 1
|
192
|
-
segments:
|
193
|
-
- 1
|
194
|
-
version: "1"
|
195
|
-
requirement: *id012
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ! '>='
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0'
|
196
152
|
type: :development
|
197
153
|
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ! '>='
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
- !ruby/object:Gem::Dependency
|
198
160
|
name: bundler
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
- !ruby/object:Gem::Version
|
205
|
-
hash: 3
|
206
|
-
segments:
|
207
|
-
- 0
|
208
|
-
version: "0"
|
209
|
-
requirement: *id013
|
161
|
+
requirement: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '1'
|
210
166
|
type: :development
|
211
167
|
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - ~>
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '1'
|
173
|
+
- !ruby/object:Gem::Dependency
|
212
174
|
name: jeweler
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ! '>='
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
type: :development
|
181
|
+
prerelease: false
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ! '>='
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
213
187
|
description: ETL tools for Chicago
|
214
188
|
email: roland.swingler@gmail.com
|
215
189
|
executables: []
|
216
|
-
|
217
190
|
extensions: []
|
218
|
-
|
219
|
-
extra_rdoc_files:
|
191
|
+
extra_rdoc_files:
|
220
192
|
- LICENSE.txt
|
221
193
|
- README.rdoc
|
222
|
-
files:
|
194
|
+
files:
|
223
195
|
- .document
|
224
196
|
- .rspec
|
225
197
|
- Gemfile
|
@@ -248,6 +220,7 @@ files:
|
|
248
220
|
- lib/chicago/etl/null_sink.rb
|
249
221
|
- lib/chicago/etl/pipeline.rb
|
250
222
|
- lib/chicago/etl/pipeline_endpoint.rb
|
223
|
+
- lib/chicago/etl/row_transformation_stage.rb
|
251
224
|
- lib/chicago/etl/schema_sinks_and_transformations_builder.rb
|
252
225
|
- lib/chicago/etl/schema_table_sink_factory.rb
|
253
226
|
- lib/chicago/etl/schema_table_stage_builder.rb
|
@@ -288,6 +261,7 @@ files:
|
|
288
261
|
- spec/etl/mysql_file_sink_spec.rb
|
289
262
|
- spec/etl/mysql_integration_spec.rb
|
290
263
|
- spec/etl/pipeline_stage_builder_spec.rb
|
264
|
+
- spec/etl/row_transformation_stage_spec.rb
|
291
265
|
- spec/etl/schema_table_sink_factory_spec.rb
|
292
266
|
- spec/etl/screens/invalid_element_spec.rb
|
293
267
|
- spec/etl/screens/missing_value_spec.rb
|
@@ -295,7 +269,6 @@ files:
|
|
295
269
|
- spec/etl/sequel/dependant_tables_spec.rb
|
296
270
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
297
271
|
- spec/etl/stage_name_spec.rb
|
298
|
-
- spec/etl/stage_spec.rb
|
299
272
|
- spec/etl/table_builder_spec.rb
|
300
273
|
- spec/etl/task_spec.rb
|
301
274
|
- spec/etl/transformation_chain_spec.rb
|
@@ -306,37 +279,28 @@ files:
|
|
306
279
|
- spec/etl/transformations_spec.rb
|
307
280
|
- spec/spec_helper.rb
|
308
281
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
309
|
-
licenses:
|
282
|
+
licenses:
|
310
283
|
- MIT
|
284
|
+
metadata: {}
|
311
285
|
post_install_message:
|
312
286
|
rdoc_options: []
|
313
|
-
|
314
|
-
require_paths:
|
287
|
+
require_paths:
|
315
288
|
- lib
|
316
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
none: false
|
327
|
-
requirements:
|
328
|
-
- - ">="
|
329
|
-
- !ruby/object:Gem::Version
|
330
|
-
hash: 3
|
331
|
-
segments:
|
332
|
-
- 0
|
333
|
-
version: "0"
|
289
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
290
|
+
requirements:
|
291
|
+
- - ! '>='
|
292
|
+
- !ruby/object:Gem::Version
|
293
|
+
version: '0'
|
294
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
295
|
+
requirements:
|
296
|
+
- - ! '>='
|
297
|
+
- !ruby/object:Gem::Version
|
298
|
+
version: '0'
|
334
299
|
requirements: []
|
335
|
-
|
336
300
|
rubyforge_project:
|
337
|
-
rubygems_version:
|
301
|
+
rubygems_version: 2.2.1
|
338
302
|
signing_key:
|
339
|
-
specification_version:
|
303
|
+
specification_version: 4
|
340
304
|
summary: Chicago ETL
|
341
305
|
test_files: []
|
342
|
-
|
306
|
+
has_rdoc:
|