chicago-etl 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/Gemfile +0 -1
- data/VERSION +1 -1
- data/chicago-etl.gemspec +8 -9
- data/lib/chicago/etl.rb +1 -0
- data/lib/chicago/etl/pipeline.rb +16 -19
- data/lib/chicago/etl/row_transformation_stage.rb +78 -0
- data/lib/chicago/etl/schema_table_stage_builder.rb +18 -4
- data/lib/chicago/etl/stage.rb +26 -67
- data/lib/chicago/etl/stage_builder.rb +10 -9
- data/spec/etl/define_dimension_stage_spec.rb +6 -2
- data/spec/etl/mysql_integration_spec.rb +7 -7
- data/spec/etl/{stage_spec.rb → row_transformation_stage_spec.rb} +5 -9
- metadata +166 -202
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTdkYTQ0NmIxNGQ0ODU2NTAzMWU4NThjYzhjZWMwOTVjNmU4NjRhNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NDA3N2Y0Y2Y3ZTk1ZGQ2Mzg0NDdkMjA4YzRjODMwMDlmZTRjOWZkMw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MTJiYzg5MGFlMjcwNTBlOTc5N2RkOGM3ZmVmMzIwNTMyN2FmYWQ2ODgwMmYz
|
10
|
+
YTI2MGYwZGQ5ZGNkZWU0MzNlOWNmNTlhODg0NTRkNzU5NDhlYTZjNzcxOWRi
|
11
|
+
ODE3NGNiNWZjMjg3NzJkNzc4OWRhYThlOWUwMjBkNzkzMmYwMWQ=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MDg3ZDQ0ZjhjZTYyZjMzYmFhNWUwOWJlNTAzMzBkOTdjNGFlMzU2NTZkMzA5
|
14
|
+
ZjcwNDBmYTg0YTUyZDE5MThhY2IxYzQwYzM1NzE1MzcwYWEyM2FhYTllYWU3
|
15
|
+
OTkwZmJlMDEwMTQzNjhjMmNjMTAxYWRjYmY3ZjNjZGY5ZDIwYjg=
|
data/Gemfile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.4
|
1
|
+
0.2.5
|
data/chicago-etl.gemspec
CHANGED
@@ -2,14 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: chicago-etl 0.2.5 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
8
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.2.4"
|
9
|
+
s.version = "0.2.5"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2014-
|
14
|
+
s.date = "2014-08-07"
|
13
15
|
s.description = "ETL tools for Chicago"
|
14
16
|
s.email = "roland.swingler@gmail.com"
|
15
17
|
s.extra_rdoc_files = [
|
@@ -45,6 +47,7 @@ Gem::Specification.new do |s|
|
|
45
47
|
"lib/chicago/etl/null_sink.rb",
|
46
48
|
"lib/chicago/etl/pipeline.rb",
|
47
49
|
"lib/chicago/etl/pipeline_endpoint.rb",
|
50
|
+
"lib/chicago/etl/row_transformation_stage.rb",
|
48
51
|
"lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
|
49
52
|
"lib/chicago/etl/schema_table_sink_factory.rb",
|
50
53
|
"lib/chicago/etl/schema_table_stage_builder.rb",
|
@@ -85,6 +88,7 @@ Gem::Specification.new do |s|
|
|
85
88
|
"spec/etl/mysql_file_sink_spec.rb",
|
86
89
|
"spec/etl/mysql_integration_spec.rb",
|
87
90
|
"spec/etl/pipeline_stage_builder_spec.rb",
|
91
|
+
"spec/etl/row_transformation_stage_spec.rb",
|
88
92
|
"spec/etl/schema_table_sink_factory_spec.rb",
|
89
93
|
"spec/etl/screens/invalid_element_spec.rb",
|
90
94
|
"spec/etl/screens/missing_value_spec.rb",
|
@@ -92,7 +96,6 @@ Gem::Specification.new do |s|
|
|
92
96
|
"spec/etl/sequel/dependant_tables_spec.rb",
|
93
97
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
94
98
|
"spec/etl/stage_name_spec.rb",
|
95
|
-
"spec/etl/stage_spec.rb",
|
96
99
|
"spec/etl/table_builder_spec.rb",
|
97
100
|
"spec/etl/task_spec.rb",
|
98
101
|
"spec/etl/transformation_chain_spec.rb",
|
@@ -105,12 +108,11 @@ Gem::Specification.new do |s|
|
|
105
108
|
]
|
106
109
|
s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
|
107
110
|
s.licenses = ["MIT"]
|
108
|
-
s.
|
109
|
-
s.rubygems_version = "1.8.25"
|
111
|
+
s.rubygems_version = "2.2.1"
|
110
112
|
s.summary = "Chicago ETL"
|
111
113
|
|
112
114
|
if s.respond_to? :specification_version then
|
113
|
-
s.specification_version =
|
115
|
+
s.specification_version = 4
|
114
116
|
|
115
117
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
116
118
|
s.add_runtime_dependency(%q<chicagowarehouse>, [">= 0.4.6", "~> 0.4"])
|
@@ -122,7 +124,6 @@ Gem::Specification.new do |s|
|
|
122
124
|
s.add_development_dependency(%q<yard>, [">= 0"])
|
123
125
|
s.add_development_dependency(%q<flog>, [">= 0"])
|
124
126
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
125
|
-
s.add_development_dependency(%q<ZenTest>, [">= 0"])
|
126
127
|
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
127
128
|
s.add_development_dependency(%q<bundler>, ["~> 1"])
|
128
129
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
@@ -136,7 +137,6 @@ Gem::Specification.new do |s|
|
|
136
137
|
s.add_dependency(%q<yard>, [">= 0"])
|
137
138
|
s.add_dependency(%q<flog>, [">= 0"])
|
138
139
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
139
|
-
s.add_dependency(%q<ZenTest>, [">= 0"])
|
140
140
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
141
141
|
s.add_dependency(%q<bundler>, ["~> 1"])
|
142
142
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
@@ -151,7 +151,6 @@ Gem::Specification.new do |s|
|
|
151
151
|
s.add_dependency(%q<yard>, [">= 0"])
|
152
152
|
s.add_dependency(%q<flog>, [">= 0"])
|
153
153
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
154
|
-
s.add_dependency(%q<ZenTest>, [">= 0"])
|
155
154
|
s.add_dependency(%q<mysql2>, [">= 0"])
|
156
155
|
s.add_dependency(%q<bundler>, ["~> 1"])
|
157
156
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
data/lib/chicago/etl.rb
CHANGED
@@ -28,6 +28,7 @@ require 'chicago/etl/transformations'
|
|
28
28
|
require 'chicago/etl/load_dataset_builder'
|
29
29
|
require 'chicago/etl/dataset_builder'
|
30
30
|
require 'chicago/etl/stage'
|
31
|
+
require 'chicago/etl/row_transformation_stage'
|
31
32
|
require 'chicago/etl/stage_builder'
|
32
33
|
require 'chicago/etl/schema_sinks_and_transformations_builder'
|
33
34
|
require 'chicago/etl/pipeline'
|
data/lib/chicago/etl/pipeline.rb
CHANGED
@@ -6,40 +6,37 @@ module Chicago
|
|
6
6
|
attr_reader :stages
|
7
7
|
|
8
8
|
# Creates a pipeline for a Schema.
|
9
|
-
def initialize(db, schema)
|
9
|
+
def initialize(db, schema, &block)
|
10
10
|
@schema, @db = schema, db
|
11
11
|
@stages = Chicago::Schema::NamedElementCollection.new
|
12
|
+
@builder_class_factory = block || lambda {|name, options| StageBuilder }
|
12
13
|
end
|
13
14
|
|
14
15
|
# Defines a generic stage in the pipeline.
|
15
16
|
def define_stage(*args, &block)
|
16
17
|
options = args.last.kind_of?(Hash) ? args.pop : {}
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
@stages << build_dimension_load_stage(name, options, &block)
|
22
|
-
elsif name =~ [:load, :facts]
|
23
|
-
@stages << build_fact_load_stage(name, options, &block)
|
19
|
+
if args.last.kind_of?(Stage)
|
20
|
+
stage = args.pop
|
21
|
+
name = StageName.new(args)
|
24
22
|
else
|
25
|
-
|
23
|
+
name = StageName.new(args)
|
24
|
+
stage = build_stage(name, options, &block)
|
26
25
|
end
|
27
|
-
end
|
28
26
|
|
29
|
-
|
30
|
-
SchemaTableStageBuilder.new(@db, schema_table).build(name, &block)
|
31
|
-
end
|
27
|
+
stage.name = StageName.new(args)
|
32
28
|
|
33
|
-
|
29
|
+
@stages << stage
|
30
|
+
end
|
34
31
|
|
35
|
-
def
|
36
|
-
|
37
|
-
build_stage(name, @schema.dimension(dimension_name), &block)
|
32
|
+
def build_stage(name, options, &block)
|
33
|
+
builder(name, options).build(name, options, &block)
|
38
34
|
end
|
39
35
|
|
40
|
-
|
41
|
-
|
42
|
-
|
36
|
+
private
|
37
|
+
|
38
|
+
def builder(name, options)
|
39
|
+
@builder_class_factory.call(name, options).new(@db, @schema)
|
43
40
|
end
|
44
41
|
end
|
45
42
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Chicago
|
2
|
+
module ETL
|
3
|
+
# A Stage that passes source rows through a transformation chain.
|
4
|
+
#
|
5
|
+
# All rows are read into Ruby and then written to sinks after
|
6
|
+
# passing through 0 or more Transformations.
|
7
|
+
class RowTransformationStage < Stage
|
8
|
+
# Returns the source for this stage.
|
9
|
+
attr_reader :source
|
10
|
+
|
11
|
+
def initialize(options={})
|
12
|
+
super
|
13
|
+
@source = options[:source]
|
14
|
+
@sinks = options[:sinks]
|
15
|
+
@transformations = options[:transformations] || []
|
16
|
+
@filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
|
17
|
+
|
18
|
+
validate_arguments
|
19
|
+
end
|
20
|
+
|
21
|
+
# Executes this stage in the context of an ETL::Batch
|
22
|
+
def perform_execution(etl_batch)
|
23
|
+
transform_and_load filtered_source(etl_batch)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Returns the named sink, if it exists
|
27
|
+
def sink(name)
|
28
|
+
@sinks[name.to_sym]
|
29
|
+
end
|
30
|
+
|
31
|
+
def sinks
|
32
|
+
@sinks.values
|
33
|
+
end
|
34
|
+
|
35
|
+
# @api private
|
36
|
+
def filtered_source(etl_batch)
|
37
|
+
filtered_dataset = etl_batch.reextracting? ? source :
|
38
|
+
@filter_strategy.call(source, etl_batch)
|
39
|
+
|
40
|
+
DatasetSource.new(filtered_dataset)
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def transform_and_load(source)
|
46
|
+
sinks.each(&:open)
|
47
|
+
pipe_rows_to_sinks_from(source)
|
48
|
+
sinks.each(&:close)
|
49
|
+
end
|
50
|
+
|
51
|
+
def pipe_rows_to_sinks_from(source)
|
52
|
+
source.each do |row|
|
53
|
+
transformation_chain.process(row).each {|row| process_row(row) }
|
54
|
+
end
|
55
|
+
transformation_chain.flush.each {|row| process_row(row) }
|
56
|
+
end
|
57
|
+
|
58
|
+
def transformation_chain
|
59
|
+
@transformation_chain ||= TransformationChain.new(*@transformations)
|
60
|
+
end
|
61
|
+
|
62
|
+
def process_row(row)
|
63
|
+
stream = row.delete(:_stream) || :default
|
64
|
+
@sinks[stream] << row
|
65
|
+
end
|
66
|
+
|
67
|
+
def validate_arguments
|
68
|
+
if @source.nil?
|
69
|
+
raise ArgumentError, "Stage #{@name} requires a source"
|
70
|
+
end
|
71
|
+
|
72
|
+
if @sinks.blank?
|
73
|
+
raise ArgumentError, "Stage #{@name} requires at least one sink"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -7,11 +7,11 @@ module Chicago
|
|
7
7
|
# Clients shouldn't need to instantiate this directly, but instead
|
8
8
|
# call the protected methods in the context of defining a Pipeline
|
9
9
|
class SchemaTableStageBuilder < StageBuilder
|
10
|
-
|
11
|
-
def initialize(db, schema_table)
|
12
|
-
super(db)
|
10
|
+
def build(name, options, &block)
|
13
11
|
@wrapped_builder = SchemaSinksAndTransformationsBuilder.
|
14
|
-
new(@db, schema_table)
|
12
|
+
new(@db, schema_table(name, options))
|
13
|
+
|
14
|
+
super
|
15
15
|
end
|
16
16
|
|
17
17
|
protected
|
@@ -47,5 +47,19 @@ module Chicago
|
|
47
47
|
}
|
48
48
|
end
|
49
49
|
end
|
50
|
+
|
51
|
+
class LoadDimensionStageBuilder < SchemaTableStageBuilder
|
52
|
+
def schema_table(name, options)
|
53
|
+
dimension_name = options[:dimension] || name.name
|
54
|
+
@schema.dimension(dimension_name)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class LoadFactStageBuilder < SchemaTableStageBuilder
|
59
|
+
def schema_table(name, options)
|
60
|
+
fact_name = options[:dimension] || name.name
|
61
|
+
@schema.fact(fact_name)
|
62
|
+
end
|
63
|
+
end
|
50
64
|
end
|
51
65
|
end
|
data/lib/chicago/etl/stage.rb
CHANGED
@@ -1,30 +1,24 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
|
-
# A Stage in the ETL
|
3
|
+
# A Stage in the ETL Pipeline.
|
4
4
|
#
|
5
|
-
#
|
6
|
-
#
|
5
|
+
# Stage subclasses vary in how they perform their execution - some
|
6
|
+
# stages may pipe rows from a source to sinks, others may perform
|
7
|
+
# direct in-database updates.
|
8
|
+
#
|
9
|
+
# @abstract
|
7
10
|
class Stage
|
8
|
-
#
|
9
|
-
|
10
|
-
|
11
|
-
# Returns the name of this stage.
|
12
|
-
attr_reader :name
|
11
|
+
# The name of this stage.
|
12
|
+
attr_accessor :name
|
13
13
|
|
14
|
-
def initialize(name, options={})
|
15
|
-
@name = name
|
16
|
-
@source = options[:source]
|
17
|
-
@sinks = options[:sinks]
|
18
|
-
@transformations = options[:transformations] || []
|
19
|
-
@filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
|
20
|
-
@pre_execution_strategies = options[:pre_execution_strategies] || []
|
14
|
+
def initialize(options={})
|
21
15
|
@executable = options.has_key?(:executable) ? options[:executable] : true
|
22
|
-
|
23
|
-
validate_arguments
|
16
|
+
@pre_execution_strategies = options[:pre_execution_strategies] || []
|
24
17
|
end
|
25
|
-
|
18
|
+
|
26
19
|
# Returns the unqualified name of this stage.
|
27
20
|
def task_name
|
21
|
+
raise "This Stage has not been bound to a name" if @name.nil?
|
28
22
|
name.name
|
29
23
|
end
|
30
24
|
|
@@ -32,28 +26,25 @@ module Chicago
|
|
32
26
|
def executable?
|
33
27
|
@executable
|
34
28
|
end
|
35
|
-
|
36
|
-
# Executes this stage in the context of an ETL::Batch
|
29
|
+
|
30
|
+
# Executes this stage in the context of an ETL::Batch.
|
31
|
+
#
|
32
|
+
# This should not be overridden by subclasses; perform_execution
|
33
|
+
# should be changed instead.
|
37
34
|
def execute(etl_batch)
|
38
35
|
prepare_stage(etl_batch)
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
# Returns the named sink, if it exists
|
43
|
-
def sink(name)
|
44
|
-
@sinks[name.to_sym]
|
36
|
+
perform_execution(etl_batch)
|
45
37
|
end
|
46
38
|
|
47
|
-
|
48
|
-
@sinks.values
|
49
|
-
end
|
50
|
-
|
51
|
-
# @api private
|
52
|
-
def filtered_source(etl_batch)
|
53
|
-
filtered_dataset = etl_batch.reextracting? ? source :
|
54
|
-
@filter_strategy.call(source, etl_batch)
|
39
|
+
protected
|
55
40
|
|
56
|
-
|
41
|
+
# Does the actual work involved in executing this stage.
|
42
|
+
#
|
43
|
+
# This should be overridden by subclasses.
|
44
|
+
#
|
45
|
+
# @abstract
|
46
|
+
def perform_execution(etl_batch)
|
47
|
+
raise "perform_execution method has not been overridden."
|
57
48
|
end
|
58
49
|
|
59
50
|
private
|
@@ -63,38 +54,6 @@ module Chicago
|
|
63
54
|
strategy.call(self, etl_batch)
|
64
55
|
end
|
65
56
|
end
|
66
|
-
|
67
|
-
def transform_and_load(source)
|
68
|
-
sinks.each(&:open)
|
69
|
-
pipe_rows_to_sinks_from(source)
|
70
|
-
sinks.each(&:close)
|
71
|
-
end
|
72
|
-
|
73
|
-
def pipe_rows_to_sinks_from(source)
|
74
|
-
source.each do |row|
|
75
|
-
transformation_chain.process(row).each {|row| process_row(row) }
|
76
|
-
end
|
77
|
-
transformation_chain.flush.each {|row| process_row(row) }
|
78
|
-
end
|
79
|
-
|
80
|
-
def transformation_chain
|
81
|
-
@transformation_chain ||= TransformationChain.new(*@transformations)
|
82
|
-
end
|
83
|
-
|
84
|
-
def process_row(row)
|
85
|
-
stream = row.delete(:_stream) || :default
|
86
|
-
@sinks[stream] << row
|
87
|
-
end
|
88
|
-
|
89
|
-
def validate_arguments
|
90
|
-
if @source.nil?
|
91
|
-
raise ArgumentError, "Stage #{@name} requires a source"
|
92
|
-
end
|
93
|
-
|
94
|
-
if @sinks.blank?
|
95
|
-
raise ArgumentError, "Stage #{@name} requires at least one sink"
|
96
|
-
end
|
97
|
-
end
|
98
57
|
end
|
99
58
|
end
|
100
59
|
end
|
@@ -1,24 +1,25 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
3
|
class StageBuilder
|
4
|
-
def initialize(db)
|
4
|
+
def initialize(db, schema)
|
5
5
|
@db = db
|
6
|
+
@schema = schema
|
6
7
|
end
|
7
8
|
|
8
|
-
def build(name, &block)
|
9
|
+
def build(name, options, &block)
|
9
10
|
@pre_execution_strategies = []
|
10
11
|
@executable = true
|
11
12
|
|
12
13
|
instance_eval &block
|
13
14
|
set_default_stage_values
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
RowTransformationStage.
|
17
|
+
new(:source => @dataset,
|
18
|
+
:sinks => @sinks,
|
19
|
+
:transformations => @transformations,
|
20
|
+
:filter_strategy => @filter_strategy,
|
21
|
+
:pre_execution_strategies => @pre_execution_strategies,
|
22
|
+
:executable => @executable)
|
22
23
|
end
|
23
24
|
|
24
25
|
protected
|
@@ -15,8 +15,12 @@ describe "creating and running a dimension stage" do
|
|
15
15
|
schema
|
16
16
|
}
|
17
17
|
|
18
|
-
let(:pipeline) {
|
19
|
-
|
18
|
+
let(:pipeline) {
|
19
|
+
Chicago::ETL::Pipeline.new(db, schema) do |name, options|
|
20
|
+
Chicago::ETL::LoadDimensionStageBuilder
|
21
|
+
end
|
22
|
+
}
|
23
|
+
|
20
24
|
it "glues the source, transformations, and sink correctly" do
|
21
25
|
pipeline.define_stage(:load, :dimensions, :test) do
|
22
26
|
source do
|
@@ -56,13 +56,13 @@ describe "Mysql -> Mysql through transformation chain" do
|
|
56
56
|
new(TEST_DB, :destination, [:id, :foo, :bin])
|
57
57
|
sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
|
58
58
|
|
59
|
-
stage = Chicago::ETL::
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
stage = Chicago::ETL::RowTransformationStage.
|
60
|
+
new(:source => source,
|
61
|
+
:transformations => transformations,
|
62
|
+
:sinks => {
|
63
|
+
:default => sink_1,
|
64
|
+
:other => sink_2
|
65
|
+
})
|
66
66
|
|
67
67
|
stage.execute(double(:etl_batch, :reextracting? => true))
|
68
68
|
|
@@ -1,27 +1,24 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Chicago::ETL::Stage do
|
3
|
+
describe Chicago::ETL::RowTransformationStage do
|
4
4
|
let(:etl_batch) { double(:etl_batch, :reextracting? => true) }
|
5
5
|
|
6
6
|
it "requires a source" do
|
7
7
|
expect {
|
8
|
-
described_class.new(:
|
9
|
-
:source => nil,
|
8
|
+
described_class.new(:source => nil,
|
10
9
|
:sinks => {:default => double(:sink)})
|
11
10
|
}.to raise_error(ArgumentError)
|
12
11
|
end
|
13
12
|
|
14
13
|
it "requires sinks" do
|
15
14
|
expect {
|
16
|
-
described_class.new(:
|
17
|
-
:source => double(:source),
|
15
|
+
described_class.new(:source => double(:source),
|
18
16
|
:sinks => nil)
|
19
17
|
}.to raise_error(ArgumentError)
|
20
18
|
end
|
21
19
|
|
22
20
|
it "does not filter the dataset if re-extracting" do
|
23
|
-
stage = described_class.new(:
|
24
|
-
:source => double(:source),
|
21
|
+
stage = described_class.new(:source => double(:source),
|
25
22
|
:sinks => {:default => double(:sink)},
|
26
23
|
:filter_strategy => lambda { fail })
|
27
24
|
|
@@ -33,8 +30,7 @@ describe Chicago::ETL::Stage do
|
|
33
30
|
sink.should_receive(:open)
|
34
31
|
sink.should_receive(:close)
|
35
32
|
|
36
|
-
stage = described_class.new(:
|
37
|
-
:source => [],
|
33
|
+
stage = described_class.new(:source => [],
|
38
34
|
:sinks => {:default => sink})
|
39
35
|
|
40
36
|
stage.execute(etl_batch)
|
metadata
CHANGED
@@ -1,225 +1,197 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 4
|
10
|
-
version: 0.2.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.5
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Roland Swingler
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
hash: 3
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
- 4
|
30
|
-
- 6
|
11
|
+
date: 2014-08-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: chicagowarehouse
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
31
19
|
version: 0.4.6
|
32
20
|
- - ~>
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
|
35
|
-
segments:
|
36
|
-
- 0
|
37
|
-
- 4
|
38
|
-
version: "0.4"
|
39
|
-
requirement: *id001
|
40
|
-
type: :runtime
|
41
|
-
prerelease: false
|
42
|
-
name: chicagowarehouse
|
43
|
-
- !ruby/object:Gem::Dependency
|
44
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
|
-
requirements:
|
47
|
-
- - ">="
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
hash: 3
|
50
|
-
segments:
|
51
|
-
- 0
|
52
|
-
version: "0"
|
53
|
-
requirement: *id002
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0.4'
|
54
23
|
type: :runtime
|
55
24
|
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.4.6
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0.4'
|
33
|
+
- !ruby/object:Gem::Dependency
|
56
34
|
name: fastercsv
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
hash: 27
|
64
|
-
segments:
|
65
|
-
- 0
|
66
|
-
- 0
|
67
|
-
- 2
|
68
|
-
version: 0.0.2
|
69
|
-
requirement: *id003
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
70
40
|
type: :runtime
|
71
41
|
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
72
48
|
name: sequel_load_data_infile
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
hash: 3
|
80
|
-
segments:
|
81
|
-
- 0
|
82
|
-
version: "0"
|
83
|
-
requirement: *id004
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.0.2
|
84
54
|
type: :runtime
|
85
55
|
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 0.0.2
|
61
|
+
- !ruby/object:Gem::Dependency
|
86
62
|
name: sequel_fast_columns
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
hash: 7
|
94
|
-
segments:
|
95
|
-
- 2
|
96
|
-
version: "2"
|
97
|
-
requirement: *id005
|
98
|
-
type: :development
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
type: :runtime
|
99
69
|
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
100
76
|
name: rspec
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
- !ruby/object:Gem::Version
|
107
|
-
hash: 3
|
108
|
-
segments:
|
109
|
-
- 0
|
110
|
-
version: "0"
|
111
|
-
requirement: *id006
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ~>
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '2'
|
112
82
|
type: :development
|
113
83
|
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ~>
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '2'
|
89
|
+
- !ruby/object:Gem::Dependency
|
114
90
|
name: timecop
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
- !ruby/object:Gem::Version
|
121
|
-
hash: 3
|
122
|
-
segments:
|
123
|
-
- 0
|
124
|
-
version: "0"
|
125
|
-
requirement: *id007
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ! '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
126
96
|
type: :development
|
127
97
|
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ! '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
- !ruby/object:Gem::Dependency
|
128
104
|
name: yard
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
- !ruby/object:Gem::Version
|
135
|
-
hash: 3
|
136
|
-
segments:
|
137
|
-
- 0
|
138
|
-
version: "0"
|
139
|
-
requirement: *id008
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
140
110
|
type: :development
|
141
111
|
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ! '>='
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
142
118
|
name: flog
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
- !ruby/object:Gem::Version
|
149
|
-
hash: 3
|
150
|
-
segments:
|
151
|
-
- 0
|
152
|
-
version: "0"
|
153
|
-
requirement: *id009
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ! '>='
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
154
124
|
type: :development
|
155
125
|
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ! '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
- !ruby/object:Gem::Dependency
|
156
132
|
name: simplecov
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
- !ruby/object:Gem::Version
|
163
|
-
hash: 3
|
164
|
-
segments:
|
165
|
-
- 0
|
166
|
-
version: "0"
|
167
|
-
requirement: *id010
|
168
|
-
type: :development
|
169
|
-
prerelease: false
|
170
|
-
name: ZenTest
|
171
|
-
- !ruby/object:Gem::Dependency
|
172
|
-
version_requirements: &id011 !ruby/object:Gem::Requirement
|
173
|
-
none: false
|
174
|
-
requirements:
|
175
|
-
- - ">="
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
hash: 3
|
178
|
-
segments:
|
179
|
-
- 0
|
180
|
-
version: "0"
|
181
|
-
requirement: *id011
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ! '>='
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
182
138
|
type: :development
|
183
139
|
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ! '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
- !ruby/object:Gem::Dependency
|
184
146
|
name: mysql2
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
- !ruby/object:Gem::Version
|
191
|
-
hash: 1
|
192
|
-
segments:
|
193
|
-
- 1
|
194
|
-
version: "1"
|
195
|
-
requirement: *id012
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ! '>='
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0'
|
196
152
|
type: :development
|
197
153
|
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ! '>='
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
- !ruby/object:Gem::Dependency
|
198
160
|
name: bundler
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
- !ruby/object:Gem::Version
|
205
|
-
hash: 3
|
206
|
-
segments:
|
207
|
-
- 0
|
208
|
-
version: "0"
|
209
|
-
requirement: *id013
|
161
|
+
requirement: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '1'
|
210
166
|
type: :development
|
211
167
|
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - ~>
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '1'
|
173
|
+
- !ruby/object:Gem::Dependency
|
212
174
|
name: jeweler
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ! '>='
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
type: :development
|
181
|
+
prerelease: false
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ! '>='
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
213
187
|
description: ETL tools for Chicago
|
214
188
|
email: roland.swingler@gmail.com
|
215
189
|
executables: []
|
216
|
-
|
217
190
|
extensions: []
|
218
|
-
|
219
|
-
extra_rdoc_files:
|
191
|
+
extra_rdoc_files:
|
220
192
|
- LICENSE.txt
|
221
193
|
- README.rdoc
|
222
|
-
files:
|
194
|
+
files:
|
223
195
|
- .document
|
224
196
|
- .rspec
|
225
197
|
- Gemfile
|
@@ -248,6 +220,7 @@ files:
|
|
248
220
|
- lib/chicago/etl/null_sink.rb
|
249
221
|
- lib/chicago/etl/pipeline.rb
|
250
222
|
- lib/chicago/etl/pipeline_endpoint.rb
|
223
|
+
- lib/chicago/etl/row_transformation_stage.rb
|
251
224
|
- lib/chicago/etl/schema_sinks_and_transformations_builder.rb
|
252
225
|
- lib/chicago/etl/schema_table_sink_factory.rb
|
253
226
|
- lib/chicago/etl/schema_table_stage_builder.rb
|
@@ -288,6 +261,7 @@ files:
|
|
288
261
|
- spec/etl/mysql_file_sink_spec.rb
|
289
262
|
- spec/etl/mysql_integration_spec.rb
|
290
263
|
- spec/etl/pipeline_stage_builder_spec.rb
|
264
|
+
- spec/etl/row_transformation_stage_spec.rb
|
291
265
|
- spec/etl/schema_table_sink_factory_spec.rb
|
292
266
|
- spec/etl/screens/invalid_element_spec.rb
|
293
267
|
- spec/etl/screens/missing_value_spec.rb
|
@@ -295,7 +269,6 @@ files:
|
|
295
269
|
- spec/etl/sequel/dependant_tables_spec.rb
|
296
270
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
297
271
|
- spec/etl/stage_name_spec.rb
|
298
|
-
- spec/etl/stage_spec.rb
|
299
272
|
- spec/etl/table_builder_spec.rb
|
300
273
|
- spec/etl/task_spec.rb
|
301
274
|
- spec/etl/transformation_chain_spec.rb
|
@@ -306,37 +279,28 @@ files:
|
|
306
279
|
- spec/etl/transformations_spec.rb
|
307
280
|
- spec/spec_helper.rb
|
308
281
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
309
|
-
licenses:
|
282
|
+
licenses:
|
310
283
|
- MIT
|
284
|
+
metadata: {}
|
311
285
|
post_install_message:
|
312
286
|
rdoc_options: []
|
313
|
-
|
314
|
-
require_paths:
|
287
|
+
require_paths:
|
315
288
|
- lib
|
316
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
none: false
|
327
|
-
requirements:
|
328
|
-
- - ">="
|
329
|
-
- !ruby/object:Gem::Version
|
330
|
-
hash: 3
|
331
|
-
segments:
|
332
|
-
- 0
|
333
|
-
version: "0"
|
289
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
290
|
+
requirements:
|
291
|
+
- - ! '>='
|
292
|
+
- !ruby/object:Gem::Version
|
293
|
+
version: '0'
|
294
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
295
|
+
requirements:
|
296
|
+
- - ! '>='
|
297
|
+
- !ruby/object:Gem::Version
|
298
|
+
version: '0'
|
334
299
|
requirements: []
|
335
|
-
|
336
300
|
rubyforge_project:
|
337
|
-
rubygems_version: 1.8.25
|
301
|
+
rubygems_version: 2.2.1
|
338
302
|
signing_key:
|
339
|
-
specification_version:
|
303
|
+
specification_version: 4
|
340
304
|
summary: Chicago ETL
|
341
305
|
test_files: []
|
342
|
-
|
306
|
+
has_rdoc:
|