chicago-etl 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +7 -2
- data/lib/chicago/etl/batch.rb +0 -21
- data/lib/chicago/etl/pipeline.rb +17 -1
- data/lib/chicago/etl/stage.rb +76 -0
- data/lib/chicago/etl/stage_builder.rb +69 -0
- data/lib/chicago/etl/transformations/deduplicate_rows.rb +40 -0
- data/lib/chicago/etl.rb +4 -0
- data/spec/etl/define_stage_spec.rb +114 -0
- data/spec/etl/transformations/deduplicate_rows_spec.rb +23 -0
- metadata +198 -184
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-11-07"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -43,10 +43,13 @@ Gem::Specification.new do |s|
|
|
43
43
|
"lib/chicago/etl/screens/out_of_bounds.rb",
|
44
44
|
"lib/chicago/etl/sequel/dependant_tables.rb",
|
45
45
|
"lib/chicago/etl/sequel/filter_to_etl_batch.rb",
|
46
|
+
"lib/chicago/etl/stage.rb",
|
47
|
+
"lib/chicago/etl/stage_builder.rb",
|
46
48
|
"lib/chicago/etl/table_builder.rb",
|
47
49
|
"lib/chicago/etl/task_invocation.rb",
|
48
50
|
"lib/chicago/etl/tasks.rb",
|
49
51
|
"lib/chicago/etl/transformations.rb",
|
52
|
+
"lib/chicago/etl/transformations/deduplicate_rows.rb",
|
50
53
|
"lib/chicago/etl/transformations/uk_post_code.rb",
|
51
54
|
"lib/chicago/etl/transformations/uk_post_code_field.rb",
|
52
55
|
"lib/chicago/flow/array_sink.rb",
|
@@ -68,6 +71,7 @@ Gem::Specification.new do |s|
|
|
68
71
|
"spec/etl/core_extensions_spec.rb",
|
69
72
|
"spec/etl/counter_spec.rb",
|
70
73
|
"spec/etl/dataset_batch_stage_spec.rb",
|
74
|
+
"spec/etl/define_stage_spec.rb",
|
71
75
|
"spec/etl/etl_batch_id_dataset_filter.rb",
|
72
76
|
"spec/etl/key_builder_spec.rb",
|
73
77
|
"spec/etl/load_dataset_builder_spec.rb",
|
@@ -80,6 +84,7 @@ Gem::Specification.new do |s|
|
|
80
84
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
81
85
|
"spec/etl/table_builder_spec.rb",
|
82
86
|
"spec/etl/task_spec.rb",
|
87
|
+
"spec/etl/transformations/deduplicate_rows_spec.rb",
|
83
88
|
"spec/etl/transformations/uk_post_code_field_spec.rb",
|
84
89
|
"spec/etl/transformations/uk_post_code_spec.rb",
|
85
90
|
"spec/etl/transformations_spec.rb",
|
data/lib/chicago/etl/batch.rb
CHANGED
@@ -31,27 +31,6 @@ module Chicago
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
# Deprecated.
|
35
|
-
#
|
36
|
-
# @deprecated Use perform_task instead
|
37
|
-
def load(task_name, &block)
|
38
|
-
perform_task(:load, task_name, &block)
|
39
|
-
end
|
40
|
-
|
41
|
-
# Deprecated.
|
42
|
-
#
|
43
|
-
# @deprecated Use perform_task instead
|
44
|
-
def transform(task_name, &block)
|
45
|
-
perform_task(:extract, task_name, &block)
|
46
|
-
end
|
47
|
-
|
48
|
-
# Deprecated.
|
49
|
-
#
|
50
|
-
# @deprecated Use perform_task instead
|
51
|
-
def extract(task_name, &block)
|
52
|
-
perform_task(:extract, task_name, &block)
|
53
|
-
end
|
54
|
-
|
55
34
|
# Performs a named task if it hasn't already run successfully in
|
56
35
|
# this batch.
|
57
36
|
def perform_task(stage, task_name, &block)
|
data/lib/chicago/etl/pipeline.rb
CHANGED
@@ -8,11 +8,24 @@ module Chicago
|
|
8
8
|
# Returns all defined fact load tasks
|
9
9
|
attr_reader :load_facts
|
10
10
|
|
11
|
+
# Returns all the defined generic stages.
|
12
|
+
attr_reader :stages
|
13
|
+
|
11
14
|
# Creates a pipeline for a Schema.
|
12
15
|
def initialize(db, schema)
|
13
16
|
@schema, @db = schema, db
|
14
17
|
@load_dimensions = Chicago::Schema::NamedElementCollection.new
|
15
18
|
@load_facts = Chicago::Schema::NamedElementCollection.new
|
19
|
+
@stages = Chicago::Schema::NamedElementCollection.new
|
20
|
+
end
|
21
|
+
|
22
|
+
# Defines a generic stage in the pipeline.
|
23
|
+
def define_stage(name, &block)
|
24
|
+
@stages << build_schemaless_stage(name, &block)
|
25
|
+
end
|
26
|
+
|
27
|
+
def build_schemaless_stage(name, &block)
|
28
|
+
StageBuilder.new(@db).build(name, &block)
|
16
29
|
end
|
17
30
|
|
18
31
|
# Defines a dimension load stage
|
@@ -73,6 +86,7 @@ module Chicago
|
|
73
86
|
|
74
87
|
# Define elements of the pipeline. See LoadPipelineStageBuilder
|
75
88
|
# for details.
|
89
|
+
# TODO: rename pipeline => transforms below this method
|
76
90
|
def pipeline(&block)
|
77
91
|
@pipeline_stage = LoadPipelineStageBuilder.new(@db, @schema_table).
|
78
92
|
build(&block)
|
@@ -81,9 +95,11 @@ module Chicago
|
|
81
95
|
# Defines the dataset, see DatasetBuilder.
|
82
96
|
#
|
83
97
|
# The block must return a Sequel::Dataset.
|
84
|
-
|
98
|
+
# TODO: rename dataset => source below this method, make generic
|
99
|
+
def source(&block)
|
85
100
|
@dataset = DatasetBuilder.new(@db).build(&block)
|
86
101
|
end
|
102
|
+
alias :dataset :source
|
87
103
|
|
88
104
|
# Define a custom filter strategy for filtering to an ETL batch.
|
89
105
|
def filter_strategy(&block)
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Chicago
|
2
|
+
module ETL
|
3
|
+
class Stage
|
4
|
+
attr_reader :name
|
5
|
+
|
6
|
+
def initialize(name, options={})
|
7
|
+
@name = name
|
8
|
+
@source = options.fetch(:source)
|
9
|
+
raise ArgumentError, "Stage #{name} requires a source" unless @source
|
10
|
+
|
11
|
+
@sinks = options.fetch(:sinks)
|
12
|
+
raise ArgumentError, "Stage #{name} requires at least one sink" if @sinks.empty?
|
13
|
+
|
14
|
+
@transformations = options.fetch(:transformations)
|
15
|
+
@transformation_chain = Chicago::Flow::TransformationChain.
|
16
|
+
new(*@transformations)
|
17
|
+
|
18
|
+
@filter_strategy = options[:filter_strategy] ||
|
19
|
+
lambda {|source, _| source }
|
20
|
+
end
|
21
|
+
|
22
|
+
def execute(etl_batch, reextract)
|
23
|
+
modified_source = reextract_and_filter_source(@source, etl_batch, reextract)
|
24
|
+
transform_and_load_from(modified_source)
|
25
|
+
end
|
26
|
+
|
27
|
+
def transform_and_load_from(source)
|
28
|
+
end
|
29
|
+
|
30
|
+
def reextract_and_filter_source(source, etl_batch, reextract=false)
|
31
|
+
if reextract
|
32
|
+
filtered_dataset = source
|
33
|
+
else
|
34
|
+
filtered_dataset = @filter_strategy.call(source, etl_batch)
|
35
|
+
end
|
36
|
+
Chicago::Flow::DatasetSource.new(filtered_dataset)
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_reader :transformation_chain
|
40
|
+
|
41
|
+
# Returns the named sink, if it exists
|
42
|
+
def sink(name)
|
43
|
+
@sinks[name.to_sym]
|
44
|
+
end
|
45
|
+
|
46
|
+
def sinks
|
47
|
+
@sinks.values
|
48
|
+
end
|
49
|
+
|
50
|
+
def register_sink(name, sink)
|
51
|
+
@sinks[name.to_sym] = sink
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
def transform_and_load_from(source)
|
56
|
+
sinks.each(&:open)
|
57
|
+
pipe_rows_to_sinks_from(source)
|
58
|
+
sinks.each(&:close)
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
def pipe_rows_to_sinks_from(source)
|
64
|
+
source.each do |row|
|
65
|
+
transformation_chain.process(row).each {|row| process_row(row) }
|
66
|
+
end
|
67
|
+
transformation_chain.flush.each {|row| process_row(row) }
|
68
|
+
end
|
69
|
+
|
70
|
+
def process_row(row)
|
71
|
+
stream = row.delete(:_stream) || :default
|
72
|
+
@sinks[stream] << row
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Chicago
|
2
|
+
module ETL
|
3
|
+
class StageBuilder
|
4
|
+
attr_reader :sink_factory
|
5
|
+
|
6
|
+
def initialize(db)
|
7
|
+
@db = db
|
8
|
+
end
|
9
|
+
|
10
|
+
def build(name, &block)
|
11
|
+
@sinks = {}
|
12
|
+
@transformations = []
|
13
|
+
|
14
|
+
instance_eval &block
|
15
|
+
|
16
|
+
Stage.new(name,
|
17
|
+
:source => @dataset,
|
18
|
+
:sinks => @sinks,
|
19
|
+
:transformations => @transformations,
|
20
|
+
:filter_strategy => @filter_strategy)
|
21
|
+
end
|
22
|
+
|
23
|
+
def source(&block)
|
24
|
+
@dataset = DatasetBuilder.new(@db).build(&block)
|
25
|
+
end
|
26
|
+
|
27
|
+
def transformations(klass=TransformationBuilder, &block)
|
28
|
+
@transformations = klass.new.build(&block)
|
29
|
+
end
|
30
|
+
|
31
|
+
def sinks(options={}, &block)
|
32
|
+
@sinks = SinkBuilder.new.build(&block)
|
33
|
+
end
|
34
|
+
|
35
|
+
# TODO: think of potentially better ways of dealing with this
|
36
|
+
# problem.
|
37
|
+
def filter_strategy(&block)
|
38
|
+
@filter_strategy = block
|
39
|
+
end
|
40
|
+
|
41
|
+
class TransformationBuilder
|
42
|
+
def build(&block)
|
43
|
+
@transformations = []
|
44
|
+
instance_eval(&block)
|
45
|
+
@transformations
|
46
|
+
end
|
47
|
+
|
48
|
+
def add(transformation)
|
49
|
+
@transformations << transformation
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
class SinkBuilder
|
54
|
+
def build(&block)
|
55
|
+
@sinks = {}
|
56
|
+
instance_eval(&block)
|
57
|
+
@sinks
|
58
|
+
end
|
59
|
+
|
60
|
+
protected
|
61
|
+
|
62
|
+
def add(sink, options={})
|
63
|
+
stream = options[:stream] || :default
|
64
|
+
@sinks[stream] = sink
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Chicago
|
2
|
+
module ETL
|
3
|
+
class DeduplicateRows < Chicago::Flow::Transformation
|
4
|
+
def process_row(row)
|
5
|
+
if @working_row.nil?
|
6
|
+
@working_row = row
|
7
|
+
return
|
8
|
+
elsif same_row?(row)
|
9
|
+
@working_row = merge_rows(row)
|
10
|
+
return
|
11
|
+
else
|
12
|
+
assign_new_row_and_return_old_row(row)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def flush
|
17
|
+
@working_row.nil? ? [] : [@working_row]
|
18
|
+
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
|
22
|
+
attr_reader :working_row
|
23
|
+
|
24
|
+
# This should be implemented by clients
|
25
|
+
def merge_rows(row)
|
26
|
+
end
|
27
|
+
|
28
|
+
# This should be implemented by clients
|
29
|
+
def same_row?(row)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def assign_new_row_and_return_old_row(row)
|
35
|
+
row, @working_row = @working_row, row
|
36
|
+
row
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/chicago/etl.rb
CHANGED
@@ -29,6 +29,9 @@ require 'chicago/etl/dataset_batch_stage'
|
|
29
29
|
require 'chicago/etl/load_pipeline_stage_builder'
|
30
30
|
require 'chicago/etl/pipeline'
|
31
31
|
|
32
|
+
require 'chicago/etl/stage'
|
33
|
+
require 'chicago/etl/stage_builder'
|
34
|
+
|
32
35
|
# Sequel Extensions
|
33
36
|
require 'chicago/etl/sequel/filter_to_etl_batch'
|
34
37
|
require 'chicago/etl/sequel/dependant_tables'
|
@@ -40,6 +43,7 @@ require 'chicago/etl/screens/invalid_element'
|
|
40
43
|
require 'chicago/etl/screens/out_of_bounds'
|
41
44
|
|
42
45
|
# Transformations
|
46
|
+
require 'chicago/etl/transformations/deduplicate_rows'
|
43
47
|
require 'chicago/etl/transformations/uk_post_code'
|
44
48
|
require 'chicago/etl/transformations/uk_post_code_field'
|
45
49
|
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
class TestTransformation < Chicago::Flow::Transformation
|
4
|
+
def output_streams
|
5
|
+
[:another_stream]
|
6
|
+
end
|
7
|
+
|
8
|
+
def process_row(row)
|
9
|
+
[row, assign_stream({:some_field => "has an error value"}, :another_stream)]
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "defining and executing a stage" do
|
14
|
+
let(:rows) { [{:some_field => "value"}] }
|
15
|
+
let(:db) { double(:test_dataset_method => rows) }
|
16
|
+
let(:schema) { double }
|
17
|
+
let(:pipeline) { Chicago::ETL::Pipeline.new(db, schema)}
|
18
|
+
|
19
|
+
it "allows no tranformations" do
|
20
|
+
pipeline.define_stage(:test_stage) do
|
21
|
+
source do
|
22
|
+
db.test_dataset_method
|
23
|
+
end
|
24
|
+
|
25
|
+
sinks do
|
26
|
+
add Chicago::Flow::ArraySink.new(:test)
|
27
|
+
add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
pipeline.stages.each do |stage|
|
32
|
+
stage.execute(double, true)
|
33
|
+
end
|
34
|
+
|
35
|
+
stage = pipeline.stages.first
|
36
|
+
stage.sink(:default).data.length.should == 1
|
37
|
+
stage.sink(:default).data.first.should == {:some_field => "value"}
|
38
|
+
|
39
|
+
stage.sink(:another_stream).data.length.should == 0
|
40
|
+
end
|
41
|
+
|
42
|
+
it "requires sinks" do
|
43
|
+
expect {
|
44
|
+
pipeline.define_stage(:test_stage) do
|
45
|
+
source do
|
46
|
+
db.test_dataset_method
|
47
|
+
end
|
48
|
+
end
|
49
|
+
}.to raise_error(ArgumentError)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "requires sources" do
|
53
|
+
expect {
|
54
|
+
pipeline.define_stage(:test_stage) do
|
55
|
+
sinks do
|
56
|
+
add Chicago::Flow::ArraySink.new(:test)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
}.to raise_error(ArgumentError)
|
60
|
+
end
|
61
|
+
|
62
|
+
it "glues the source, transformations, and sink correctly" do
|
63
|
+
pipeline.define_stage(:test_stage) do
|
64
|
+
source do
|
65
|
+
db.test_dataset_method
|
66
|
+
end
|
67
|
+
|
68
|
+
transformations do
|
69
|
+
add TestTransformation.new
|
70
|
+
end
|
71
|
+
|
72
|
+
sinks do
|
73
|
+
add Chicago::Flow::ArraySink.new(:test)
|
74
|
+
add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
pipeline.stages.each do |stage|
|
79
|
+
stage.execute(double, true)
|
80
|
+
end
|
81
|
+
|
82
|
+
stage = pipeline.stages.first
|
83
|
+
stage.sink(:default).data.length.should == 1
|
84
|
+
stage.sink(:default).data.first.should == {:some_field => "value"}
|
85
|
+
|
86
|
+
stage.sink(:another_stream).data.length.should == 1
|
87
|
+
stage.sink(:another_stream).data.first.should == {:some_field => "has an error value"}
|
88
|
+
end
|
89
|
+
|
90
|
+
it "allows the source to be filtered via a filter strategy" do
|
91
|
+
etl_batch_double = double
|
92
|
+
fake_source = []
|
93
|
+
|
94
|
+
fake_source.should_receive(:another_dataset_method).and_return([])
|
95
|
+
pipeline.define_stage(:test_stage) do
|
96
|
+
source do
|
97
|
+
fake_source
|
98
|
+
end
|
99
|
+
|
100
|
+
sinks do
|
101
|
+
add Chicago::Flow::ArraySink.new(:test)
|
102
|
+
end
|
103
|
+
|
104
|
+
filter_strategy do |source, etl_batch|
|
105
|
+
etl_batch.should == etl_batch_double
|
106
|
+
source.another_dataset_method
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
pipeline.stages.each do |stage|
|
111
|
+
stage.execute(etl_batch_double, false)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Chicago::ETL::DeduplicateRows do
|
4
|
+
it "deduplicates rows" do
|
5
|
+
class TestTransform < described_class
|
6
|
+
def merge_rows(row)
|
7
|
+
working_row.merge(row)
|
8
|
+
end
|
9
|
+
|
10
|
+
def same_row?(row)
|
11
|
+
working_row[:id] == row[:id]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
transform = TestTransform.new
|
16
|
+
|
17
|
+
transform.process({:id => 1, :foo => :bar}).should be_blank
|
18
|
+
transform.process({:id => 1, :bar => :baz}).should be_blank
|
19
|
+
transform.process({:id => 2, :foo => :quux}).should == {:id => 1, :foo => :bar, :bar => :baz}
|
20
|
+
|
21
|
+
transform.flush.should == [{:id => 2, :foo => :quux}]
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,233 +1,248 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 0
|
10
|
-
version: 0.1.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Roland Swingler
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
12
|
+
date: 2013-11-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: chicagowarehouse
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
22
17
|
none: false
|
23
|
-
requirements:
|
18
|
+
requirements:
|
24
19
|
- - ~>
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
- 4
|
30
|
-
version: "0.4"
|
31
|
-
requirement: *id001
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0.4'
|
32
22
|
type: :runtime
|
33
23
|
prerelease: false
|
34
|
-
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
25
|
none: false
|
38
|
-
requirements:
|
39
|
-
- -
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
|
43
|
-
- 0
|
44
|
-
version: "0"
|
45
|
-
requirement: *id002
|
46
|
-
type: :runtime
|
47
|
-
prerelease: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.4'
|
30
|
+
- !ruby/object:Gem::Dependency
|
48
31
|
name: fastercsv
|
49
|
-
|
50
|
-
version_requirements: &id003 !ruby/object:Gem::Requirement
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
51
33
|
none: false
|
52
|
-
requirements:
|
53
|
-
- -
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
|
56
|
-
segments:
|
57
|
-
- 0
|
58
|
-
version: "0"
|
59
|
-
requirement: *id003
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
60
38
|
type: :runtime
|
61
39
|
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
62
47
|
name: sequel
|
63
|
-
|
64
|
-
version_requirements: &id004 !ruby/object:Gem::Requirement
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
65
49
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
70
|
-
segments:
|
71
|
-
- 0
|
72
|
-
- 0
|
73
|
-
- 2
|
74
|
-
version: 0.0.2
|
75
|
-
requirement: *id004
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
76
54
|
type: :runtime
|
77
55
|
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
78
63
|
name: sequel_load_data_infile
|
79
|
-
|
80
|
-
version_requirements: &id005 !ruby/object:Gem::Requirement
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
81
65
|
none: false
|
82
|
-
requirements:
|
83
|
-
- -
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
|
86
|
-
segments:
|
87
|
-
- 0
|
88
|
-
version: "0"
|
89
|
-
requirement: *id005
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.0.2
|
90
70
|
type: :runtime
|
91
71
|
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.0.2
|
78
|
+
- !ruby/object:Gem::Dependency
|
92
79
|
name: sequel_fast_columns
|
93
|
-
|
94
|
-
version_requirements: &id006 !ruby/object:Gem::Requirement
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
95
81
|
none: false
|
96
|
-
requirements:
|
97
|
-
- -
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
|
100
|
-
|
101
|
-
- 2
|
102
|
-
version: "2"
|
103
|
-
requirement: *id006
|
104
|
-
type: :development
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
105
87
|
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
106
95
|
name: rspec
|
107
|
-
|
108
|
-
version_requirements: &id007 !ruby/object:Gem::Requirement
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
109
97
|
none: false
|
110
|
-
requirements:
|
111
|
-
- -
|
112
|
-
- !ruby/object:Gem::Version
|
113
|
-
|
114
|
-
segments:
|
115
|
-
- 0
|
116
|
-
version: "0"
|
117
|
-
requirement: *id007
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '2'
|
118
102
|
type: :development
|
119
103
|
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '2'
|
110
|
+
- !ruby/object:Gem::Dependency
|
120
111
|
name: timecop
|
121
|
-
|
122
|
-
version_requirements: &id008 !ruby/object:Gem::Requirement
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
123
113
|
none: false
|
124
|
-
requirements:
|
125
|
-
- -
|
126
|
-
- !ruby/object:Gem::Version
|
127
|
-
|
128
|
-
segments:
|
129
|
-
- 0
|
130
|
-
version: "0"
|
131
|
-
requirement: *id008
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
132
118
|
type: :development
|
133
119
|
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
134
127
|
name: yard
|
135
|
-
|
136
|
-
version_requirements: &id009 !ruby/object:Gem::Requirement
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
137
129
|
none: false
|
138
|
-
requirements:
|
139
|
-
- -
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
|
142
|
-
segments:
|
143
|
-
- 0
|
144
|
-
version: "0"
|
145
|
-
requirement: *id009
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
146
134
|
type: :development
|
147
135
|
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
- !ruby/object:Gem::Dependency
|
148
143
|
name: flog
|
149
|
-
|
150
|
-
version_requirements: &id010 !ruby/object:Gem::Requirement
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
151
145
|
none: false
|
152
|
-
requirements:
|
153
|
-
- -
|
154
|
-
- !ruby/object:Gem::Version
|
155
|
-
|
156
|
-
segments:
|
157
|
-
- 0
|
158
|
-
version: "0"
|
159
|
-
requirement: *id010
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0'
|
160
150
|
type: :development
|
161
151
|
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
- !ruby/object:Gem::Dependency
|
162
159
|
name: simplecov
|
163
|
-
|
164
|
-
version_requirements: &id011 !ruby/object:Gem::Requirement
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
165
161
|
none: false
|
166
|
-
requirements:
|
167
|
-
- -
|
168
|
-
- !ruby/object:Gem::Version
|
169
|
-
|
170
|
-
segments:
|
171
|
-
- 0
|
172
|
-
version: "0"
|
173
|
-
requirement: *id011
|
162
|
+
requirements:
|
163
|
+
- - ! '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
174
166
|
type: :development
|
175
167
|
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
- !ruby/object:Gem::Dependency
|
176
175
|
name: ZenTest
|
177
|
-
|
178
|
-
version_requirements: &id012 !ruby/object:Gem::Requirement
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
179
177
|
none: false
|
180
|
-
requirements:
|
181
|
-
- -
|
182
|
-
- !ruby/object:Gem::Version
|
183
|
-
|
184
|
-
segments:
|
185
|
-
- 2
|
186
|
-
- 8
|
187
|
-
- 1
|
188
|
-
version: 2.8.1
|
189
|
-
requirement: *id012
|
178
|
+
requirements:
|
179
|
+
- - ! '>='
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: '0'
|
190
182
|
type: :development
|
191
183
|
prerelease: false
|
184
|
+
version_requirements: !ruby/object:Gem::Requirement
|
185
|
+
none: false
|
186
|
+
requirements:
|
187
|
+
- - ! '>='
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: '0'
|
190
|
+
- !ruby/object:Gem::Dependency
|
192
191
|
name: mysql
|
193
|
-
|
194
|
-
version_requirements: &id013 !ruby/object:Gem::Requirement
|
192
|
+
requirement: !ruby/object:Gem::Requirement
|
195
193
|
none: false
|
196
|
-
requirements:
|
197
|
-
- -
|
198
|
-
- !ruby/object:Gem::Version
|
199
|
-
|
200
|
-
segments:
|
201
|
-
- 1
|
202
|
-
version: "1"
|
203
|
-
requirement: *id013
|
194
|
+
requirements:
|
195
|
+
- - '='
|
196
|
+
- !ruby/object:Gem::Version
|
197
|
+
version: 2.8.1
|
204
198
|
type: :development
|
205
199
|
prerelease: false
|
200
|
+
version_requirements: !ruby/object:Gem::Requirement
|
201
|
+
none: false
|
202
|
+
requirements:
|
203
|
+
- - '='
|
204
|
+
- !ruby/object:Gem::Version
|
205
|
+
version: 2.8.1
|
206
|
+
- !ruby/object:Gem::Dependency
|
206
207
|
name: bundler
|
207
|
-
|
208
|
-
version_requirements: &id014 !ruby/object:Gem::Requirement
|
208
|
+
requirement: !ruby/object:Gem::Requirement
|
209
209
|
none: false
|
210
|
-
requirements:
|
211
|
-
- -
|
212
|
-
- !ruby/object:Gem::Version
|
213
|
-
|
214
|
-
segments:
|
215
|
-
- 0
|
216
|
-
version: "0"
|
217
|
-
requirement: *id014
|
210
|
+
requirements:
|
211
|
+
- - ~>
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
version: '1'
|
218
214
|
type: :development
|
219
215
|
prerelease: false
|
216
|
+
version_requirements: !ruby/object:Gem::Requirement
|
217
|
+
none: false
|
218
|
+
requirements:
|
219
|
+
- - ~>
|
220
|
+
- !ruby/object:Gem::Version
|
221
|
+
version: '1'
|
222
|
+
- !ruby/object:Gem::Dependency
|
220
223
|
name: jeweler
|
224
|
+
requirement: !ruby/object:Gem::Requirement
|
225
|
+
none: false
|
226
|
+
requirements:
|
227
|
+
- - ! '>='
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '0'
|
230
|
+
type: :development
|
231
|
+
prerelease: false
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
233
|
+
none: false
|
234
|
+
requirements:
|
235
|
+
- - ! '>='
|
236
|
+
- !ruby/object:Gem::Version
|
237
|
+
version: '0'
|
221
238
|
description: ETL tools for Chicago
|
222
239
|
email: roland.swingler@gmail.com
|
223
240
|
executables: []
|
224
|
-
|
225
241
|
extensions: []
|
226
|
-
|
227
|
-
extra_rdoc_files:
|
242
|
+
extra_rdoc_files:
|
228
243
|
- LICENSE.txt
|
229
244
|
- README.rdoc
|
230
|
-
files:
|
245
|
+
files:
|
231
246
|
- .document
|
232
247
|
- .rspec
|
233
248
|
- Gemfile
|
@@ -254,10 +269,13 @@ files:
|
|
254
269
|
- lib/chicago/etl/screens/out_of_bounds.rb
|
255
270
|
- lib/chicago/etl/sequel/dependant_tables.rb
|
256
271
|
- lib/chicago/etl/sequel/filter_to_etl_batch.rb
|
272
|
+
- lib/chicago/etl/stage.rb
|
273
|
+
- lib/chicago/etl/stage_builder.rb
|
257
274
|
- lib/chicago/etl/table_builder.rb
|
258
275
|
- lib/chicago/etl/task_invocation.rb
|
259
276
|
- lib/chicago/etl/tasks.rb
|
260
277
|
- lib/chicago/etl/transformations.rb
|
278
|
+
- lib/chicago/etl/transformations/deduplicate_rows.rb
|
261
279
|
- lib/chicago/etl/transformations/uk_post_code.rb
|
262
280
|
- lib/chicago/etl/transformations/uk_post_code_field.rb
|
263
281
|
- lib/chicago/flow/array_sink.rb
|
@@ -279,6 +297,7 @@ files:
|
|
279
297
|
- spec/etl/core_extensions_spec.rb
|
280
298
|
- spec/etl/counter_spec.rb
|
281
299
|
- spec/etl/dataset_batch_stage_spec.rb
|
300
|
+
- spec/etl/define_stage_spec.rb
|
282
301
|
- spec/etl/etl_batch_id_dataset_filter.rb
|
283
302
|
- spec/etl/key_builder_spec.rb
|
284
303
|
- spec/etl/load_dataset_builder_spec.rb
|
@@ -291,6 +310,7 @@ files:
|
|
291
310
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
292
311
|
- spec/etl/table_builder_spec.rb
|
293
312
|
- spec/etl/task_spec.rb
|
313
|
+
- spec/etl/transformations/deduplicate_rows_spec.rb
|
294
314
|
- spec/etl/transformations/uk_post_code_field_spec.rb
|
295
315
|
- spec/etl/transformations/uk_post_code_spec.rb
|
296
316
|
- spec/etl/transformations_spec.rb
|
@@ -306,37 +326,31 @@ files:
|
|
306
326
|
- spec/flow/transformation_spec.rb
|
307
327
|
- spec/spec_helper.rb
|
308
328
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
309
|
-
licenses:
|
329
|
+
licenses:
|
310
330
|
- MIT
|
311
331
|
post_install_message:
|
312
332
|
rdoc_options: []
|
313
|
-
|
314
|
-
require_paths:
|
333
|
+
require_paths:
|
315
334
|
- lib
|
316
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
335
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
317
336
|
none: false
|
318
|
-
requirements:
|
319
|
-
- -
|
320
|
-
- !ruby/object:Gem::Version
|
321
|
-
|
322
|
-
segments:
|
337
|
+
requirements:
|
338
|
+
- - ! '>='
|
339
|
+
- !ruby/object:Gem::Version
|
340
|
+
version: '0'
|
341
|
+
segments:
|
323
342
|
- 0
|
324
|
-
|
325
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
343
|
+
hash: -2054734000096616506
|
344
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
326
345
|
none: false
|
327
|
-
requirements:
|
328
|
-
- -
|
329
|
-
- !ruby/object:Gem::Version
|
330
|
-
|
331
|
-
segments:
|
332
|
-
- 0
|
333
|
-
version: "0"
|
346
|
+
requirements:
|
347
|
+
- - ! '>='
|
348
|
+
- !ruby/object:Gem::Version
|
349
|
+
version: '0'
|
334
350
|
requirements: []
|
335
|
-
|
336
351
|
rubyforge_project:
|
337
352
|
rubygems_version: 1.8.25
|
338
353
|
signing_key:
|
339
354
|
specification_version: 3
|
340
355
|
summary: Chicago ETL
|
341
356
|
test_files: []
|
342
|
-
|