chicago-etl 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-09-05"
12
+ s.date = "2013-11-07"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -43,10 +43,13 @@ Gem::Specification.new do |s|
43
43
  "lib/chicago/etl/screens/out_of_bounds.rb",
44
44
  "lib/chicago/etl/sequel/dependant_tables.rb",
45
45
  "lib/chicago/etl/sequel/filter_to_etl_batch.rb",
46
+ "lib/chicago/etl/stage.rb",
47
+ "lib/chicago/etl/stage_builder.rb",
46
48
  "lib/chicago/etl/table_builder.rb",
47
49
  "lib/chicago/etl/task_invocation.rb",
48
50
  "lib/chicago/etl/tasks.rb",
49
51
  "lib/chicago/etl/transformations.rb",
52
+ "lib/chicago/etl/transformations/deduplicate_rows.rb",
50
53
  "lib/chicago/etl/transformations/uk_post_code.rb",
51
54
  "lib/chicago/etl/transformations/uk_post_code_field.rb",
52
55
  "lib/chicago/flow/array_sink.rb",
@@ -68,6 +71,7 @@ Gem::Specification.new do |s|
68
71
  "spec/etl/core_extensions_spec.rb",
69
72
  "spec/etl/counter_spec.rb",
70
73
  "spec/etl/dataset_batch_stage_spec.rb",
74
+ "spec/etl/define_stage_spec.rb",
71
75
  "spec/etl/etl_batch_id_dataset_filter.rb",
72
76
  "spec/etl/key_builder_spec.rb",
73
77
  "spec/etl/load_dataset_builder_spec.rb",
@@ -80,6 +84,7 @@ Gem::Specification.new do |s|
80
84
  "spec/etl/sequel/filter_to_etl_batch_spec.rb",
81
85
  "spec/etl/table_builder_spec.rb",
82
86
  "spec/etl/task_spec.rb",
87
+ "spec/etl/transformations/deduplicate_rows_spec.rb",
83
88
  "spec/etl/transformations/uk_post_code_field_spec.rb",
84
89
  "spec/etl/transformations/uk_post_code_spec.rb",
85
90
  "spec/etl/transformations_spec.rb",
@@ -31,27 +31,6 @@ module Chicago
31
31
  end
32
32
  end
33
33
 
34
- # Deprecated.
35
- #
36
- # @deprecated Use perform_task instead
37
- def load(task_name, &block)
38
- perform_task(:load, task_name, &block)
39
- end
40
-
41
- # Deprecated.
42
- #
43
- # @deprecated Use perform_task instead
44
- def transform(task_name, &block)
45
- perform_task(:extract, task_name, &block)
46
- end
47
-
48
- # Deprecated.
49
- #
50
- # @deprecated Use perform_task instead
51
- def extract(task_name, &block)
52
- perform_task(:extract, task_name, &block)
53
- end
54
-
55
34
  # Performs a named task if it hasn't already run successfully in
56
35
  # this batch.
57
36
  def perform_task(stage, task_name, &block)
@@ -8,11 +8,24 @@ module Chicago
8
8
  # Returns all defined fact load tasks
9
9
  attr_reader :load_facts
10
10
 
11
+ # Returns all the defined generic stages.
12
+ attr_reader :stages
13
+
11
14
  # Creates a pipeline for a Schema.
12
15
  def initialize(db, schema)
13
16
  @schema, @db = schema, db
14
17
  @load_dimensions = Chicago::Schema::NamedElementCollection.new
15
18
  @load_facts = Chicago::Schema::NamedElementCollection.new
19
+ @stages = Chicago::Schema::NamedElementCollection.new
20
+ end
21
+
22
+ # Defines a generic stage in the pipeline.
23
+ def define_stage(name, &block)
24
+ @stages << build_schemaless_stage(name, &block)
25
+ end
26
+
27
+ def build_schemaless_stage(name, &block)
28
+ StageBuilder.new(@db).build(name, &block)
16
29
  end
17
30
 
18
31
  # Defines a dimension load stage
@@ -73,6 +86,7 @@ module Chicago
73
86
 
74
87
  # Define elements of the pipeline. See LoadPipelineStageBuilder
75
88
  # for details.
89
+ # TODO: rename pipeline => transforms below this method
76
90
  def pipeline(&block)
77
91
  @pipeline_stage = LoadPipelineStageBuilder.new(@db, @schema_table).
78
92
  build(&block)
@@ -81,9 +95,11 @@ module Chicago
81
95
  # Defines the dataset, see DatasetBuilder .
82
96
  #
83
97
  # The block must return a Sequel::Dataset.
84
- def dataset(&block)
98
+ # TODO: rename dataset => source below this method, make generic
99
+ def source(&block)
85
100
  @dataset = DatasetBuilder.new(@db).build(&block)
86
101
  end
102
+ alias :dataset :source
87
103
 
88
104
  # Define a custom filter strategy for filtering to an ETL batch.
89
105
  def filter_strategy(&block)
@@ -0,0 +1,76 @@
1
+ module Chicago
2
+ module ETL
3
+ class Stage
4
+ attr_reader :name
5
+
6
+ def initialize(name, options={})
7
+ @name = name
8
+ @source = options.fetch(:source)
9
+ raise ArgumentError, "Stage #{name} requires a source" unless @source
10
+
11
+ @sinks = options.fetch(:sinks)
12
+ raise ArgumentError, "Stage #{name} requires at least one sink" if @sinks.empty?
13
+
14
+ @transformations = options.fetch(:transformations)
15
+ @transformation_chain = Chicago::Flow::TransformationChain.
16
+ new(*@transformations)
17
+
18
+ @filter_strategy = options[:filter_strategy] ||
19
+ lambda {|source, _| source }
20
+ end
21
+
22
+ def execute(etl_batch, reextract)
23
+ modified_source = reextract_and_filter_source(@source, etl_batch, reextract)
24
+ transform_and_load_from(modified_source)
25
+ end
26
+
27
+ def transform_and_load_from(source)
28
+ end
29
+
30
+ def reextract_and_filter_source(source, etl_batch, reextract=false)
31
+ if reextract
32
+ filtered_dataset = source
33
+ else
34
+ filtered_dataset = @filter_strategy.call(source, etl_batch)
35
+ end
36
+ Chicago::Flow::DatasetSource.new(filtered_dataset)
37
+ end
38
+
39
+ attr_reader :transformation_chain
40
+
41
+ # Returns the named sink, if it exists
42
+ def sink(name)
43
+ @sinks[name.to_sym]
44
+ end
45
+
46
+ def sinks
47
+ @sinks.values
48
+ end
49
+
50
+ def register_sink(name, sink)
51
+ @sinks[name.to_sym] = sink
52
+ self
53
+ end
54
+
55
+ def transform_and_load_from(source)
56
+ sinks.each(&:open)
57
+ pipe_rows_to_sinks_from(source)
58
+ sinks.each(&:close)
59
+ end
60
+
61
+ private
62
+
63
+ def pipe_rows_to_sinks_from(source)
64
+ source.each do |row|
65
+ transformation_chain.process(row).each {|row| process_row(row) }
66
+ end
67
+ transformation_chain.flush.each {|row| process_row(row) }
68
+ end
69
+
70
+ def process_row(row)
71
+ stream = row.delete(:_stream) || :default
72
+ @sinks[stream] << row
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,69 @@
1
+ module Chicago
2
+ module ETL
3
+ class StageBuilder
4
+ attr_reader :sink_factory
5
+
6
+ def initialize(db)
7
+ @db = db
8
+ end
9
+
10
+ def build(name, &block)
11
+ @sinks = {}
12
+ @transformations = []
13
+
14
+ instance_eval &block
15
+
16
+ Stage.new(name,
17
+ :source => @dataset,
18
+ :sinks => @sinks,
19
+ :transformations => @transformations,
20
+ :filter_strategy => @filter_strategy)
21
+ end
22
+
23
+ def source(&block)
24
+ @dataset = DatasetBuilder.new(@db).build(&block)
25
+ end
26
+
27
+ def transformations(klass=TransformationBuilder, &block)
28
+ @transformations = klass.new.build(&block)
29
+ end
30
+
31
+ def sinks(options={}, &block)
32
+ @sinks = SinkBuilder.new.build(&block)
33
+ end
34
+
35
+ # TODO: think of potentially better ways of dealing with this
36
+ # problem.
37
+ def filter_strategy(&block)
38
+ @filter_strategy = block
39
+ end
40
+
41
+ class TransformationBuilder
42
+ def build(&block)
43
+ @transformations = []
44
+ instance_eval(&block)
45
+ @transformations
46
+ end
47
+
48
+ def add(transformation)
49
+ @transformations << transformation
50
+ end
51
+ end
52
+
53
+ class SinkBuilder
54
+ def build(&block)
55
+ @sinks = {}
56
+ instance_eval(&block)
57
+ @sinks
58
+ end
59
+
60
+ protected
61
+
62
+ def add(sink, options={})
63
+ stream = options[:stream] || :default
64
+ @sinks[stream] = sink
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,40 @@
1
+ module Chicago
2
+ module ETL
3
+ class DeduplicateRows < Chicago::Flow::Transformation
4
+ def process_row(row)
5
+ if @working_row.nil?
6
+ @working_row = row
7
+ return
8
+ elsif same_row?(row)
9
+ @working_row = merge_rows(row)
10
+ return
11
+ else
12
+ assign_new_row_and_return_old_row(row)
13
+ end
14
+ end
15
+
16
+ def flush
17
+ @working_row.nil? ? [] : [@working_row]
18
+ end
19
+
20
+ protected
21
+
22
+ attr_reader :working_row
23
+
24
+ # This should be implemented by clients
25
+ def merge_rows(row)
26
+ end
27
+
28
+ # This should be implemented by clients
29
+ def same_row?(row)
30
+ end
31
+
32
+ private
33
+
34
+ def assign_new_row_and_return_old_row(row)
35
+ row, @working_row = @working_row, row
36
+ row
37
+ end
38
+ end
39
+ end
40
+ end
data/lib/chicago/etl.rb CHANGED
@@ -29,6 +29,9 @@ require 'chicago/etl/dataset_batch_stage'
29
29
  require 'chicago/etl/load_pipeline_stage_builder'
30
30
  require 'chicago/etl/pipeline'
31
31
 
32
+ require 'chicago/etl/stage'
33
+ require 'chicago/etl/stage_builder'
34
+
32
35
  # Sequel Extensions
33
36
  require 'chicago/etl/sequel/filter_to_etl_batch'
34
37
  require 'chicago/etl/sequel/dependant_tables'
@@ -40,6 +43,7 @@ require 'chicago/etl/screens/invalid_element'
40
43
  require 'chicago/etl/screens/out_of_bounds'
41
44
 
42
45
  # Transformations
46
+ require 'chicago/etl/transformations/deduplicate_rows'
43
47
  require 'chicago/etl/transformations/uk_post_code'
44
48
  require 'chicago/etl/transformations/uk_post_code_field'
45
49
 
@@ -0,0 +1,114 @@
1
+ require "spec_helper"
2
+
3
+ class TestTransformation < Chicago::Flow::Transformation
4
+ def output_streams
5
+ [:another_stream]
6
+ end
7
+
8
+ def process_row(row)
9
+ [row, assign_stream({:some_field => "has an error value"}, :another_stream)]
10
+ end
11
+ end
12
+
13
+ describe "defining and executing a stage" do
14
+ let(:rows) { [{:some_field => "value"}] }
15
+ let(:db) { double(:test_dataset_method => rows) }
16
+ let(:schema) { double }
17
+ let(:pipeline) { Chicago::ETL::Pipeline.new(db, schema)}
18
+
19
+ it "allows no tranformations" do
20
+ pipeline.define_stage(:test_stage) do
21
+ source do
22
+ db.test_dataset_method
23
+ end
24
+
25
+ sinks do
26
+ add Chicago::Flow::ArraySink.new(:test)
27
+ add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
28
+ end
29
+ end
30
+
31
+ pipeline.stages.each do |stage|
32
+ stage.execute(double, true)
33
+ end
34
+
35
+ stage = pipeline.stages.first
36
+ stage.sink(:default).data.length.should == 1
37
+ stage.sink(:default).data.first.should == {:some_field => "value"}
38
+
39
+ stage.sink(:another_stream).data.length.should == 0
40
+ end
41
+
42
+ it "requires sinks" do
43
+ expect {
44
+ pipeline.define_stage(:test_stage) do
45
+ source do
46
+ db.test_dataset_method
47
+ end
48
+ end
49
+ }.to raise_error(ArgumentError)
50
+ end
51
+
52
+ it "requires sources" do
53
+ expect {
54
+ pipeline.define_stage(:test_stage) do
55
+ sinks do
56
+ add Chicago::Flow::ArraySink.new(:test)
57
+ end
58
+ end
59
+ }.to raise_error(ArgumentError)
60
+ end
61
+
62
+ it "glues the source, transformations, and sink correctly" do
63
+ pipeline.define_stage(:test_stage) do
64
+ source do
65
+ db.test_dataset_method
66
+ end
67
+
68
+ transformations do
69
+ add TestTransformation.new
70
+ end
71
+
72
+ sinks do
73
+ add Chicago::Flow::ArraySink.new(:test)
74
+ add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
75
+ end
76
+ end
77
+
78
+ pipeline.stages.each do |stage|
79
+ stage.execute(double, true)
80
+ end
81
+
82
+ stage = pipeline.stages.first
83
+ stage.sink(:default).data.length.should == 1
84
+ stage.sink(:default).data.first.should == {:some_field => "value"}
85
+
86
+ stage.sink(:another_stream).data.length.should == 1
87
+ stage.sink(:another_stream).data.first.should == {:some_field => "has an error value"}
88
+ end
89
+
90
+ it "allows the source to be filtered via a filter strategy" do
91
+ etl_batch_double = double
92
+ fake_source = []
93
+
94
+ fake_source.should_receive(:another_dataset_method).and_return([])
95
+ pipeline.define_stage(:test_stage) do
96
+ source do
97
+ fake_source
98
+ end
99
+
100
+ sinks do
101
+ add Chicago::Flow::ArraySink.new(:test)
102
+ end
103
+
104
+ filter_strategy do |source, etl_batch|
105
+ etl_batch.should == etl_batch_double
106
+ source.another_dataset_method
107
+ end
108
+ end
109
+
110
+ pipeline.stages.each do |stage|
111
+ stage.execute(etl_batch_double, false)
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
+ describe Chicago::ETL::DeduplicateRows do
4
+ it "deduplicates rows" do
5
+ class TestTransform < described_class
6
+ def merge_rows(row)
7
+ working_row.merge(row)
8
+ end
9
+
10
+ def same_row?(row)
11
+ working_row[:id] == row[:id]
12
+ end
13
+ end
14
+
15
+ transform = TestTransform.new
16
+
17
+ transform.process({:id => 1, :foo => :bar}).should be_blank
18
+ transform.process({:id => 1, :bar => :baz}).should be_blank
19
+ transform.process({:id => 2, :foo => :quux}).should == {:id => 1, :foo => :bar, :bar => :baz}
20
+
21
+ transform.flush.should == [{:id => 2, :foo => :quux}]
22
+ end
23
+ end
metadata CHANGED
@@ -1,233 +1,248 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
- version: !ruby/object:Gem::Version
4
- hash: 27
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 0
10
- version: 0.1.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Roland Swingler
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2013-09-05 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- version_requirements: &id001 !ruby/object:Gem::Requirement
12
+ date: 2013-11-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: chicagowarehouse
16
+ requirement: !ruby/object:Gem::Requirement
22
17
  none: false
23
- requirements:
18
+ requirements:
24
19
  - - ~>
25
- - !ruby/object:Gem::Version
26
- hash: 3
27
- segments:
28
- - 0
29
- - 4
30
- version: "0.4"
31
- requirement: *id001
20
+ - !ruby/object:Gem::Version
21
+ version: '0.4'
32
22
  type: :runtime
33
23
  prerelease: false
34
- name: chicagowarehouse
35
- - !ruby/object:Gem::Dependency
36
- version_requirements: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
37
25
  none: false
38
- requirements:
39
- - - ">="
40
- - !ruby/object:Gem::Version
41
- hash: 3
42
- segments:
43
- - 0
44
- version: "0"
45
- requirement: *id002
46
- type: :runtime
47
- prerelease: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.4'
30
+ - !ruby/object:Gem::Dependency
48
31
  name: fastercsv
49
- - !ruby/object:Gem::Dependency
50
- version_requirements: &id003 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
51
33
  none: false
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- hash: 3
56
- segments:
57
- - 0
58
- version: "0"
59
- requirement: *id003
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
60
38
  type: :runtime
61
39
  prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: sequel
63
- - !ruby/object:Gem::Dependency
64
- version_requirements: &id004 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 27
70
- segments:
71
- - 0
72
- - 0
73
- - 2
74
- version: 0.0.2
75
- requirement: *id004
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
76
54
  type: :runtime
77
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
78
63
  name: sequel_load_data_infile
79
- - !ruby/object:Gem::Dependency
80
- version_requirements: &id005 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
81
65
  none: false
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- hash: 3
86
- segments:
87
- - 0
88
- version: "0"
89
- requirement: *id005
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 0.0.2
90
70
  type: :runtime
91
71
  prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 0.0.2
78
+ - !ruby/object:Gem::Dependency
92
79
  name: sequel_fast_columns
93
- - !ruby/object:Gem::Dependency
94
- version_requirements: &id006 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
95
81
  none: false
96
- requirements:
97
- - - ~>
98
- - !ruby/object:Gem::Version
99
- hash: 7
100
- segments:
101
- - 2
102
- version: "2"
103
- requirement: *id006
104
- type: :development
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
105
87
  prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
106
95
  name: rspec
107
- - !ruby/object:Gem::Dependency
108
- version_requirements: &id007 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
109
97
  none: false
110
- requirements:
111
- - - ">="
112
- - !ruby/object:Gem::Version
113
- hash: 3
114
- segments:
115
- - 0
116
- version: "0"
117
- requirement: *id007
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: '2'
118
102
  type: :development
119
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '2'
110
+ - !ruby/object:Gem::Dependency
120
111
  name: timecop
121
- - !ruby/object:Gem::Dependency
122
- version_requirements: &id008 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
123
113
  none: false
124
- requirements:
125
- - - ">="
126
- - !ruby/object:Gem::Version
127
- hash: 3
128
- segments:
129
- - 0
130
- version: "0"
131
- requirement: *id008
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
132
118
  type: :development
133
119
  prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
134
127
  name: yard
135
- - !ruby/object:Gem::Dependency
136
- version_requirements: &id009 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
137
129
  none: false
138
- requirements:
139
- - - ">="
140
- - !ruby/object:Gem::Version
141
- hash: 3
142
- segments:
143
- - 0
144
- version: "0"
145
- requirement: *id009
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
146
134
  type: :development
147
135
  prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ - !ruby/object:Gem::Dependency
148
143
  name: flog
149
- - !ruby/object:Gem::Dependency
150
- version_requirements: &id010 !ruby/object:Gem::Requirement
144
+ requirement: !ruby/object:Gem::Requirement
151
145
  none: false
152
- requirements:
153
- - - ">="
154
- - !ruby/object:Gem::Version
155
- hash: 3
156
- segments:
157
- - 0
158
- version: "0"
159
- requirement: *id010
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
160
150
  type: :development
161
151
  prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ - !ruby/object:Gem::Dependency
162
159
  name: simplecov
163
- - !ruby/object:Gem::Dependency
164
- version_requirements: &id011 !ruby/object:Gem::Requirement
160
+ requirement: !ruby/object:Gem::Requirement
165
161
  none: false
166
- requirements:
167
- - - ">="
168
- - !ruby/object:Gem::Version
169
- hash: 3
170
- segments:
171
- - 0
172
- version: "0"
173
- requirement: *id011
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
174
166
  type: :development
175
167
  prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
176
175
  name: ZenTest
177
- - !ruby/object:Gem::Dependency
178
- version_requirements: &id012 !ruby/object:Gem::Requirement
176
+ requirement: !ruby/object:Gem::Requirement
179
177
  none: false
180
- requirements:
181
- - - "="
182
- - !ruby/object:Gem::Version
183
- hash: 45
184
- segments:
185
- - 2
186
- - 8
187
- - 1
188
- version: 2.8.1
189
- requirement: *id012
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
190
182
  type: :development
191
183
  prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ - !ruby/object:Gem::Dependency
192
191
  name: mysql
193
- - !ruby/object:Gem::Dependency
194
- version_requirements: &id013 !ruby/object:Gem::Requirement
192
+ requirement: !ruby/object:Gem::Requirement
195
193
  none: false
196
- requirements:
197
- - - ~>
198
- - !ruby/object:Gem::Version
199
- hash: 1
200
- segments:
201
- - 1
202
- version: "1"
203
- requirement: *id013
194
+ requirements:
195
+ - - '='
196
+ - !ruby/object:Gem::Version
197
+ version: 2.8.1
204
198
  type: :development
205
199
  prerelease: false
200
+ version_requirements: !ruby/object:Gem::Requirement
201
+ none: false
202
+ requirements:
203
+ - - '='
204
+ - !ruby/object:Gem::Version
205
+ version: 2.8.1
206
+ - !ruby/object:Gem::Dependency
206
207
  name: bundler
207
- - !ruby/object:Gem::Dependency
208
- version_requirements: &id014 !ruby/object:Gem::Requirement
208
+ requirement: !ruby/object:Gem::Requirement
209
209
  none: false
210
- requirements:
211
- - - ">="
212
- - !ruby/object:Gem::Version
213
- hash: 3
214
- segments:
215
- - 0
216
- version: "0"
217
- requirement: *id014
210
+ requirements:
211
+ - - ~>
212
+ - !ruby/object:Gem::Version
213
+ version: '1'
218
214
  type: :development
219
215
  prerelease: false
216
+ version_requirements: !ruby/object:Gem::Requirement
217
+ none: false
218
+ requirements:
219
+ - - ~>
220
+ - !ruby/object:Gem::Version
221
+ version: '1'
222
+ - !ruby/object:Gem::Dependency
220
223
  name: jeweler
224
+ requirement: !ruby/object:Gem::Requirement
225
+ none: false
226
+ requirements:
227
+ - - ! '>='
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ none: false
234
+ requirements:
235
+ - - ! '>='
236
+ - !ruby/object:Gem::Version
237
+ version: '0'
221
238
  description: ETL tools for Chicago
222
239
  email: roland.swingler@gmail.com
223
240
  executables: []
224
-
225
241
  extensions: []
226
-
227
- extra_rdoc_files:
242
+ extra_rdoc_files:
228
243
  - LICENSE.txt
229
244
  - README.rdoc
230
- files:
245
+ files:
231
246
  - .document
232
247
  - .rspec
233
248
  - Gemfile
@@ -254,10 +269,13 @@ files:
254
269
  - lib/chicago/etl/screens/out_of_bounds.rb
255
270
  - lib/chicago/etl/sequel/dependant_tables.rb
256
271
  - lib/chicago/etl/sequel/filter_to_etl_batch.rb
272
+ - lib/chicago/etl/stage.rb
273
+ - lib/chicago/etl/stage_builder.rb
257
274
  - lib/chicago/etl/table_builder.rb
258
275
  - lib/chicago/etl/task_invocation.rb
259
276
  - lib/chicago/etl/tasks.rb
260
277
  - lib/chicago/etl/transformations.rb
278
+ - lib/chicago/etl/transformations/deduplicate_rows.rb
261
279
  - lib/chicago/etl/transformations/uk_post_code.rb
262
280
  - lib/chicago/etl/transformations/uk_post_code_field.rb
263
281
  - lib/chicago/flow/array_sink.rb
@@ -279,6 +297,7 @@ files:
279
297
  - spec/etl/core_extensions_spec.rb
280
298
  - spec/etl/counter_spec.rb
281
299
  - spec/etl/dataset_batch_stage_spec.rb
300
+ - spec/etl/define_stage_spec.rb
282
301
  - spec/etl/etl_batch_id_dataset_filter.rb
283
302
  - spec/etl/key_builder_spec.rb
284
303
  - spec/etl/load_dataset_builder_spec.rb
@@ -291,6 +310,7 @@ files:
291
310
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
292
311
  - spec/etl/table_builder_spec.rb
293
312
  - spec/etl/task_spec.rb
313
+ - spec/etl/transformations/deduplicate_rows_spec.rb
294
314
  - spec/etl/transformations/uk_post_code_field_spec.rb
295
315
  - spec/etl/transformations/uk_post_code_spec.rb
296
316
  - spec/etl/transformations_spec.rb
@@ -306,37 +326,31 @@ files:
306
326
  - spec/flow/transformation_spec.rb
307
327
  - spec/spec_helper.rb
308
328
  homepage: http://github.com/notonthehighstreet/chicago-etl
309
- licenses:
329
+ licenses:
310
330
  - MIT
311
331
  post_install_message:
312
332
  rdoc_options: []
313
-
314
- require_paths:
333
+ require_paths:
315
334
  - lib
316
- required_ruby_version: !ruby/object:Gem::Requirement
335
+ required_ruby_version: !ruby/object:Gem::Requirement
317
336
  none: false
318
- requirements:
319
- - - ">="
320
- - !ruby/object:Gem::Version
321
- hash: 3
322
- segments:
337
+ requirements:
338
+ - - ! '>='
339
+ - !ruby/object:Gem::Version
340
+ version: '0'
341
+ segments:
323
342
  - 0
324
- version: "0"
325
- required_rubygems_version: !ruby/object:Gem::Requirement
343
+ hash: -2054734000096616506
344
+ required_rubygems_version: !ruby/object:Gem::Requirement
326
345
  none: false
327
- requirements:
328
- - - ">="
329
- - !ruby/object:Gem::Version
330
- hash: 3
331
- segments:
332
- - 0
333
- version: "0"
346
+ requirements:
347
+ - - ! '>='
348
+ - !ruby/object:Gem::Version
349
+ version: '0'
334
350
  requirements: []
335
-
336
351
  rubyforge_project:
337
352
  rubygems_version: 1.8.25
338
353
  signing_key:
339
354
  specification_version: 3
340
355
  summary: Chicago ETL
341
356
  test_files: []
342
-