chicago-etl 0.2.1 → 0.2.2

This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-19"
12
+ s.date = "2013-11-26"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -33,7 +33,6 @@ Gem::Specification.new do |s|
33
33
  "lib/chicago/etl/batch.rb",
34
34
  "lib/chicago/etl/core_extensions.rb",
35
35
  "lib/chicago/etl/counter.rb",
36
- "lib/chicago/etl/dataset_batch_stage.rb",
37
36
  "lib/chicago/etl/dataset_builder.rb",
38
37
  "lib/chicago/etl/dataset_source.rb",
39
38
  "lib/chicago/etl/errors.rb",
@@ -48,6 +47,7 @@ Gem::Specification.new do |s|
48
47
  "lib/chicago/etl/pipeline_endpoint.rb",
49
48
  "lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
50
49
  "lib/chicago/etl/schema_table_sink_factory.rb",
50
+ "lib/chicago/etl/schema_table_stage_builder.rb",
51
51
  "lib/chicago/etl/screens/column_screen.rb",
52
52
  "lib/chicago/etl/screens/invalid_element.rb",
53
53
  "lib/chicago/etl/screens/missing_value.rb",
@@ -57,6 +57,7 @@ Gem::Specification.new do |s|
57
57
  "lib/chicago/etl/sink.rb",
58
58
  "lib/chicago/etl/stage.rb",
59
59
  "lib/chicago/etl/stage_builder.rb",
60
+ "lib/chicago/etl/stage_name.rb",
60
61
  "lib/chicago/etl/table_builder.rb",
61
62
  "lib/chicago/etl/task_invocation.rb",
62
63
  "lib/chicago/etl/tasks.rb",
@@ -76,6 +77,7 @@ Gem::Specification.new do |s|
76
77
  "spec/etl/define_dimension_stage_spec.rb",
77
78
  "spec/etl/define_stage_spec.rb",
78
79
  "spec/etl/etl_batch_id_dataset_filter.rb",
80
+ "spec/etl/execution_wrapper_spec.rb",
79
81
  "spec/etl/filter_spec.rb",
80
82
  "spec/etl/key_builder_spec.rb",
81
83
  "spec/etl/load_dataset_builder_spec.rb",
@@ -89,6 +91,7 @@ Gem::Specification.new do |s|
89
91
  "spec/etl/screens/out_of_bounds_spec.rb",
90
92
  "spec/etl/sequel/dependant_tables_spec.rb",
91
93
  "spec/etl/sequel/filter_to_etl_batch_spec.rb",
94
+ "spec/etl/stage_name_spec.rb",
92
95
  "spec/etl/stage_spec.rb",
93
96
  "spec/etl/table_builder_spec.rb",
94
97
  "spec/etl/task_spec.rb",
@@ -2,116 +2,44 @@ module Chicago
2
2
  module ETL
3
3
  # An ETL pipeline.
4
4
  class Pipeline
5
- # Returns all defined dimension load tasks
6
- attr_reader :load_dimensions
7
-
8
- # Returns all defined fact load tasks
9
- attr_reader :load_facts
10
-
11
- # Returns all the defined generic stages.
5
+ # Returns all the defined stages.
12
6
  attr_reader :stages
13
7
 
14
8
  # Creates a pipeline for a Schema.
15
9
  def initialize(db, schema)
16
10
  @schema, @db = schema, db
17
- @load_dimensions = Chicago::Schema::NamedElementCollection.new
18
- @load_facts = Chicago::Schema::NamedElementCollection.new
19
11
  @stages = Chicago::Schema::NamedElementCollection.new
20
12
  end
21
13
 
22
14
  # Defines a generic stage in the pipeline.
23
- def define_stage(name, &block)
24
- @stages << build_schemaless_stage(name, &block)
25
- end
26
-
27
- def build_schemaless_stage(name, &block)
28
- StageBuilder.new(@db).build(name, &block)
29
- end
30
-
31
- # Defines a dimension load stage
32
- def define_dimension_load(name, options={}, &block)
33
- dimension_name = options[:dimension] || name
34
- @load_dimensions << build_stage(name,
35
- @schema.dimension(dimension_name),
36
- &block)
37
- end
38
-
39
- # Defines a fact load stage
40
- def define_fact_load(name, options={}, &block)
41
- fact_name = options[:fact] || name
42
- @load_facts << build_stage(name, @schema.fact(fact_name), &block)
43
- end
44
-
45
- # Builds a stage, but does not define it.
46
- def build_stage(name, schema_table, &block)
47
- DatasetBatchStageBuilder.new(@db, schema_table).build(name, &block)
48
- end
49
- end
50
-
51
- # Provides DSL methods for building a DataSetBatchStage.
52
- #
53
- # Clients shouldn't need to instantiate this directly, but instead
54
- # call the protected methods in the context of defining a Pipeline
55
- class DatasetBatchStageBuilder
56
- # @api private
57
- def initialize(db, schema_table)
58
- @db, @schema_table = db, schema_table
59
- end
60
-
61
- # @api private
62
- def build(name, &block)
63
- instance_eval &block
64
- unless defined? @sinks_and_transformations
65
- pipeline do
66
- end
67
- end
15
+ def define_stage(*args, &block)
16
+ options = args.last.kind_of?(Hash) ? args.pop : {}
68
17
 
69
- @filter_strategy ||= lambda {|dataset, etl_batch|
70
- dataset.filter_to_etl_batch(etl_batch)
71
- }
18
+ name = StageName.new(args)
72
19
 
73
- DatasetBatchStage.new(name,
74
- :source => @dataset,
75
- :transformations => @sinks_and_transformations[:transformations],
76
- :sinks => @sinks_and_transformations[:sinks],
77
- :filter_strategy => @filter_strategy,
78
- :truncate_pre_load => @truncate_pre_load)
79
- end
80
-
81
- protected
82
-
83
- # Specifies that the sinks should be truncated before loading
84
- # data.
85
- def truncate_pre_load
86
- @truncate_pre_load = true
20
+ if name =~ [:load, :dimensions]
21
+ @stages << build_dimension_load_stage(name, options, &block)
22
+ elsif name =~ [:load, :facts]
23
+ @stages << build_fact_load_stage(name, options, &block)
24
+ else
25
+ @stages << StageBuilder.new(@db).build(name, &block)
26
+ end
87
27
  end
88
28
 
89
- # Specifies that the dataset should never be filtered to the ETL
90
- # batch - i.e. it should behave as if reextract was always true
91
- def full_reload
92
- @filter_strategy = lambda {|dataset, etl_batch| dataset }
29
+ def build_stage(name, schema_table, &block)
30
+ SchemaTableStageBuilder.new(@db, schema_table).build(name, &block)
93
31
  end
94
32
 
95
- # Define elements of the pipeline. See LoadPipelineStageBuilder
96
- # for details.
97
- # TODO: rename pipeline => transforms below this method
98
- def pipeline(&block)
99
- @sinks_and_transformations = SchemaSinksAndTransformationsBuilder.new(@db, @schema_table).
100
- build(&block)
101
- end
33
+ private
102
34
 
103
- # Defines the dataset, see DatasetBuilder .
104
- #
105
- # The block must return a Sequel::Dataset.
106
- # TODO: rename dataset => source below this method, make generic
107
- def source(&block)
108
- @dataset = DatasetBuilder.new(@db).build(&block)
35
+ def build_dimension_load_stage(name, options, &block)
36
+ dimension_name = options[:dimension] || name.name
37
+ build_stage(name, @schema.dimension(dimension_name), &block)
109
38
  end
110
- alias :dataset :source
111
39
 
112
- # Define a custom filter strategy for filtering to an ETL batch.
113
- def filter_strategy(&block)
114
- @filter_strategy = block
40
+ def build_fact_load_stage(name, options, &block)
41
+ fact_name = options[:fact] || name.name
42
+ build_stage(name, @schema.fact(fact_name), &block)
115
43
  end
116
44
  end
117
45
  end
@@ -0,0 +1,48 @@
1
+ require 'chicago/etl/stage_builder'
2
+
3
+ module Chicago
4
+ module ETL
5
+ # Provides DSL methods for building a DataSetBatchStage.
6
+ #
7
+ # Clients shouldn't need to instantiate this directly, but instead
8
+ # call the protected methods in the context of defining a Pipeline
9
+ class SchemaTableStageBuilder < StageBuilder
10
+ # @api private
11
+ def initialize(db, schema_table)
12
+ super(db)
13
+ @wrapped_builder = SchemaSinksAndTransformationsBuilder.
14
+ new(@db, schema_table)
15
+ end
16
+
17
+ protected
18
+
19
+ # Define elements of the pipeline. See LoadPipelineStageBuilder
20
+ # for details.
21
+ #
22
+ # @deprecated
23
+ def pipeline(&block)
24
+ sinks_and_transformations = @wrapped_builder.build(&block)
25
+ @sinks = sinks_and_transformations[:sinks]
26
+ @transformations = sinks_and_transformations[:transformations] || []
27
+ end
28
+
29
+ # @api private
30
+ def set_default_stage_values
31
+ unless defined? @sinks
32
+ pipeline do
33
+ end
34
+ end
35
+
36
+ @pre_execution_strategies << lambda {|stage, etl_batch, reextract|
37
+ stage.sink(:error).truncate if reextract && stage.sink(:error)
38
+ stage.sink(:default).
39
+ set_constant_values(:_inserted_at => Time.now)
40
+ }
41
+
42
+ @filter_strategy ||= lambda {|dataset, etl_batch|
43
+ dataset.filter_to_etl_batch(etl_batch)
44
+ }
45
+ end
46
+ end
47
+ end
48
+ end
@@ -16,20 +16,26 @@ module Chicago
16
16
  @source = options[:source]
17
17
  @sinks = options[:sinks]
18
18
  @transformations = options[:transformations] || []
19
- @filter_strategy = options[:filter_strategy] ||
20
- lambda {|source, _| source }
21
- @truncate_pre_load = !!options[:truncate_pre_load]
19
+ @filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
20
+ @pre_execution_strategies = options[:pre_execution_strategies] || []
21
+ @executable = options.has_key?(:executable) ? options[:executable] : true
22
22
 
23
23
  validate_arguments
24
24
  end
25
25
 
26
- # Returns true if the sinks should be truncated pre-load.
27
- def truncate_pre_load?
28
- @truncate_pre_load
26
+ # Returns the unqualified name of this stage.
27
+ def task_name
28
+ name.name
29
29
  end
30
-
30
+
31
+ # Returns true if this stage should be executed.
32
+ def executable?
33
+ @executable
34
+ end
35
+
36
+ # Executes this stage in the context of an ETL::Batch
31
37
  def execute(etl_batch, reextract=false)
32
- sinks.each {|sink| sink.truncate } if truncate_pre_load?
38
+ prepare_stage(etl_batch, reextract)
33
39
  transform_and_load filtered_source(etl_batch, reextract)
34
40
  end
35
41
 
@@ -42,6 +48,7 @@ module Chicago
42
48
  @sinks.values
43
49
  end
44
50
 
51
+ # @api private
45
52
  def filtered_source(etl_batch, reextract=false)
46
53
  filtered_dataset = reextract ? source :
47
54
  @filter_strategy.call(source, etl_batch)
@@ -51,6 +58,12 @@ module Chicago
51
58
 
52
59
  private
53
60
 
61
+ def prepare_stage(etl_batch, reextract)
62
+ @pre_execution_strategies.each do |strategy|
63
+ strategy.call(self, etl_batch, reextract)
64
+ end
65
+ end
66
+
54
67
  def transform_and_load(source)
55
68
  sinks.each(&:open)
56
69
  pipe_rows_to_sinks_from(source)
@@ -6,20 +6,42 @@ module Chicago
6
6
  end
7
7
 
8
8
  def build(name, &block)
9
- @sinks = {}
10
- @transformations = []
9
+ @pre_execution_strategies = []
10
+ @executable = true
11
11
 
12
12
  instance_eval &block
13
-
13
+ set_default_stage_values
14
+
14
15
  Stage.new(name,
15
16
  :source => @dataset,
16
17
  :sinks => @sinks,
17
18
  :transformations => @transformations,
18
- :filter_strategy => @filter_strategy)
19
+ :filter_strategy => @filter_strategy,
20
+ :pre_execution_strategies => @pre_execution_strategies,
21
+ :executable => @executable)
19
22
  end
20
23
 
21
24
  protected
22
25
 
26
+ # Specifies that the sinks should be truncated before loading
27
+ # data.
28
+ def truncate_pre_load
29
+ @pre_execution_strategies << lambda {|stage, etl_batch, reextract|
30
+ stage.sinks.each {|sink| sink.truncate }
31
+ }
32
+ end
33
+
34
+ # Specifies that the dataset should never be filtered to the ETL
35
+ # batch - i.e. it should behave as if reextract was always true
36
+ def full_reload
37
+ @filter_strategy = lambda {|dataset, etl_batch| dataset }
38
+ end
39
+
40
+ # Mark this stage as executable or non-executable.
41
+ def executable(value=true)
42
+ @executable = value
43
+ end
44
+
23
45
  def source(&block)
24
46
  @dataset = DatasetBuilder.new(@db).build(&block)
25
47
  end
@@ -32,12 +54,18 @@ module Chicago
32
54
  @sinks = SinkBuilder.new.build(&block)
33
55
  end
34
56
 
35
- # TODO: think of potentially better ways of dealig with this
57
+ # TODO: think of potentially better ways of dealing with this
36
58
  # problem.
37
59
  def filter_strategy(&block)
38
60
  @filter_strategy = block
39
61
  end
40
62
 
63
+ # @api private
64
+ def set_default_stage_values
65
+ @sinks ||= sinks {}
66
+ @transformations ||= transformations {}
67
+ end
68
+
41
69
  class TransformationBuilder
42
70
  def build(&block)
43
71
  @transformations = []
@@ -0,0 +1,51 @@
1
+ module Chicago
2
+ module ETL
3
+ # A namespaced name for an ETL stage.
4
+ #
5
+ # @api private
6
+ class StageName
7
+ def initialize(*names)
8
+ if names.size == 1 && names.first.kind_of?(String)
9
+ @names = names.first.split(".").map(&:to_sym).freeze
10
+ else
11
+ @names = names.flatten.map(&:to_sym).freeze
12
+ end
13
+ end
14
+
15
+ def name
16
+ @names.last
17
+ end
18
+
19
+ def match?(*pattern)
20
+ pattern.flatten!
21
+ return false if pattern.size > @names.size
22
+
23
+ pattern.each_with_index.all? do |part, i|
24
+ part == :* || @names[i] == part
25
+ end
26
+ end
27
+ alias :=~ :match?
28
+
29
+ def namespace
30
+ @names[0...(@names.size - 1)]
31
+ end
32
+
33
+ def eql?(other)
34
+ to_s == other.to_s
35
+ end
36
+ alias :== :eql?
37
+
38
+ def hash
39
+ to_s.hash
40
+ end
41
+
42
+ def to_a
43
+ @names.dup
44
+ end
45
+
46
+ def to_s
47
+ @string_representation ||= @names.join('.')
48
+ end
49
+ end
50
+ end
51
+ end
data/lib/chicago/etl.rb CHANGED
@@ -19,15 +19,16 @@ require 'chicago/etl/null_sink'
19
19
  require 'chicago/etl/mysql'
20
20
 
21
21
  require 'chicago/etl/core_extensions'
22
+ require 'chicago/etl/stage_name'
22
23
  require 'chicago/etl/counter'
23
24
  require 'chicago/etl/key_builder'
24
25
  require 'chicago/etl/schema_table_sink_factory'
26
+ require 'chicago/etl/schema_table_stage_builder'
25
27
  require 'chicago/etl/transformations'
26
28
  require 'chicago/etl/load_dataset_builder'
27
29
  require 'chicago/etl/dataset_builder'
28
30
  require 'chicago/etl/stage'
29
31
  require 'chicago/etl/stage_builder'
30
- require 'chicago/etl/dataset_batch_stage'
31
32
  require 'chicago/etl/schema_sinks_and_transformations_builder'
32
33
  require 'chicago/etl/pipeline'
33
34
 
@@ -60,9 +61,13 @@ module Chicago
60
61
  # within a batch.
61
62
  def self.execute(stage, etl_batch, reextract, logger)
62
63
  etl_batch.perform_task(:load, stage.name) do
63
- logger.debug "Starting loading #{stage.name}"
64
- stage.execute(etl_batch, reextract)
65
- logger.debug "Finished loading #{stage.name}"
64
+ if stage.executable?
65
+ logger.debug "Starting loading #{stage.name}"
66
+ stage.execute(etl_batch, reextract)
67
+ logger.info "Finished loading #{stage.name}"
68
+ else
69
+ logger.info "Skipping stage #{stage.name}"
70
+ end
66
71
  end
67
72
  end
68
73
  end
@@ -18,8 +18,8 @@ describe "creating and running a dimension stage" do
18
18
  let(:pipeline) { Chicago::ETL::Pipeline.new(db, schema)}
19
19
 
20
20
  it "glues the source, transformations, and sink correctly" do
21
- pipeline.define_dimension_load(:test) do
22
- dataset do
21
+ pipeline.define_stage(:load, :dimensions, :test) do
22
+ source do
23
23
  db.test_dataset_method
24
24
  end
25
25
  end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Chicago::ETL Execution method" do
4
+ class StubBatch
5
+ def perform_task(*args)
6
+ yield
7
+ end
8
+ end
9
+
10
+ let(:logger) { mock(:logger).as_null_object }
11
+ let(:batch) { StubBatch.new }
12
+
13
+ it "only logs skipping the stage if the stage is not executable" do
14
+ stage = stub(:stage, :executable? => false, :name => "test")
15
+ stage.should_not_receive(:execute)
16
+ logger.should_receive(:info).with("Skipping stage test")
17
+
18
+ Chicago::ETL.execute(stage, batch, false, logger)
19
+ end
20
+
21
+ it "executes the stage" do
22
+ stage = stub(:stage, :executable? => true, :name => "test")
23
+ stage.should_receive(:execute).with(batch, false)
24
+
25
+ Chicago::ETL.execute(stage, batch, false, logger)
26
+ end
27
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+ require 'set'
3
+
4
+ describe Chicago::ETL::StageName do
5
+ it "can be consturcted with variable args" do
6
+ described_class.new(:a, :b).to_a.should == [:a, :b]
7
+ end
8
+
9
+ it "can be constructed with an array of symbols" do
10
+ described_class.new([:a, :b]).to_a.should == [:a, :b]
11
+ end
12
+
13
+ it "can be constructed with a dot seaprated string" do
14
+ described_class.new("foo.bar").to_a.should == [:foo, :bar]
15
+ end
16
+
17
+ it "has a name" do
18
+ described_class.new("foo.bar.baz").name.should == :baz
19
+ end
20
+
21
+ it "has a namespace" do
22
+ described_class.new("foo.bar.baz").namespace.should == [:foo, :bar]
23
+ end
24
+
25
+ it "supports equality" do
26
+ described_class.new(:a, :b).should == described_class.new(:a, :b)
27
+ set = Set.new
28
+ set << described_class.new(:a, :b)
29
+ set.should include(described_class.new(:a, :b))
30
+ end
31
+
32
+ it "has a dotted string representation" do
33
+ described_class.new(:a, :b).to_s.should == "a.b"
34
+ end
35
+
36
+ it "matches an exact pattern" do
37
+ described_class.new(:a, :b).match?(:a, :b).should be_true
38
+ described_class.new(:a, :b).match?(:a, :c).should be_false
39
+ end
40
+
41
+ it "matches a left-anchored partial pattern" do
42
+ described_class.new(:a, :b).match?(:a).should be_true
43
+ described_class.new(:a, :b).match?(:b).should be_false
44
+ end
45
+
46
+ it "allows wildcards matching" do
47
+ described_class.new(:a, :b).match?(:*, :b).should be_true
48
+ described_class.new(:a, :b).match?(:*, :*).should be_true
49
+ described_class.new(:a, :b).match?(:*, :*, :*).should be_false
50
+ end
51
+
52
+ it "can use the =~ operator" do
53
+ (described_class.new(:a, :b) =~ [:*, :b]).should be_true
54
+ end
55
+ end
metadata CHANGED
@@ -1,233 +1,248 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
- version: !ruby/object:Gem::Version
4
- hash: 21
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Roland Swingler
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2013-11-19 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- version_requirements: &id001 !ruby/object:Gem::Requirement
12
+ date: 2013-11-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: chicagowarehouse
16
+ requirement: !ruby/object:Gem::Requirement
22
17
  none: false
23
- requirements:
18
+ requirements:
24
19
  - - ~>
25
- - !ruby/object:Gem::Version
26
- hash: 3
27
- segments:
28
- - 0
29
- - 4
30
- version: "0.4"
31
- requirement: *id001
20
+ - !ruby/object:Gem::Version
21
+ version: '0.4'
32
22
  type: :runtime
33
23
  prerelease: false
34
- name: chicagowarehouse
35
- - !ruby/object:Gem::Dependency
36
- version_requirements: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
37
25
  none: false
38
- requirements:
39
- - - ">="
40
- - !ruby/object:Gem::Version
41
- hash: 3
42
- segments:
43
- - 0
44
- version: "0"
45
- requirement: *id002
46
- type: :runtime
47
- prerelease: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.4'
30
+ - !ruby/object:Gem::Dependency
48
31
  name: fastercsv
49
- - !ruby/object:Gem::Dependency
50
- version_requirements: &id003 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
51
33
  none: false
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- hash: 3
56
- segments:
57
- - 0
58
- version: "0"
59
- requirement: *id003
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
60
38
  type: :runtime
61
39
  prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: sequel
63
- - !ruby/object:Gem::Dependency
64
- version_requirements: &id004 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 27
70
- segments:
71
- - 0
72
- - 0
73
- - 2
74
- version: 0.0.2
75
- requirement: *id004
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
76
54
  type: :runtime
77
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
78
63
  name: sequel_load_data_infile
79
- - !ruby/object:Gem::Dependency
80
- version_requirements: &id005 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
81
65
  none: false
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- hash: 3
86
- segments:
87
- - 0
88
- version: "0"
89
- requirement: *id005
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 0.0.2
90
70
  type: :runtime
91
71
  prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 0.0.2
78
+ - !ruby/object:Gem::Dependency
92
79
  name: sequel_fast_columns
93
- - !ruby/object:Gem::Dependency
94
- version_requirements: &id006 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
95
81
  none: false
96
- requirements:
97
- - - ~>
98
- - !ruby/object:Gem::Version
99
- hash: 7
100
- segments:
101
- - 2
102
- version: "2"
103
- requirement: *id006
104
- type: :development
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
105
87
  prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
106
95
  name: rspec
107
- - !ruby/object:Gem::Dependency
108
- version_requirements: &id007 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
109
97
  none: false
110
- requirements:
111
- - - ">="
112
- - !ruby/object:Gem::Version
113
- hash: 3
114
- segments:
115
- - 0
116
- version: "0"
117
- requirement: *id007
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: '2'
118
102
  type: :development
119
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '2'
110
+ - !ruby/object:Gem::Dependency
120
111
  name: timecop
121
- - !ruby/object:Gem::Dependency
122
- version_requirements: &id008 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
123
113
  none: false
124
- requirements:
125
- - - ">="
126
- - !ruby/object:Gem::Version
127
- hash: 3
128
- segments:
129
- - 0
130
- version: "0"
131
- requirement: *id008
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
132
118
  type: :development
133
119
  prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
134
127
  name: yard
135
- - !ruby/object:Gem::Dependency
136
- version_requirements: &id009 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
137
129
  none: false
138
- requirements:
139
- - - ">="
140
- - !ruby/object:Gem::Version
141
- hash: 3
142
- segments:
143
- - 0
144
- version: "0"
145
- requirement: *id009
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
146
134
  type: :development
147
135
  prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ - !ruby/object:Gem::Dependency
148
143
  name: flog
149
- - !ruby/object:Gem::Dependency
150
- version_requirements: &id010 !ruby/object:Gem::Requirement
144
+ requirement: !ruby/object:Gem::Requirement
151
145
  none: false
152
- requirements:
153
- - - ">="
154
- - !ruby/object:Gem::Version
155
- hash: 3
156
- segments:
157
- - 0
158
- version: "0"
159
- requirement: *id010
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
160
150
  type: :development
161
151
  prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ - !ruby/object:Gem::Dependency
162
159
  name: simplecov
163
- - !ruby/object:Gem::Dependency
164
- version_requirements: &id011 !ruby/object:Gem::Requirement
160
+ requirement: !ruby/object:Gem::Requirement
165
161
  none: false
166
- requirements:
167
- - - ">="
168
- - !ruby/object:Gem::Version
169
- hash: 3
170
- segments:
171
- - 0
172
- version: "0"
173
- requirement: *id011
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
174
166
  type: :development
175
167
  prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
176
175
  name: ZenTest
177
- - !ruby/object:Gem::Dependency
178
- version_requirements: &id012 !ruby/object:Gem::Requirement
176
+ requirement: !ruby/object:Gem::Requirement
179
177
  none: false
180
- requirements:
181
- - - "="
182
- - !ruby/object:Gem::Version
183
- hash: 45
184
- segments:
185
- - 2
186
- - 8
187
- - 1
188
- version: 2.8.1
189
- requirement: *id012
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
190
182
  type: :development
191
183
  prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ - !ruby/object:Gem::Dependency
192
191
  name: mysql
193
- - !ruby/object:Gem::Dependency
194
- version_requirements: &id013 !ruby/object:Gem::Requirement
192
+ requirement: !ruby/object:Gem::Requirement
195
193
  none: false
196
- requirements:
197
- - - ~>
198
- - !ruby/object:Gem::Version
199
- hash: 1
200
- segments:
201
- - 1
202
- version: "1"
203
- requirement: *id013
194
+ requirements:
195
+ - - '='
196
+ - !ruby/object:Gem::Version
197
+ version: 2.8.1
204
198
  type: :development
205
199
  prerelease: false
200
+ version_requirements: !ruby/object:Gem::Requirement
201
+ none: false
202
+ requirements:
203
+ - - '='
204
+ - !ruby/object:Gem::Version
205
+ version: 2.8.1
206
+ - !ruby/object:Gem::Dependency
206
207
  name: bundler
207
- - !ruby/object:Gem::Dependency
208
- version_requirements: &id014 !ruby/object:Gem::Requirement
208
+ requirement: !ruby/object:Gem::Requirement
209
209
  none: false
210
- requirements:
211
- - - ">="
212
- - !ruby/object:Gem::Version
213
- hash: 3
214
- segments:
215
- - 0
216
- version: "0"
217
- requirement: *id014
210
+ requirements:
211
+ - - ~>
212
+ - !ruby/object:Gem::Version
213
+ version: '1'
218
214
  type: :development
219
215
  prerelease: false
216
+ version_requirements: !ruby/object:Gem::Requirement
217
+ none: false
218
+ requirements:
219
+ - - ~>
220
+ - !ruby/object:Gem::Version
221
+ version: '1'
222
+ - !ruby/object:Gem::Dependency
220
223
  name: jeweler
224
+ requirement: !ruby/object:Gem::Requirement
225
+ none: false
226
+ requirements:
227
+ - - ! '>='
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ none: false
234
+ requirements:
235
+ - - ! '>='
236
+ - !ruby/object:Gem::Version
237
+ version: '0'
221
238
  description: ETL tools for Chicago
222
239
  email: roland.swingler@gmail.com
223
240
  executables: []
224
-
225
241
  extensions: []
226
-
227
- extra_rdoc_files:
242
+ extra_rdoc_files:
228
243
  - LICENSE.txt
229
244
  - README.rdoc
230
- files:
245
+ files:
231
246
  - .document
232
247
  - .rspec
233
248
  - Gemfile
@@ -244,7 +259,6 @@ files:
244
259
  - lib/chicago/etl/batch.rb
245
260
  - lib/chicago/etl/core_extensions.rb
246
261
  - lib/chicago/etl/counter.rb
247
- - lib/chicago/etl/dataset_batch_stage.rb
248
262
  - lib/chicago/etl/dataset_builder.rb
249
263
  - lib/chicago/etl/dataset_source.rb
250
264
  - lib/chicago/etl/errors.rb
@@ -259,6 +273,7 @@ files:
259
273
  - lib/chicago/etl/pipeline_endpoint.rb
260
274
  - lib/chicago/etl/schema_sinks_and_transformations_builder.rb
261
275
  - lib/chicago/etl/schema_table_sink_factory.rb
276
+ - lib/chicago/etl/schema_table_stage_builder.rb
262
277
  - lib/chicago/etl/screens/column_screen.rb
263
278
  - lib/chicago/etl/screens/invalid_element.rb
264
279
  - lib/chicago/etl/screens/missing_value.rb
@@ -268,6 +283,7 @@ files:
268
283
  - lib/chicago/etl/sink.rb
269
284
  - lib/chicago/etl/stage.rb
270
285
  - lib/chicago/etl/stage_builder.rb
286
+ - lib/chicago/etl/stage_name.rb
271
287
  - lib/chicago/etl/table_builder.rb
272
288
  - lib/chicago/etl/task_invocation.rb
273
289
  - lib/chicago/etl/tasks.rb
@@ -287,6 +303,7 @@ files:
287
303
  - spec/etl/define_dimension_stage_spec.rb
288
304
  - spec/etl/define_stage_spec.rb
289
305
  - spec/etl/etl_batch_id_dataset_filter.rb
306
+ - spec/etl/execution_wrapper_spec.rb
290
307
  - spec/etl/filter_spec.rb
291
308
  - spec/etl/key_builder_spec.rb
292
309
  - spec/etl/load_dataset_builder_spec.rb
@@ -300,6 +317,7 @@ files:
300
317
  - spec/etl/screens/out_of_bounds_spec.rb
301
318
  - spec/etl/sequel/dependant_tables_spec.rb
302
319
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
320
+ - spec/etl/stage_name_spec.rb
303
321
  - spec/etl/stage_spec.rb
304
322
  - spec/etl/table_builder_spec.rb
305
323
  - spec/etl/task_spec.rb
@@ -311,37 +329,31 @@ files:
311
329
  - spec/etl/transformations_spec.rb
312
330
  - spec/spec_helper.rb
313
331
  homepage: http://github.com/notonthehighstreet/chicago-etl
314
- licenses:
332
+ licenses:
315
333
  - MIT
316
334
  post_install_message:
317
335
  rdoc_options: []
318
-
319
- require_paths:
336
+ require_paths:
320
337
  - lib
321
- required_ruby_version: !ruby/object:Gem::Requirement
338
+ required_ruby_version: !ruby/object:Gem::Requirement
322
339
  none: false
323
- requirements:
324
- - - ">="
325
- - !ruby/object:Gem::Version
326
- hash: 3
327
- segments:
340
+ requirements:
341
+ - - ! '>='
342
+ - !ruby/object:Gem::Version
343
+ version: '0'
344
+ segments:
328
345
  - 0
329
- version: "0"
330
- required_rubygems_version: !ruby/object:Gem::Requirement
346
+ hash: -2294158408277347233
347
+ required_rubygems_version: !ruby/object:Gem::Requirement
331
348
  none: false
332
- requirements:
333
- - - ">="
334
- - !ruby/object:Gem::Version
335
- hash: 3
336
- segments:
337
- - 0
338
- version: "0"
349
+ requirements:
350
+ - - ! '>='
351
+ - !ruby/object:Gem::Version
352
+ version: '0'
339
353
  requirements: []
340
-
341
354
  rubyforge_project:
342
355
  rubygems_version: 1.8.25
343
356
  signing_key:
344
357
  specification_version: 3
345
358
  summary: Chicago ETL
346
359
  test_files: []
347
-
@@ -1,19 +0,0 @@
1
- module Chicago
2
- module ETL
3
- # Allows deferring constructing a DatasetSource until extract
4
- # time, so that it can be filtered to an ETL batch appropriately.
5
- class DatasetBatchStage < Stage
6
- # Executes this ETL stage.
7
- #
8
- # Configures the dataset and flows rows into the pipeline.
9
- def execute(etl_batch, reextract=false)
10
- if reextract && sink(:error) && !truncate_pre_load?
11
- sink(:error).truncate
12
- end
13
-
14
- sink(:default).set_constant_values(:_inserted_at => Time.now)
15
- super
16
- end
17
- end
18
- end
19
- end