chicago-etl 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-19"
12
+ s.date = "2013-11-26"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -33,7 +33,6 @@ Gem::Specification.new do |s|
33
33
  "lib/chicago/etl/batch.rb",
34
34
  "lib/chicago/etl/core_extensions.rb",
35
35
  "lib/chicago/etl/counter.rb",
36
- "lib/chicago/etl/dataset_batch_stage.rb",
37
36
  "lib/chicago/etl/dataset_builder.rb",
38
37
  "lib/chicago/etl/dataset_source.rb",
39
38
  "lib/chicago/etl/errors.rb",
@@ -48,6 +47,7 @@ Gem::Specification.new do |s|
48
47
  "lib/chicago/etl/pipeline_endpoint.rb",
49
48
  "lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
50
49
  "lib/chicago/etl/schema_table_sink_factory.rb",
50
+ "lib/chicago/etl/schema_table_stage_builder.rb",
51
51
  "lib/chicago/etl/screens/column_screen.rb",
52
52
  "lib/chicago/etl/screens/invalid_element.rb",
53
53
  "lib/chicago/etl/screens/missing_value.rb",
@@ -57,6 +57,7 @@ Gem::Specification.new do |s|
57
57
  "lib/chicago/etl/sink.rb",
58
58
  "lib/chicago/etl/stage.rb",
59
59
  "lib/chicago/etl/stage_builder.rb",
60
+ "lib/chicago/etl/stage_name.rb",
60
61
  "lib/chicago/etl/table_builder.rb",
61
62
  "lib/chicago/etl/task_invocation.rb",
62
63
  "lib/chicago/etl/tasks.rb",
@@ -76,6 +77,7 @@ Gem::Specification.new do |s|
76
77
  "spec/etl/define_dimension_stage_spec.rb",
77
78
  "spec/etl/define_stage_spec.rb",
78
79
  "spec/etl/etl_batch_id_dataset_filter.rb",
80
+ "spec/etl/execution_wrapper_spec.rb",
79
81
  "spec/etl/filter_spec.rb",
80
82
  "spec/etl/key_builder_spec.rb",
81
83
  "spec/etl/load_dataset_builder_spec.rb",
@@ -89,6 +91,7 @@ Gem::Specification.new do |s|
89
91
  "spec/etl/screens/out_of_bounds_spec.rb",
90
92
  "spec/etl/sequel/dependant_tables_spec.rb",
91
93
  "spec/etl/sequel/filter_to_etl_batch_spec.rb",
94
+ "spec/etl/stage_name_spec.rb",
92
95
  "spec/etl/stage_spec.rb",
93
96
  "spec/etl/table_builder_spec.rb",
94
97
  "spec/etl/task_spec.rb",
@@ -2,116 +2,44 @@ module Chicago
2
2
  module ETL
3
3
  # An ETL pipeline.
4
4
  class Pipeline
5
- # Returns all defined dimension load tasks
6
- attr_reader :load_dimensions
7
-
8
- # Returns all defined fact load tasks
9
- attr_reader :load_facts
10
-
11
- # Returns all the defined generic stages.
5
+ # Returns all the defined stages.
12
6
  attr_reader :stages
13
7
 
14
8
  # Creates a pipeline for a Schema.
15
9
  def initialize(db, schema)
16
10
  @schema, @db = schema, db
17
- @load_dimensions = Chicago::Schema::NamedElementCollection.new
18
- @load_facts = Chicago::Schema::NamedElementCollection.new
19
11
  @stages = Chicago::Schema::NamedElementCollection.new
20
12
  end
21
13
 
22
14
  # Defines a generic stage in the pipeline.
23
- def define_stage(name, &block)
24
- @stages << build_schemaless_stage(name, &block)
25
- end
26
-
27
- def build_schemaless_stage(name, &block)
28
- StageBuilder.new(@db).build(name, &block)
29
- end
30
-
31
- # Defines a dimension load stage
32
- def define_dimension_load(name, options={}, &block)
33
- dimension_name = options[:dimension] || name
34
- @load_dimensions << build_stage(name,
35
- @schema.dimension(dimension_name),
36
- &block)
37
- end
38
-
39
- # Defines a fact load stage
40
- def define_fact_load(name, options={}, &block)
41
- fact_name = options[:fact] || name
42
- @load_facts << build_stage(name, @schema.fact(fact_name), &block)
43
- end
44
-
45
- # Builds a stage, but does not define it.
46
- def build_stage(name, schema_table, &block)
47
- DatasetBatchStageBuilder.new(@db, schema_table).build(name, &block)
48
- end
49
- end
50
-
51
- # Provides DSL methods for building a DataSetBatchStage.
52
- #
53
- # Clients shouldn't need to instantiate this directly, but instead
54
- # call the protected methods in the context of defining a Pipeline
55
- class DatasetBatchStageBuilder
56
- # @api private
57
- def initialize(db, schema_table)
58
- @db, @schema_table = db, schema_table
59
- end
60
-
61
- # @api private
62
- def build(name, &block)
63
- instance_eval &block
64
- unless defined? @sinks_and_transformations
65
- pipeline do
66
- end
67
- end
15
+ def define_stage(*args, &block)
16
+ options = args.last.kind_of?(Hash) ? args.pop : {}
68
17
 
69
- @filter_strategy ||= lambda {|dataset, etl_batch|
70
- dataset.filter_to_etl_batch(etl_batch)
71
- }
18
+ name = StageName.new(args)
72
19
 
73
- DatasetBatchStage.new(name,
74
- :source => @dataset,
75
- :transformations => @sinks_and_transformations[:transformations],
76
- :sinks => @sinks_and_transformations[:sinks],
77
- :filter_strategy => @filter_strategy,
78
- :truncate_pre_load => @truncate_pre_load)
79
- end
80
-
81
- protected
82
-
83
- # Specifies that the sinks should be truncated before loading
84
- # data.
85
- def truncate_pre_load
86
- @truncate_pre_load = true
20
+ if name =~ [:load, :dimensions]
21
+ @stages << build_dimension_load_stage(name, options, &block)
22
+ elsif name =~ [:load, :facts]
23
+ @stages << build_fact_load_stage(name, options, &block)
24
+ else
25
+ @stages << StageBuilder.new(@db).build(name, &block)
26
+ end
87
27
  end
88
28
 
89
- # Specifies that the dataset should never be filtered to the ETL
90
- # batch - i.e. it should behave as if reextract was always true
91
- def full_reload
92
- @filter_strategy = lambda {|dataset, etl_batch| dataset }
29
+ def build_stage(name, schema_table, &block)
30
+ SchemaTableStageBuilder.new(@db, schema_table).build(name, &block)
93
31
  end
94
32
 
95
- # Define elements of the pipeline. See LoadPipelineStageBuilder
96
- # for details.
97
- # TODO: rename pipeline => transforms below this method
98
- def pipeline(&block)
99
- @sinks_and_transformations = SchemaSinksAndTransformationsBuilder.new(@db, @schema_table).
100
- build(&block)
101
- end
33
+ private
102
34
 
103
- # Defines the dataset, see DatasetBuilder .
104
- #
105
- # The block must return a Sequel::Dataset.
106
- # TODO: rename dataset => source below this method, make generic
107
- def source(&block)
108
- @dataset = DatasetBuilder.new(@db).build(&block)
35
+ def build_dimension_load_stage(name, options, &block)
36
+ dimension_name = options[:dimension] || name.name
37
+ build_stage(name, @schema.dimension(dimension_name), &block)
109
38
  end
110
- alias :dataset :source
111
39
 
112
- # Define a custom filter strategy for filtering to an ETL batch.
113
- def filter_strategy(&block)
114
- @filter_strategy = block
40
+ def build_fact_load_stage(name, options, &block)
41
+ fact_name = options[:fact] || name.name
42
+ build_stage(name, @schema.fact(fact_name), &block)
115
43
  end
116
44
  end
117
45
  end
@@ -0,0 +1,48 @@
1
+ require 'chicago/etl/stage_builder'
2
+
3
+ module Chicago
4
+ module ETL
5
+ # Provides DSL methods for building a Stage bound to a schema table.
6
+ #
7
+ # Clients shouldn't need to instantiate this directly, but instead
8
+ # call the protected methods in the context of defining a Pipeline
9
+ class SchemaTableStageBuilder < StageBuilder
10
+ # @api private
11
+ def initialize(db, schema_table)
12
+ super(db)
13
+ @wrapped_builder = SchemaSinksAndTransformationsBuilder.
14
+ new(@db, schema_table)
15
+ end
16
+
17
+ protected
18
+
19
+ # Define elements of the pipeline. See LoadPipelineStageBuilder
20
+ # for details.
21
+ #
22
+ # @deprecated
23
+ def pipeline(&block)
24
+ sinks_and_transformations = @wrapped_builder.build(&block)
25
+ @sinks = sinks_and_transformations[:sinks]
26
+ @transformations = sinks_and_transformations[:transformations] || []
27
+ end
28
+
29
+ # @api private
30
+ def set_default_stage_values
31
+ unless defined? @sinks
32
+ pipeline do
33
+ end
34
+ end
35
+
36
+ @pre_execution_strategies << lambda {|stage, etl_batch, reextract|
37
+ stage.sink(:error).truncate if reextract && stage.sink(:error)
38
+ stage.sink(:default).
39
+ set_constant_values(:_inserted_at => Time.now)
40
+ }
41
+
42
+ @filter_strategy ||= lambda {|dataset, etl_batch|
43
+ dataset.filter_to_etl_batch(etl_batch)
44
+ }
45
+ end
46
+ end
47
+ end
48
+ end
@@ -16,20 +16,26 @@ module Chicago
16
16
  @source = options[:source]
17
17
  @sinks = options[:sinks]
18
18
  @transformations = options[:transformations] || []
19
- @filter_strategy = options[:filter_strategy] ||
20
- lambda {|source, _| source }
21
- @truncate_pre_load = !!options[:truncate_pre_load]
19
+ @filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
20
+ @pre_execution_strategies = options[:pre_execution_strategies] || []
21
+ @executable = options.has_key?(:executable) ? options[:executable] : true
22
22
 
23
23
  validate_arguments
24
24
  end
25
25
 
26
- # Returns true if the sinks should be truncated pre-load.
27
- def truncate_pre_load?
28
- @truncate_pre_load
26
+ # Returns the unqualified name of this stage.
27
+ def task_name
28
+ name.name
29
29
  end
30
-
30
+
31
+ # Returns true if this stage should be executed.
32
+ def executable?
33
+ @executable
34
+ end
35
+
36
+ # Executes this stage in the context of an ETL::Batch
31
37
  def execute(etl_batch, reextract=false)
32
- sinks.each {|sink| sink.truncate } if truncate_pre_load?
38
+ prepare_stage(etl_batch, reextract)
33
39
  transform_and_load filtered_source(etl_batch, reextract)
34
40
  end
35
41
 
@@ -42,6 +48,7 @@ module Chicago
42
48
  @sinks.values
43
49
  end
44
50
 
51
+ # @api private
45
52
  def filtered_source(etl_batch, reextract=false)
46
53
  filtered_dataset = reextract ? source :
47
54
  @filter_strategy.call(source, etl_batch)
@@ -51,6 +58,12 @@ module Chicago
51
58
 
52
59
  private
53
60
 
61
+ def prepare_stage(etl_batch, reextract)
62
+ @pre_execution_strategies.each do |strategy|
63
+ strategy.call(self, etl_batch, reextract)
64
+ end
65
+ end
66
+
54
67
  def transform_and_load(source)
55
68
  sinks.each(&:open)
56
69
  pipe_rows_to_sinks_from(source)
@@ -6,20 +6,42 @@ module Chicago
6
6
  end
7
7
 
8
8
  def build(name, &block)
9
- @sinks = {}
10
- @transformations = []
9
+ @pre_execution_strategies = []
10
+ @executable = true
11
11
 
12
12
  instance_eval &block
13
-
13
+ set_default_stage_values
14
+
14
15
  Stage.new(name,
15
16
  :source => @dataset,
16
17
  :sinks => @sinks,
17
18
  :transformations => @transformations,
18
- :filter_strategy => @filter_strategy)
19
+ :filter_strategy => @filter_strategy,
20
+ :pre_execution_strategies => @pre_execution_strategies,
21
+ :executable => @executable)
19
22
  end
20
23
 
21
24
  protected
22
25
 
26
+ # Specifies that the sinks should be truncated before loading
27
+ # data.
28
+ def truncate_pre_load
29
+ @pre_execution_strategies << lambda {|stage, etl_batch, reextract|
30
+ stage.sinks.each {|sink| sink.truncate }
31
+ }
32
+ end
33
+
34
+ # Specifies that the dataset should never be filtered to the ETL
35
+ # batch - i.e. it should behave as if reextract was always true
36
+ def full_reload
37
+ @filter_strategy = lambda {|dataset, etl_batch| dataset }
38
+ end
39
+
40
+ # Mark this stage as executable or non-executable.
41
+ def executable(value=true)
42
+ @executable = value
43
+ end
44
+
23
45
  def source(&block)
24
46
  @dataset = DatasetBuilder.new(@db).build(&block)
25
47
  end
@@ -32,12 +54,18 @@ module Chicago
32
54
  @sinks = SinkBuilder.new.build(&block)
33
55
  end
34
56
 
35
- # TODO: think of potentially better ways of dealig with this
57
+ # TODO: think of potentially better ways of dealing with this
36
58
  # problem.
37
59
  def filter_strategy(&block)
38
60
  @filter_strategy = block
39
61
  end
40
62
 
63
+ # @api private
64
+ def set_default_stage_values
65
+ @sinks ||= sinks {}
66
+ @transformations ||= transformations {}
67
+ end
68
+
41
69
  class TransformationBuilder
42
70
  def build(&block)
43
71
  @transformations = []
@@ -0,0 +1,51 @@
1
+ module Chicago
2
+ module ETL
3
+ # A namespaced name for an ETL stage.
4
+ #
5
+ # @api private
6
+ class StageName
7
+ def initialize(*names)
8
+ if names.size == 1 && names.first.kind_of?(String)
9
+ @names = names.first.split(".").map(&:to_sym).freeze
10
+ else
11
+ @names = names.flatten.map(&:to_sym).freeze
12
+ end
13
+ end
14
+
15
+ def name
16
+ @names.last
17
+ end
18
+
19
+ def match?(*pattern)
20
+ pattern.flatten!
21
+ return false if pattern.size > @names.size
22
+
23
+ pattern.each_with_index.all? do |part, i|
24
+ part == :* || @names[i] == part
25
+ end
26
+ end
27
+ alias :=~ :match?
28
+
29
+ def namespace
30
+ @names[0...(@names.size - 1)]
31
+ end
32
+
33
+ def eql?(other)
34
+ to_s == other.to_s
35
+ end
36
+ alias :== :eql?
37
+
38
+ def hash
39
+ to_s.hash
40
+ end
41
+
42
+ def to_a
43
+ @names.dup
44
+ end
45
+
46
+ def to_s
47
+ @string_representation ||= @names.join('.')
48
+ end
49
+ end
50
+ end
51
+ end
data/lib/chicago/etl.rb CHANGED
@@ -19,15 +19,16 @@ require 'chicago/etl/null_sink'
19
19
  require 'chicago/etl/mysql'
20
20
 
21
21
  require 'chicago/etl/core_extensions'
22
+ require 'chicago/etl/stage_name'
22
23
  require 'chicago/etl/counter'
23
24
  require 'chicago/etl/key_builder'
24
25
  require 'chicago/etl/schema_table_sink_factory'
26
+ require 'chicago/etl/schema_table_stage_builder'
25
27
  require 'chicago/etl/transformations'
26
28
  require 'chicago/etl/load_dataset_builder'
27
29
  require 'chicago/etl/dataset_builder'
28
30
  require 'chicago/etl/stage'
29
31
  require 'chicago/etl/stage_builder'
30
- require 'chicago/etl/dataset_batch_stage'
31
32
  require 'chicago/etl/schema_sinks_and_transformations_builder'
32
33
  require 'chicago/etl/pipeline'
33
34
 
@@ -60,9 +61,13 @@ module Chicago
60
61
  # within a batch.
61
62
  def self.execute(stage, etl_batch, reextract, logger)
62
63
  etl_batch.perform_task(:load, stage.name) do
63
- logger.debug "Starting loading #{stage.name}"
64
- stage.execute(etl_batch, reextract)
65
- logger.debug "Finished loading #{stage.name}"
64
+ if stage.executable?
65
+ logger.debug "Starting loading #{stage.name}"
66
+ stage.execute(etl_batch, reextract)
67
+ logger.info "Finished loading #{stage.name}"
68
+ else
69
+ logger.info "Skipping stage #{stage.name}"
70
+ end
66
71
  end
67
72
  end
68
73
  end
@@ -18,8 +18,8 @@ describe "creating and running a dimension stage" do
18
18
  let(:pipeline) { Chicago::ETL::Pipeline.new(db, schema)}
19
19
 
20
20
  it "glues the source, transformations, and sink correctly" do
21
- pipeline.define_dimension_load(:test) do
22
- dataset do
21
+ pipeline.define_stage(:load, :dimensions, :test) do
22
+ source do
23
23
  db.test_dataset_method
24
24
  end
25
25
  end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Chicago::ETL Execution method" do
4
+ class StubBatch
5
+ def perform_task(*args)
6
+ yield
7
+ end
8
+ end
9
+
10
+ let(:logger) { mock(:logger).as_null_object }
11
+ let(:batch) { StubBatch.new }
12
+
13
+ it "only logs skipping the stage if the stage is not executable" do
14
+ stage = stub(:stage, :executable? => false, :name => "test")
15
+ stage.should_not_receive(:execute)
16
+ logger.should_receive(:info).with("Skipping stage test")
17
+
18
+ Chicago::ETL.execute(stage, batch, false, logger)
19
+ end
20
+
21
+ it "executes the stage" do
22
+ stage = stub(:stage, :executable? => true, :name => "test")
23
+ stage.should_receive(:execute).with(batch, false)
24
+
25
+ Chicago::ETL.execute(stage, batch, false, logger)
26
+ end
27
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+ require 'set'
3
+
4
+ describe Chicago::ETL::StageName do
5
+ it "can be constructed with variable args" do
6
+ described_class.new(:a, :b).to_a.should == [:a, :b]
7
+ end
8
+
9
+ it "can be constructed with an array of symbols" do
10
+ described_class.new([:a, :b]).to_a.should == [:a, :b]
11
+ end
12
+
13
+ it "can be constructed with a dot separated string" do
14
+ described_class.new("foo.bar").to_a.should == [:foo, :bar]
15
+ end
16
+
17
+ it "has a name" do
18
+ described_class.new("foo.bar.baz").name.should == :baz
19
+ end
20
+
21
+ it "has a namespace" do
22
+ described_class.new("foo.bar.baz").namespace.should == [:foo, :bar]
23
+ end
24
+
25
+ it "supports equality" do
26
+ described_class.new(:a, :b).should == described_class.new(:a, :b)
27
+ set = Set.new
28
+ set << described_class.new(:a, :b)
29
+ set.should include(described_class.new(:a, :b))
30
+ end
31
+
32
+ it "has a dotted string representation" do
33
+ described_class.new(:a, :b).to_s.should == "a.b"
34
+ end
35
+
36
+ it "matches an exact pattern" do
37
+ described_class.new(:a, :b).match?(:a, :b).should be_true
38
+ described_class.new(:a, :b).match?(:a, :c).should be_false
39
+ end
40
+
41
+ it "matches a left-anchored partial pattern" do
42
+ described_class.new(:a, :b).match?(:a).should be_true
43
+ described_class.new(:a, :b).match?(:b).should be_false
44
+ end
45
+
46
+ it "allows wildcards matching" do
47
+ described_class.new(:a, :b).match?(:*, :b).should be_true
48
+ described_class.new(:a, :b).match?(:*, :*).should be_true
49
+ described_class.new(:a, :b).match?(:*, :*, :*).should be_false
50
+ end
51
+
52
+ it "can use the =~ operator" do
53
+ (described_class.new(:a, :b) =~ [:*, :b]).should be_true
54
+ end
55
+ end
metadata CHANGED
@@ -1,233 +1,248 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
- version: !ruby/object:Gem::Version
4
- hash: 21
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Roland Swingler
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2013-11-19 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- version_requirements: &id001 !ruby/object:Gem::Requirement
12
+ date: 2013-11-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: chicagowarehouse
16
+ requirement: !ruby/object:Gem::Requirement
22
17
  none: false
23
- requirements:
18
+ requirements:
24
19
  - - ~>
25
- - !ruby/object:Gem::Version
26
- hash: 3
27
- segments:
28
- - 0
29
- - 4
30
- version: "0.4"
31
- requirement: *id001
20
+ - !ruby/object:Gem::Version
21
+ version: '0.4'
32
22
  type: :runtime
33
23
  prerelease: false
34
- name: chicagowarehouse
35
- - !ruby/object:Gem::Dependency
36
- version_requirements: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
37
25
  none: false
38
- requirements:
39
- - - ">="
40
- - !ruby/object:Gem::Version
41
- hash: 3
42
- segments:
43
- - 0
44
- version: "0"
45
- requirement: *id002
46
- type: :runtime
47
- prerelease: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.4'
30
+ - !ruby/object:Gem::Dependency
48
31
  name: fastercsv
49
- - !ruby/object:Gem::Dependency
50
- version_requirements: &id003 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
51
33
  none: false
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- hash: 3
56
- segments:
57
- - 0
58
- version: "0"
59
- requirement: *id003
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
60
38
  type: :runtime
61
39
  prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: sequel
63
- - !ruby/object:Gem::Dependency
64
- version_requirements: &id004 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 27
70
- segments:
71
- - 0
72
- - 0
73
- - 2
74
- version: 0.0.2
75
- requirement: *id004
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
76
54
  type: :runtime
77
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
78
63
  name: sequel_load_data_infile
79
- - !ruby/object:Gem::Dependency
80
- version_requirements: &id005 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
81
65
  none: false
82
- requirements:
83
- - - ">="
84
- - !ruby/object:Gem::Version
85
- hash: 3
86
- segments:
87
- - 0
88
- version: "0"
89
- requirement: *id005
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 0.0.2
90
70
  type: :runtime
91
71
  prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: 0.0.2
78
+ - !ruby/object:Gem::Dependency
92
79
  name: sequel_fast_columns
93
- - !ruby/object:Gem::Dependency
94
- version_requirements: &id006 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
95
81
  none: false
96
- requirements:
97
- - - ~>
98
- - !ruby/object:Gem::Version
99
- hash: 7
100
- segments:
101
- - 2
102
- version: "2"
103
- requirement: *id006
104
- type: :development
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
105
87
  prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
106
95
  name: rspec
107
- - !ruby/object:Gem::Dependency
108
- version_requirements: &id007 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
109
97
  none: false
110
- requirements:
111
- - - ">="
112
- - !ruby/object:Gem::Version
113
- hash: 3
114
- segments:
115
- - 0
116
- version: "0"
117
- requirement: *id007
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: '2'
118
102
  type: :development
119
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '2'
110
+ - !ruby/object:Gem::Dependency
120
111
  name: timecop
121
- - !ruby/object:Gem::Dependency
122
- version_requirements: &id008 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
123
113
  none: false
124
- requirements:
125
- - - ">="
126
- - !ruby/object:Gem::Version
127
- hash: 3
128
- segments:
129
- - 0
130
- version: "0"
131
- requirement: *id008
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
132
118
  type: :development
133
119
  prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
134
127
  name: yard
135
- - !ruby/object:Gem::Dependency
136
- version_requirements: &id009 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
137
129
  none: false
138
- requirements:
139
- - - ">="
140
- - !ruby/object:Gem::Version
141
- hash: 3
142
- segments:
143
- - 0
144
- version: "0"
145
- requirement: *id009
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
146
134
  type: :development
147
135
  prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ - !ruby/object:Gem::Dependency
148
143
  name: flog
149
- - !ruby/object:Gem::Dependency
150
- version_requirements: &id010 !ruby/object:Gem::Requirement
144
+ requirement: !ruby/object:Gem::Requirement
151
145
  none: false
152
- requirements:
153
- - - ">="
154
- - !ruby/object:Gem::Version
155
- hash: 3
156
- segments:
157
- - 0
158
- version: "0"
159
- requirement: *id010
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
160
150
  type: :development
161
151
  prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ - !ruby/object:Gem::Dependency
162
159
  name: simplecov
163
- - !ruby/object:Gem::Dependency
164
- version_requirements: &id011 !ruby/object:Gem::Requirement
160
+ requirement: !ruby/object:Gem::Requirement
165
161
  none: false
166
- requirements:
167
- - - ">="
168
- - !ruby/object:Gem::Version
169
- hash: 3
170
- segments:
171
- - 0
172
- version: "0"
173
- requirement: *id011
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
174
166
  type: :development
175
167
  prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
176
175
  name: ZenTest
177
- - !ruby/object:Gem::Dependency
178
- version_requirements: &id012 !ruby/object:Gem::Requirement
176
+ requirement: !ruby/object:Gem::Requirement
179
177
  none: false
180
- requirements:
181
- - - "="
182
- - !ruby/object:Gem::Version
183
- hash: 45
184
- segments:
185
- - 2
186
- - 8
187
- - 1
188
- version: 2.8.1
189
- requirement: *id012
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
190
182
  type: :development
191
183
  prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ - !ruby/object:Gem::Dependency
192
191
  name: mysql
193
- - !ruby/object:Gem::Dependency
194
- version_requirements: &id013 !ruby/object:Gem::Requirement
192
+ requirement: !ruby/object:Gem::Requirement
195
193
  none: false
196
- requirements:
197
- - - ~>
198
- - !ruby/object:Gem::Version
199
- hash: 1
200
- segments:
201
- - 1
202
- version: "1"
203
- requirement: *id013
194
+ requirements:
195
+ - - '='
196
+ - !ruby/object:Gem::Version
197
+ version: 2.8.1
204
198
  type: :development
205
199
  prerelease: false
200
+ version_requirements: !ruby/object:Gem::Requirement
201
+ none: false
202
+ requirements:
203
+ - - '='
204
+ - !ruby/object:Gem::Version
205
+ version: 2.8.1
206
+ - !ruby/object:Gem::Dependency
206
207
  name: bundler
207
- - !ruby/object:Gem::Dependency
208
- version_requirements: &id014 !ruby/object:Gem::Requirement
208
+ requirement: !ruby/object:Gem::Requirement
209
209
  none: false
210
- requirements:
211
- - - ">="
212
- - !ruby/object:Gem::Version
213
- hash: 3
214
- segments:
215
- - 0
216
- version: "0"
217
- requirement: *id014
210
+ requirements:
211
+ - - ~>
212
+ - !ruby/object:Gem::Version
213
+ version: '1'
218
214
  type: :development
219
215
  prerelease: false
216
+ version_requirements: !ruby/object:Gem::Requirement
217
+ none: false
218
+ requirements:
219
+ - - ~>
220
+ - !ruby/object:Gem::Version
221
+ version: '1'
222
+ - !ruby/object:Gem::Dependency
220
223
  name: jeweler
224
+ requirement: !ruby/object:Gem::Requirement
225
+ none: false
226
+ requirements:
227
+ - - ! '>='
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ none: false
234
+ requirements:
235
+ - - ! '>='
236
+ - !ruby/object:Gem::Version
237
+ version: '0'
221
238
  description: ETL tools for Chicago
222
239
  email: roland.swingler@gmail.com
223
240
  executables: []
224
-
225
241
  extensions: []
226
-
227
- extra_rdoc_files:
242
+ extra_rdoc_files:
228
243
  - LICENSE.txt
229
244
  - README.rdoc
230
- files:
245
+ files:
231
246
  - .document
232
247
  - .rspec
233
248
  - Gemfile
@@ -244,7 +259,6 @@ files:
244
259
  - lib/chicago/etl/batch.rb
245
260
  - lib/chicago/etl/core_extensions.rb
246
261
  - lib/chicago/etl/counter.rb
247
- - lib/chicago/etl/dataset_batch_stage.rb
248
262
  - lib/chicago/etl/dataset_builder.rb
249
263
  - lib/chicago/etl/dataset_source.rb
250
264
  - lib/chicago/etl/errors.rb
@@ -259,6 +273,7 @@ files:
259
273
  - lib/chicago/etl/pipeline_endpoint.rb
260
274
  - lib/chicago/etl/schema_sinks_and_transformations_builder.rb
261
275
  - lib/chicago/etl/schema_table_sink_factory.rb
276
+ - lib/chicago/etl/schema_table_stage_builder.rb
262
277
  - lib/chicago/etl/screens/column_screen.rb
263
278
  - lib/chicago/etl/screens/invalid_element.rb
264
279
  - lib/chicago/etl/screens/missing_value.rb
@@ -268,6 +283,7 @@ files:
268
283
  - lib/chicago/etl/sink.rb
269
284
  - lib/chicago/etl/stage.rb
270
285
  - lib/chicago/etl/stage_builder.rb
286
+ - lib/chicago/etl/stage_name.rb
271
287
  - lib/chicago/etl/table_builder.rb
272
288
  - lib/chicago/etl/task_invocation.rb
273
289
  - lib/chicago/etl/tasks.rb
@@ -287,6 +303,7 @@ files:
287
303
  - spec/etl/define_dimension_stage_spec.rb
288
304
  - spec/etl/define_stage_spec.rb
289
305
  - spec/etl/etl_batch_id_dataset_filter.rb
306
+ - spec/etl/execution_wrapper_spec.rb
290
307
  - spec/etl/filter_spec.rb
291
308
  - spec/etl/key_builder_spec.rb
292
309
  - spec/etl/load_dataset_builder_spec.rb
@@ -300,6 +317,7 @@ files:
300
317
  - spec/etl/screens/out_of_bounds_spec.rb
301
318
  - spec/etl/sequel/dependant_tables_spec.rb
302
319
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
320
+ - spec/etl/stage_name_spec.rb
303
321
  - spec/etl/stage_spec.rb
304
322
  - spec/etl/table_builder_spec.rb
305
323
  - spec/etl/task_spec.rb
@@ -311,37 +329,31 @@ files:
311
329
  - spec/etl/transformations_spec.rb
312
330
  - spec/spec_helper.rb
313
331
  homepage: http://github.com/notonthehighstreet/chicago-etl
314
- licenses:
332
+ licenses:
315
333
  - MIT
316
334
  post_install_message:
317
335
  rdoc_options: []
318
-
319
- require_paths:
336
+ require_paths:
320
337
  - lib
321
- required_ruby_version: !ruby/object:Gem::Requirement
338
+ required_ruby_version: !ruby/object:Gem::Requirement
322
339
  none: false
323
- requirements:
324
- - - ">="
325
- - !ruby/object:Gem::Version
326
- hash: 3
327
- segments:
340
+ requirements:
341
+ - - ! '>='
342
+ - !ruby/object:Gem::Version
343
+ version: '0'
344
+ segments:
328
345
  - 0
329
- version: "0"
330
- required_rubygems_version: !ruby/object:Gem::Requirement
346
+ hash: -2294158408277347233
347
+ required_rubygems_version: !ruby/object:Gem::Requirement
331
348
  none: false
332
- requirements:
333
- - - ">="
334
- - !ruby/object:Gem::Version
335
- hash: 3
336
- segments:
337
- - 0
338
- version: "0"
349
+ requirements:
350
+ - - ! '>='
351
+ - !ruby/object:Gem::Version
352
+ version: '0'
339
353
  requirements: []
340
-
341
354
  rubyforge_project:
342
355
  rubygems_version: 1.8.25
343
356
  signing_key:
344
357
  specification_version: 3
345
358
  summary: Chicago ETL
346
359
  test_files: []
347
-
@@ -1,19 +0,0 @@
1
- module Chicago
2
- module ETL
3
- # Allows deferring constructing a DatasetSource until extract
4
- # time, so that it can be filtered to an ETL batch appropriately.
5
- class DatasetBatchStage < Stage
6
- # Executes this ETL stage.
7
- #
8
- # Configures the dataset and flows rows into the pipeline.
9
- def execute(etl_batch, reextract=false)
10
- if reextract && sink(:error) && !truncate_pre_load?
11
- sink(:error).truncate
12
- end
13
-
14
- sink(:default).set_constant_values(:_inserted_at => Time.now)
15
- super
16
- end
17
- end
18
- end
19
- end