chicago-etl 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +6 -3
- data/lib/chicago/etl/pipeline.rb +20 -92
- data/lib/chicago/etl/schema_table_stage_builder.rb +48 -0
- data/lib/chicago/etl/stage.rb +21 -8
- data/lib/chicago/etl/stage_builder.rb +33 -5
- data/lib/chicago/etl/stage_name.rb +51 -0
- data/lib/chicago/etl.rb +9 -4
- data/spec/etl/define_dimension_stage_spec.rb +2 -2
- data/spec/etl/execution_wrapper_spec.rb +27 -0
- data/spec/etl/stage_name_spec.rb +55 -0
- metadata +197 -185
- data/lib/chicago/etl/dataset_batch_stage.rb +0 -19
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.2.1
|
|
1
|
+
0.2.2
|
data/chicago-etl.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = "chicago-etl"
|
|
8
|
-
s.version = "0.2.1"
|
|
8
|
+
s.version = "0.2.2"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Roland Swingler"]
|
|
12
|
-
s.date = "2013-11-
|
|
12
|
+
s.date = "2013-11-26"
|
|
13
13
|
s.description = "ETL tools for Chicago"
|
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
|
15
15
|
s.extra_rdoc_files = [
|
|
@@ -33,7 +33,6 @@ Gem::Specification.new do |s|
|
|
|
33
33
|
"lib/chicago/etl/batch.rb",
|
|
34
34
|
"lib/chicago/etl/core_extensions.rb",
|
|
35
35
|
"lib/chicago/etl/counter.rb",
|
|
36
|
-
"lib/chicago/etl/dataset_batch_stage.rb",
|
|
37
36
|
"lib/chicago/etl/dataset_builder.rb",
|
|
38
37
|
"lib/chicago/etl/dataset_source.rb",
|
|
39
38
|
"lib/chicago/etl/errors.rb",
|
|
@@ -48,6 +47,7 @@ Gem::Specification.new do |s|
|
|
|
48
47
|
"lib/chicago/etl/pipeline_endpoint.rb",
|
|
49
48
|
"lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
|
|
50
49
|
"lib/chicago/etl/schema_table_sink_factory.rb",
|
|
50
|
+
"lib/chicago/etl/schema_table_stage_builder.rb",
|
|
51
51
|
"lib/chicago/etl/screens/column_screen.rb",
|
|
52
52
|
"lib/chicago/etl/screens/invalid_element.rb",
|
|
53
53
|
"lib/chicago/etl/screens/missing_value.rb",
|
|
@@ -57,6 +57,7 @@ Gem::Specification.new do |s|
|
|
|
57
57
|
"lib/chicago/etl/sink.rb",
|
|
58
58
|
"lib/chicago/etl/stage.rb",
|
|
59
59
|
"lib/chicago/etl/stage_builder.rb",
|
|
60
|
+
"lib/chicago/etl/stage_name.rb",
|
|
60
61
|
"lib/chicago/etl/table_builder.rb",
|
|
61
62
|
"lib/chicago/etl/task_invocation.rb",
|
|
62
63
|
"lib/chicago/etl/tasks.rb",
|
|
@@ -76,6 +77,7 @@ Gem::Specification.new do |s|
|
|
|
76
77
|
"spec/etl/define_dimension_stage_spec.rb",
|
|
77
78
|
"spec/etl/define_stage_spec.rb",
|
|
78
79
|
"spec/etl/etl_batch_id_dataset_filter.rb",
|
|
80
|
+
"spec/etl/execution_wrapper_spec.rb",
|
|
79
81
|
"spec/etl/filter_spec.rb",
|
|
80
82
|
"spec/etl/key_builder_spec.rb",
|
|
81
83
|
"spec/etl/load_dataset_builder_spec.rb",
|
|
@@ -89,6 +91,7 @@ Gem::Specification.new do |s|
|
|
|
89
91
|
"spec/etl/screens/out_of_bounds_spec.rb",
|
|
90
92
|
"spec/etl/sequel/dependant_tables_spec.rb",
|
|
91
93
|
"spec/etl/sequel/filter_to_etl_batch_spec.rb",
|
|
94
|
+
"spec/etl/stage_name_spec.rb",
|
|
92
95
|
"spec/etl/stage_spec.rb",
|
|
93
96
|
"spec/etl/table_builder_spec.rb",
|
|
94
97
|
"spec/etl/task_spec.rb",
|
data/lib/chicago/etl/pipeline.rb
CHANGED
|
@@ -2,116 +2,44 @@ module Chicago
|
|
|
2
2
|
module ETL
|
|
3
3
|
# An ETL pipeline.
|
|
4
4
|
class Pipeline
|
|
5
|
-
# Returns all defined dimension load tasks
|
|
6
|
-
attr_reader :load_dimensions
|
|
7
|
-
|
|
8
|
-
# Returns all defined fact load tasks
|
|
9
|
-
attr_reader :load_facts
|
|
10
|
-
|
|
11
|
-
# Returns all the defined generic stages.
|
|
5
|
+
# Returns all the defined stages.
|
|
12
6
|
attr_reader :stages
|
|
13
7
|
|
|
14
8
|
# Creates a pipeline for a Schema.
|
|
15
9
|
def initialize(db, schema)
|
|
16
10
|
@schema, @db = schema, db
|
|
17
|
-
@load_dimensions = Chicago::Schema::NamedElementCollection.new
|
|
18
|
-
@load_facts = Chicago::Schema::NamedElementCollection.new
|
|
19
11
|
@stages = Chicago::Schema::NamedElementCollection.new
|
|
20
12
|
end
|
|
21
13
|
|
|
22
14
|
# Defines a generic stage in the pipeline.
|
|
23
|
-
def define_stage(
|
|
24
|
-
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def build_schemaless_stage(name, &block)
|
|
28
|
-
StageBuilder.new(@db).build(name, &block)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Defines a dimension load stage
|
|
32
|
-
def define_dimension_load(name, options={}, &block)
|
|
33
|
-
dimension_name = options[:dimension] || name
|
|
34
|
-
@load_dimensions << build_stage(name,
|
|
35
|
-
@schema.dimension(dimension_name),
|
|
36
|
-
&block)
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Defines a fact load stage
|
|
40
|
-
def define_fact_load(name, options={}, &block)
|
|
41
|
-
fact_name = options[:fact] || name
|
|
42
|
-
@load_facts << build_stage(name, @schema.fact(fact_name), &block)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Builds a stage, but does not define it.
|
|
46
|
-
def build_stage(name, schema_table, &block)
|
|
47
|
-
DatasetBatchStageBuilder.new(@db, schema_table).build(name, &block)
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Provides DSL methods for building a DataSetBatchStage.
|
|
52
|
-
#
|
|
53
|
-
# Clients shouldn't need to instantiate this directly, but instead
|
|
54
|
-
# call the protected methods in the context of defining a Pipeline
|
|
55
|
-
class DatasetBatchStageBuilder
|
|
56
|
-
# @api private
|
|
57
|
-
def initialize(db, schema_table)
|
|
58
|
-
@db, @schema_table = db, schema_table
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# @api private
|
|
62
|
-
def build(name, &block)
|
|
63
|
-
instance_eval &block
|
|
64
|
-
unless defined? @sinks_and_transformations
|
|
65
|
-
pipeline do
|
|
66
|
-
end
|
|
67
|
-
end
|
|
15
|
+
def define_stage(*args, &block)
|
|
16
|
+
options = args.last.kind_of?(Hash) ? args.pop : {}
|
|
68
17
|
|
|
69
|
-
|
|
70
|
-
dataset.filter_to_etl_batch(etl_batch)
|
|
71
|
-
}
|
|
18
|
+
name = StageName.new(args)
|
|
72
19
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
protected
|
|
82
|
-
|
|
83
|
-
# Specifies that the sinks should be truncated before loading
|
|
84
|
-
# data.
|
|
85
|
-
def truncate_pre_load
|
|
86
|
-
@truncate_pre_load = true
|
|
20
|
+
if name =~ [:load, :dimensions]
|
|
21
|
+
@stages << build_dimension_load_stage(name, options, &block)
|
|
22
|
+
elsif name =~ [:load, :facts]
|
|
23
|
+
@stages << build_fact_load_stage(name, options, &block)
|
|
24
|
+
else
|
|
25
|
+
@stages << StageBuilder.new(@db).build(name, &block)
|
|
26
|
+
end
|
|
87
27
|
end
|
|
88
28
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def full_reload
|
|
92
|
-
@filter_strategy = lambda {|dataset, etl_batch| dataset }
|
|
29
|
+
def build_stage(name, schema_table, &block)
|
|
30
|
+
SchemaTableStageBuilder.new(@db, schema_table).build(name, &block)
|
|
93
31
|
end
|
|
94
32
|
|
|
95
|
-
|
|
96
|
-
# for details.
|
|
97
|
-
# TODO: rename pipeline => transforms below this method
|
|
98
|
-
def pipeline(&block)
|
|
99
|
-
@sinks_and_transformations = SchemaSinksAndTransformationsBuilder.new(@db, @schema_table).
|
|
100
|
-
build(&block)
|
|
101
|
-
end
|
|
33
|
+
private
|
|
102
34
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
# TODO: rename dataset => source below this method, make generic
|
|
107
|
-
def source(&block)
|
|
108
|
-
@dataset = DatasetBuilder.new(@db).build(&block)
|
|
35
|
+
def build_dimension_load_stage(name, options, &block)
|
|
36
|
+
dimension_name = options[:dimension] || name.name
|
|
37
|
+
build_stage(name, @schema.dimension(dimension_name), &block)
|
|
109
38
|
end
|
|
110
|
-
alias :dataset :source
|
|
111
39
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
@
|
|
40
|
+
def build_fact_load_stage(name, options, &block)
|
|
41
|
+
fact_name = options[:fact] || name.name
|
|
42
|
+
build_stage(name, @schema.fact(fact_name), &block)
|
|
115
43
|
end
|
|
116
44
|
end
|
|
117
45
|
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require 'chicago/etl/stage_builder'
|
|
2
|
+
|
|
3
|
+
module Chicago
|
|
4
|
+
module ETL
|
|
5
|
+
# Provides DSL methods for building a DataSetBatchStage.
|
|
6
|
+
#
|
|
7
|
+
# Clients shouldn't need to instantiate this directly, but instead
|
|
8
|
+
# call the protected methods in the context of defining a Pipeline
|
|
9
|
+
class SchemaTableStageBuilder < StageBuilder
|
|
10
|
+
# @api private
|
|
11
|
+
def initialize(db, schema_table)
|
|
12
|
+
super(db)
|
|
13
|
+
@wrapped_builder = SchemaSinksAndTransformationsBuilder.
|
|
14
|
+
new(@db, schema_table)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
protected
|
|
18
|
+
|
|
19
|
+
# Define elements of the pipeline. See LoadPipelineStageBuilder
|
|
20
|
+
# for details.
|
|
21
|
+
#
|
|
22
|
+
# @deprecated
|
|
23
|
+
def pipeline(&block)
|
|
24
|
+
sinks_and_transformations = @wrapped_builder.build(&block)
|
|
25
|
+
@sinks = sinks_and_transformations[:sinks]
|
|
26
|
+
@transformations = sinks_and_transformations[:transformations] || []
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# @api private
|
|
30
|
+
def set_default_stage_values
|
|
31
|
+
unless defined? @sinks
|
|
32
|
+
pipeline do
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
@pre_execution_strategies << lambda {|stage, etl_batch, reextract|
|
|
37
|
+
stage.sink(:error).truncate if reextract && stage.sink(:error)
|
|
38
|
+
stage.sink(:default).
|
|
39
|
+
set_constant_values(:_inserted_at => Time.now)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
@filter_strategy ||= lambda {|dataset, etl_batch|
|
|
43
|
+
dataset.filter_to_etl_batch(etl_batch)
|
|
44
|
+
}
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
data/lib/chicago/etl/stage.rb
CHANGED
|
@@ -16,20 +16,26 @@ module Chicago
|
|
|
16
16
|
@source = options[:source]
|
|
17
17
|
@sinks = options[:sinks]
|
|
18
18
|
@transformations = options[:transformations] || []
|
|
19
|
-
@filter_strategy = options[:filter_strategy] ||
|
|
20
|
-
|
|
21
|
-
@
|
|
19
|
+
@filter_strategy = options[:filter_strategy] || lambda {|s, _| s }
|
|
20
|
+
@pre_execution_strategies = options[:pre_execution_strategies] || []
|
|
21
|
+
@executable = options.has_key?(:executable) ? options[:executable] : true
|
|
22
22
|
|
|
23
23
|
validate_arguments
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
# Returns
|
|
27
|
-
def
|
|
28
|
-
|
|
26
|
+
# Returns the unqualified name of this stage.
|
|
27
|
+
def task_name
|
|
28
|
+
name.name
|
|
29
29
|
end
|
|
30
|
-
|
|
30
|
+
|
|
31
|
+
# Returns true if this stage should be executed.
|
|
32
|
+
def executable?
|
|
33
|
+
@executable
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Executes this stage in the context of an ETL::Batch
|
|
31
37
|
def execute(etl_batch, reextract=false)
|
|
32
|
-
|
|
38
|
+
prepare_stage(etl_batch, reextract)
|
|
33
39
|
transform_and_load filtered_source(etl_batch, reextract)
|
|
34
40
|
end
|
|
35
41
|
|
|
@@ -42,6 +48,7 @@ module Chicago
|
|
|
42
48
|
@sinks.values
|
|
43
49
|
end
|
|
44
50
|
|
|
51
|
+
# @api private
|
|
45
52
|
def filtered_source(etl_batch, reextract=false)
|
|
46
53
|
filtered_dataset = reextract ? source :
|
|
47
54
|
@filter_strategy.call(source, etl_batch)
|
|
@@ -51,6 +58,12 @@ module Chicago
|
|
|
51
58
|
|
|
52
59
|
private
|
|
53
60
|
|
|
61
|
+
def prepare_stage(etl_batch, reextract)
|
|
62
|
+
@pre_execution_strategies.each do |strategy|
|
|
63
|
+
strategy.call(self, etl_batch, reextract)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
54
67
|
def transform_and_load(source)
|
|
55
68
|
sinks.each(&:open)
|
|
56
69
|
pipe_rows_to_sinks_from(source)
|
|
@@ -6,20 +6,42 @@ module Chicago
|
|
|
6
6
|
end
|
|
7
7
|
|
|
8
8
|
def build(name, &block)
|
|
9
|
-
@
|
|
10
|
-
@
|
|
9
|
+
@pre_execution_strategies = []
|
|
10
|
+
@executable = true
|
|
11
11
|
|
|
12
12
|
instance_eval &block
|
|
13
|
-
|
|
13
|
+
set_default_stage_values
|
|
14
|
+
|
|
14
15
|
Stage.new(name,
|
|
15
16
|
:source => @dataset,
|
|
16
17
|
:sinks => @sinks,
|
|
17
18
|
:transformations => @transformations,
|
|
18
|
-
:filter_strategy => @filter_strategy
|
|
19
|
+
:filter_strategy => @filter_strategy,
|
|
20
|
+
:pre_execution_strategies => @pre_execution_strategies,
|
|
21
|
+
:executable => @executable)
|
|
19
22
|
end
|
|
20
23
|
|
|
21
24
|
protected
|
|
22
25
|
|
|
26
|
+
# Specifies that the sinks should be truncated before loading
|
|
27
|
+
# data.
|
|
28
|
+
def truncate_pre_load
|
|
29
|
+
@pre_execution_strategies << lambda {|stage, etl_batch, reextract|
|
|
30
|
+
stage.sinks.each {|sink| sink.truncate }
|
|
31
|
+
}
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Specifies that the dataset should never be filtered to the ETL
|
|
35
|
+
# batch - i.e. it should behave as if reextract was always true
|
|
36
|
+
def full_reload
|
|
37
|
+
@filter_strategy = lambda {|dataset, etl_batch| dataset }
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Mark this stage as executable or non-executable.
|
|
41
|
+
def executable(value=true)
|
|
42
|
+
@executable = value
|
|
43
|
+
end
|
|
44
|
+
|
|
23
45
|
def source(&block)
|
|
24
46
|
@dataset = DatasetBuilder.new(@db).build(&block)
|
|
25
47
|
end
|
|
@@ -32,12 +54,18 @@ module Chicago
|
|
|
32
54
|
@sinks = SinkBuilder.new.build(&block)
|
|
33
55
|
end
|
|
34
56
|
|
|
35
|
-
# TODO: think of potentially better ways of
|
|
57
|
+
# TODO: think of potentially better ways of dealing with this
|
|
36
58
|
# problem.
|
|
37
59
|
def filter_strategy(&block)
|
|
38
60
|
@filter_strategy = block
|
|
39
61
|
end
|
|
40
62
|
|
|
63
|
+
# @api private
|
|
64
|
+
def set_default_stage_values
|
|
65
|
+
@sinks ||= sinks {}
|
|
66
|
+
@transformations ||= transformations {}
|
|
67
|
+
end
|
|
68
|
+
|
|
41
69
|
class TransformationBuilder
|
|
42
70
|
def build(&block)
|
|
43
71
|
@transformations = []
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module Chicago
|
|
2
|
+
module ETL
|
|
3
|
+
# A namespaced name for an ETL stage.
|
|
4
|
+
#
|
|
5
|
+
# @api private
|
|
6
|
+
class StageName
|
|
7
|
+
def initialize(*names)
|
|
8
|
+
if names.size == 1 && names.first.kind_of?(String)
|
|
9
|
+
@names = names.first.split(".").map(&:to_sym).freeze
|
|
10
|
+
else
|
|
11
|
+
@names = names.flatten.map(&:to_sym).freeze
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def name
|
|
16
|
+
@names.last
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def match?(*pattern)
|
|
20
|
+
pattern.flatten!
|
|
21
|
+
return false if pattern.size > @names.size
|
|
22
|
+
|
|
23
|
+
pattern.each_with_index.all? do |part, i|
|
|
24
|
+
part == :* || @names[i] == part
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
alias :=~ :match?
|
|
28
|
+
|
|
29
|
+
def namespace
|
|
30
|
+
@names[0...(@names.size - 1)]
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def eql?(other)
|
|
34
|
+
to_s == other.to_s
|
|
35
|
+
end
|
|
36
|
+
alias :== :eql?
|
|
37
|
+
|
|
38
|
+
def hash
|
|
39
|
+
to_s.hash
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def to_a
|
|
43
|
+
@names.dup
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def to_s
|
|
47
|
+
@string_representation ||= @names.join('.')
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
data/lib/chicago/etl.rb
CHANGED
|
@@ -19,15 +19,16 @@ require 'chicago/etl/null_sink'
|
|
|
19
19
|
require 'chicago/etl/mysql'
|
|
20
20
|
|
|
21
21
|
require 'chicago/etl/core_extensions'
|
|
22
|
+
require 'chicago/etl/stage_name'
|
|
22
23
|
require 'chicago/etl/counter'
|
|
23
24
|
require 'chicago/etl/key_builder'
|
|
24
25
|
require 'chicago/etl/schema_table_sink_factory'
|
|
26
|
+
require 'chicago/etl/schema_table_stage_builder'
|
|
25
27
|
require 'chicago/etl/transformations'
|
|
26
28
|
require 'chicago/etl/load_dataset_builder'
|
|
27
29
|
require 'chicago/etl/dataset_builder'
|
|
28
30
|
require 'chicago/etl/stage'
|
|
29
31
|
require 'chicago/etl/stage_builder'
|
|
30
|
-
require 'chicago/etl/dataset_batch_stage'
|
|
31
32
|
require 'chicago/etl/schema_sinks_and_transformations_builder'
|
|
32
33
|
require 'chicago/etl/pipeline'
|
|
33
34
|
|
|
@@ -60,9 +61,13 @@ module Chicago
|
|
|
60
61
|
# within a batch.
|
|
61
62
|
def self.execute(stage, etl_batch, reextract, logger)
|
|
62
63
|
etl_batch.perform_task(:load, stage.name) do
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
if stage.executable?
|
|
65
|
+
logger.debug "Starting loading #{stage.name}"
|
|
66
|
+
stage.execute(etl_batch, reextract)
|
|
67
|
+
logger.info "Finished loading #{stage.name}"
|
|
68
|
+
else
|
|
69
|
+
logger.info "Skipping stage #{stage.name}"
|
|
70
|
+
end
|
|
66
71
|
end
|
|
67
72
|
end
|
|
68
73
|
end
|
|
@@ -18,8 +18,8 @@ describe "creating and running a dimension stage" do
|
|
|
18
18
|
let(:pipeline) { Chicago::ETL::Pipeline.new(db, schema)}
|
|
19
19
|
|
|
20
20
|
it "glues the source, transformations, and sink correctly" do
|
|
21
|
-
pipeline.
|
|
22
|
-
|
|
21
|
+
pipeline.define_stage(:load, :dimensions, :test) do
|
|
22
|
+
source do
|
|
23
23
|
db.test_dataset_method
|
|
24
24
|
end
|
|
25
25
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe "Chicago::ETL Execution method" do
|
|
4
|
+
class StubBatch
|
|
5
|
+
def perform_task(*args)
|
|
6
|
+
yield
|
|
7
|
+
end
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
let(:logger) { mock(:logger).as_null_object }
|
|
11
|
+
let(:batch) { StubBatch.new }
|
|
12
|
+
|
|
13
|
+
it "only logs skipping the stage if the stage is not executable" do
|
|
14
|
+
stage = stub(:stage, :executable? => false, :name => "test")
|
|
15
|
+
stage.should_not_receive(:execute)
|
|
16
|
+
logger.should_receive(:info).with("Skipping stage test")
|
|
17
|
+
|
|
18
|
+
Chicago::ETL.execute(stage, batch, false, logger)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "executes the stage" do
|
|
22
|
+
stage = stub(:stage, :executable? => true, :name => "test")
|
|
23
|
+
stage.should_receive(:execute).with(batch, false)
|
|
24
|
+
|
|
25
|
+
Chicago::ETL.execute(stage, batch, false, logger)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
describe Chicago::ETL::StageName do
|
|
5
|
+
it "can be consturcted with variable args" do
|
|
6
|
+
described_class.new(:a, :b).to_a.should == [:a, :b]
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "can be constructed with an array of symbols" do
|
|
10
|
+
described_class.new([:a, :b]).to_a.should == [:a, :b]
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "can be constructed with a dot seaprated string" do
|
|
14
|
+
described_class.new("foo.bar").to_a.should == [:foo, :bar]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "has a name" do
|
|
18
|
+
described_class.new("foo.bar.baz").name.should == :baz
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "has a namespace" do
|
|
22
|
+
described_class.new("foo.bar.baz").namespace.should == [:foo, :bar]
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "supports equality" do
|
|
26
|
+
described_class.new(:a, :b).should == described_class.new(:a, :b)
|
|
27
|
+
set = Set.new
|
|
28
|
+
set << described_class.new(:a, :b)
|
|
29
|
+
set.should include(described_class.new(:a, :b))
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "has a dotted string representation" do
|
|
33
|
+
described_class.new(:a, :b).to_s.should == "a.b"
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "matches an exact pattern" do
|
|
37
|
+
described_class.new(:a, :b).match?(:a, :b).should be_true
|
|
38
|
+
described_class.new(:a, :b).match?(:a, :c).should be_false
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it "matches a left-anchored partial pattern" do
|
|
42
|
+
described_class.new(:a, :b).match?(:a).should be_true
|
|
43
|
+
described_class.new(:a, :b).match?(:b).should be_false
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "allows wildcards matching" do
|
|
47
|
+
described_class.new(:a, :b).match?(:*, :b).should be_true
|
|
48
|
+
described_class.new(:a, :b).match?(:*, :*).should be_true
|
|
49
|
+
described_class.new(:a, :b).match?(:*, :*, :*).should be_false
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "can use the =~ operator" do
|
|
53
|
+
(described_class.new(:a, :b) =~ [:*, :b]).should be_true
|
|
54
|
+
end
|
|
55
|
+
end
|
metadata
CHANGED
|
@@ -1,233 +1,248 @@
|
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: chicago-etl
|
|
3
|
-
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
prerelease:
|
|
6
|
-
segments:
|
|
7
|
-
- 0
|
|
8
|
-
- 2
|
|
9
|
-
- 1
|
|
10
|
-
version: 0.2.1
|
|
11
6
|
platform: ruby
|
|
12
|
-
authors:
|
|
7
|
+
authors:
|
|
13
8
|
- Roland Swingler
|
|
14
9
|
autorequire:
|
|
15
10
|
bindir: bin
|
|
16
11
|
cert_chain: []
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
12
|
+
date: 2013-11-26 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: chicagowarehouse
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
22
17
|
none: false
|
|
23
|
-
requirements:
|
|
18
|
+
requirements:
|
|
24
19
|
- - ~>
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
|
|
27
|
-
segments:
|
|
28
|
-
- 0
|
|
29
|
-
- 4
|
|
30
|
-
version: "0.4"
|
|
31
|
-
requirement: *id001
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0.4'
|
|
32
22
|
type: :runtime
|
|
33
23
|
prerelease: false
|
|
34
|
-
|
|
35
|
-
- !ruby/object:Gem::Dependency
|
|
36
|
-
version_requirements: &id002 !ruby/object:Gem::Requirement
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
25
|
none: false
|
|
38
|
-
requirements:
|
|
39
|
-
- -
|
|
40
|
-
- !ruby/object:Gem::Version
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
- 0
|
|
44
|
-
version: "0"
|
|
45
|
-
requirement: *id002
|
|
46
|
-
type: :runtime
|
|
47
|
-
prerelease: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ~>
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: '0.4'
|
|
30
|
+
- !ruby/object:Gem::Dependency
|
|
48
31
|
name: fastercsv
|
|
49
|
-
|
|
50
|
-
version_requirements: &id003 !ruby/object:Gem::Requirement
|
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
|
51
33
|
none: false
|
|
52
|
-
requirements:
|
|
53
|
-
- -
|
|
54
|
-
- !ruby/object:Gem::Version
|
|
55
|
-
|
|
56
|
-
segments:
|
|
57
|
-
- 0
|
|
58
|
-
version: "0"
|
|
59
|
-
requirement: *id003
|
|
34
|
+
requirements:
|
|
35
|
+
- - ! '>='
|
|
36
|
+
- !ruby/object:Gem::Version
|
|
37
|
+
version: '0'
|
|
60
38
|
type: :runtime
|
|
61
39
|
prerelease: false
|
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
41
|
+
none: false
|
|
42
|
+
requirements:
|
|
43
|
+
- - ! '>='
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '0'
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
62
47
|
name: sequel
|
|
63
|
-
|
|
64
|
-
version_requirements: &id004 !ruby/object:Gem::Requirement
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
65
49
|
none: false
|
|
66
|
-
requirements:
|
|
67
|
-
- -
|
|
68
|
-
- !ruby/object:Gem::Version
|
|
69
|
-
|
|
70
|
-
segments:
|
|
71
|
-
- 0
|
|
72
|
-
- 0
|
|
73
|
-
- 2
|
|
74
|
-
version: 0.0.2
|
|
75
|
-
requirement: *id004
|
|
50
|
+
requirements:
|
|
51
|
+
- - ! '>='
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '0'
|
|
76
54
|
type: :runtime
|
|
77
55
|
prerelease: false
|
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
+
none: false
|
|
58
|
+
requirements:
|
|
59
|
+
- - ! '>='
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
- !ruby/object:Gem::Dependency
|
|
78
63
|
name: sequel_load_data_infile
|
|
79
|
-
|
|
80
|
-
version_requirements: &id005 !ruby/object:Gem::Requirement
|
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
|
81
65
|
none: false
|
|
82
|
-
requirements:
|
|
83
|
-
- -
|
|
84
|
-
- !ruby/object:Gem::Version
|
|
85
|
-
|
|
86
|
-
segments:
|
|
87
|
-
- 0
|
|
88
|
-
version: "0"
|
|
89
|
-
requirement: *id005
|
|
66
|
+
requirements:
|
|
67
|
+
- - ! '>='
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: 0.0.2
|
|
90
70
|
type: :runtime
|
|
91
71
|
prerelease: false
|
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
+
none: false
|
|
74
|
+
requirements:
|
|
75
|
+
- - ! '>='
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: 0.0.2
|
|
78
|
+
- !ruby/object:Gem::Dependency
|
|
92
79
|
name: sequel_fast_columns
|
|
93
|
-
|
|
94
|
-
version_requirements: &id006 !ruby/object:Gem::Requirement
|
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
|
95
81
|
none: false
|
|
96
|
-
requirements:
|
|
97
|
-
- -
|
|
98
|
-
- !ruby/object:Gem::Version
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
- 2
|
|
102
|
-
version: "2"
|
|
103
|
-
requirement: *id006
|
|
104
|
-
type: :development
|
|
82
|
+
requirements:
|
|
83
|
+
- - ! '>='
|
|
84
|
+
- !ruby/object:Gem::Version
|
|
85
|
+
version: '0'
|
|
86
|
+
type: :runtime
|
|
105
87
|
prerelease: false
|
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
89
|
+
none: false
|
|
90
|
+
requirements:
|
|
91
|
+
- - ! '>='
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0'
|
|
94
|
+
- !ruby/object:Gem::Dependency
|
|
106
95
|
name: rspec
|
|
107
|
-
|
|
108
|
-
version_requirements: &id007 !ruby/object:Gem::Requirement
|
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
|
109
97
|
none: false
|
|
110
|
-
requirements:
|
|
111
|
-
- -
|
|
112
|
-
- !ruby/object:Gem::Version
|
|
113
|
-
|
|
114
|
-
segments:
|
|
115
|
-
- 0
|
|
116
|
-
version: "0"
|
|
117
|
-
requirement: *id007
|
|
98
|
+
requirements:
|
|
99
|
+
- - ~>
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: '2'
|
|
118
102
|
type: :development
|
|
119
103
|
prerelease: false
|
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
105
|
+
none: false
|
|
106
|
+
requirements:
|
|
107
|
+
- - ~>
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: '2'
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
120
111
|
name: timecop
|
|
121
|
-
|
|
122
|
-
version_requirements: &id008 !ruby/object:Gem::Requirement
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
123
113
|
none: false
|
|
124
|
-
requirements:
|
|
125
|
-
- -
|
|
126
|
-
- !ruby/object:Gem::Version
|
|
127
|
-
|
|
128
|
-
segments:
|
|
129
|
-
- 0
|
|
130
|
-
version: "0"
|
|
131
|
-
requirement: *id008
|
|
114
|
+
requirements:
|
|
115
|
+
- - ! '>='
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0'
|
|
132
118
|
type: :development
|
|
133
119
|
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
none: false
|
|
122
|
+
requirements:
|
|
123
|
+
- - ! '>='
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: '0'
|
|
126
|
+
- !ruby/object:Gem::Dependency
|
|
134
127
|
name: yard
|
|
135
|
-
|
|
136
|
-
version_requirements: &id009 !ruby/object:Gem::Requirement
|
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
|
137
129
|
none: false
|
|
138
|
-
requirements:
|
|
139
|
-
- -
|
|
140
|
-
- !ruby/object:Gem::Version
|
|
141
|
-
|
|
142
|
-
segments:
|
|
143
|
-
- 0
|
|
144
|
-
version: "0"
|
|
145
|
-
requirement: *id009
|
|
130
|
+
requirements:
|
|
131
|
+
- - ! '>='
|
|
132
|
+
- !ruby/object:Gem::Version
|
|
133
|
+
version: '0'
|
|
146
134
|
type: :development
|
|
147
135
|
prerelease: false
|
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
137
|
+
none: false
|
|
138
|
+
requirements:
|
|
139
|
+
- - ! '>='
|
|
140
|
+
- !ruby/object:Gem::Version
|
|
141
|
+
version: '0'
|
|
142
|
+
- !ruby/object:Gem::Dependency
|
|
148
143
|
name: flog
|
|
149
|
-
|
|
150
|
-
version_requirements: &id010 !ruby/object:Gem::Requirement
|
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
|
151
145
|
none: false
|
|
152
|
-
requirements:
|
|
153
|
-
- -
|
|
154
|
-
- !ruby/object:Gem::Version
|
|
155
|
-
|
|
156
|
-
segments:
|
|
157
|
-
- 0
|
|
158
|
-
version: "0"
|
|
159
|
-
requirement: *id010
|
|
146
|
+
requirements:
|
|
147
|
+
- - ! '>='
|
|
148
|
+
- !ruby/object:Gem::Version
|
|
149
|
+
version: '0'
|
|
160
150
|
type: :development
|
|
161
151
|
prerelease: false
|
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
153
|
+
none: false
|
|
154
|
+
requirements:
|
|
155
|
+
- - ! '>='
|
|
156
|
+
- !ruby/object:Gem::Version
|
|
157
|
+
version: '0'
|
|
158
|
+
- !ruby/object:Gem::Dependency
|
|
162
159
|
name: simplecov
|
|
163
|
-
|
|
164
|
-
version_requirements: &id011 !ruby/object:Gem::Requirement
|
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
|
165
161
|
none: false
|
|
166
|
-
requirements:
|
|
167
|
-
- -
|
|
168
|
-
- !ruby/object:Gem::Version
|
|
169
|
-
|
|
170
|
-
segments:
|
|
171
|
-
- 0
|
|
172
|
-
version: "0"
|
|
173
|
-
requirement: *id011
|
|
162
|
+
requirements:
|
|
163
|
+
- - ! '>='
|
|
164
|
+
- !ruby/object:Gem::Version
|
|
165
|
+
version: '0'
|
|
174
166
|
type: :development
|
|
175
167
|
prerelease: false
|
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
169
|
+
none: false
|
|
170
|
+
requirements:
|
|
171
|
+
- - ! '>='
|
|
172
|
+
- !ruby/object:Gem::Version
|
|
173
|
+
version: '0'
|
|
174
|
+
- !ruby/object:Gem::Dependency
|
|
176
175
|
name: ZenTest
|
|
177
|
-
|
|
178
|
-
version_requirements: &id012 !ruby/object:Gem::Requirement
|
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
|
179
177
|
none: false
|
|
180
|
-
requirements:
|
|
181
|
-
- -
|
|
182
|
-
- !ruby/object:Gem::Version
|
|
183
|
-
|
|
184
|
-
segments:
|
|
185
|
-
- 2
|
|
186
|
-
- 8
|
|
187
|
-
- 1
|
|
188
|
-
version: 2.8.1
|
|
189
|
-
requirement: *id012
|
|
178
|
+
requirements:
|
|
179
|
+
- - ! '>='
|
|
180
|
+
- !ruby/object:Gem::Version
|
|
181
|
+
version: '0'
|
|
190
182
|
type: :development
|
|
191
183
|
prerelease: false
|
|
184
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
185
|
+
none: false
|
|
186
|
+
requirements:
|
|
187
|
+
- - ! '>='
|
|
188
|
+
- !ruby/object:Gem::Version
|
|
189
|
+
version: '0'
|
|
190
|
+
- !ruby/object:Gem::Dependency
|
|
192
191
|
name: mysql
|
|
193
|
-
|
|
194
|
-
version_requirements: &id013 !ruby/object:Gem::Requirement
|
|
192
|
+
requirement: !ruby/object:Gem::Requirement
|
|
195
193
|
none: false
|
|
196
|
-
requirements:
|
|
197
|
-
- -
|
|
198
|
-
- !ruby/object:Gem::Version
|
|
199
|
-
|
|
200
|
-
segments:
|
|
201
|
-
- 1
|
|
202
|
-
version: "1"
|
|
203
|
-
requirement: *id013
|
|
194
|
+
requirements:
|
|
195
|
+
- - '='
|
|
196
|
+
- !ruby/object:Gem::Version
|
|
197
|
+
version: 2.8.1
|
|
204
198
|
type: :development
|
|
205
199
|
prerelease: false
|
|
200
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
201
|
+
none: false
|
|
202
|
+
requirements:
|
|
203
|
+
- - '='
|
|
204
|
+
- !ruby/object:Gem::Version
|
|
205
|
+
version: 2.8.1
|
|
206
|
+
- !ruby/object:Gem::Dependency
|
|
206
207
|
name: bundler
|
|
207
|
-
|
|
208
|
-
version_requirements: &id014 !ruby/object:Gem::Requirement
|
|
208
|
+
requirement: !ruby/object:Gem::Requirement
|
|
209
209
|
none: false
|
|
210
|
-
requirements:
|
|
211
|
-
- -
|
|
212
|
-
- !ruby/object:Gem::Version
|
|
213
|
-
|
|
214
|
-
segments:
|
|
215
|
-
- 0
|
|
216
|
-
version: "0"
|
|
217
|
-
requirement: *id014
|
|
210
|
+
requirements:
|
|
211
|
+
- - ~>
|
|
212
|
+
- !ruby/object:Gem::Version
|
|
213
|
+
version: '1'
|
|
218
214
|
type: :development
|
|
219
215
|
prerelease: false
|
|
216
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
217
|
+
none: false
|
|
218
|
+
requirements:
|
|
219
|
+
- - ~>
|
|
220
|
+
- !ruby/object:Gem::Version
|
|
221
|
+
version: '1'
|
|
222
|
+
- !ruby/object:Gem::Dependency
|
|
220
223
|
name: jeweler
|
|
224
|
+
requirement: !ruby/object:Gem::Requirement
|
|
225
|
+
none: false
|
|
226
|
+
requirements:
|
|
227
|
+
- - ! '>='
|
|
228
|
+
- !ruby/object:Gem::Version
|
|
229
|
+
version: '0'
|
|
230
|
+
type: :development
|
|
231
|
+
prerelease: false
|
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
233
|
+
none: false
|
|
234
|
+
requirements:
|
|
235
|
+
- - ! '>='
|
|
236
|
+
- !ruby/object:Gem::Version
|
|
237
|
+
version: '0'
|
|
221
238
|
description: ETL tools for Chicago
|
|
222
239
|
email: roland.swingler@gmail.com
|
|
223
240
|
executables: []
|
|
224
|
-
|
|
225
241
|
extensions: []
|
|
226
|
-
|
|
227
|
-
extra_rdoc_files:
|
|
242
|
+
extra_rdoc_files:
|
|
228
243
|
- LICENSE.txt
|
|
229
244
|
- README.rdoc
|
|
230
|
-
files:
|
|
245
|
+
files:
|
|
231
246
|
- .document
|
|
232
247
|
- .rspec
|
|
233
248
|
- Gemfile
|
|
@@ -244,7 +259,6 @@ files:
|
|
|
244
259
|
- lib/chicago/etl/batch.rb
|
|
245
260
|
- lib/chicago/etl/core_extensions.rb
|
|
246
261
|
- lib/chicago/etl/counter.rb
|
|
247
|
-
- lib/chicago/etl/dataset_batch_stage.rb
|
|
248
262
|
- lib/chicago/etl/dataset_builder.rb
|
|
249
263
|
- lib/chicago/etl/dataset_source.rb
|
|
250
264
|
- lib/chicago/etl/errors.rb
|
|
@@ -259,6 +273,7 @@ files:
|
|
|
259
273
|
- lib/chicago/etl/pipeline_endpoint.rb
|
|
260
274
|
- lib/chicago/etl/schema_sinks_and_transformations_builder.rb
|
|
261
275
|
- lib/chicago/etl/schema_table_sink_factory.rb
|
|
276
|
+
- lib/chicago/etl/schema_table_stage_builder.rb
|
|
262
277
|
- lib/chicago/etl/screens/column_screen.rb
|
|
263
278
|
- lib/chicago/etl/screens/invalid_element.rb
|
|
264
279
|
- lib/chicago/etl/screens/missing_value.rb
|
|
@@ -268,6 +283,7 @@ files:
|
|
|
268
283
|
- lib/chicago/etl/sink.rb
|
|
269
284
|
- lib/chicago/etl/stage.rb
|
|
270
285
|
- lib/chicago/etl/stage_builder.rb
|
|
286
|
+
- lib/chicago/etl/stage_name.rb
|
|
271
287
|
- lib/chicago/etl/table_builder.rb
|
|
272
288
|
- lib/chicago/etl/task_invocation.rb
|
|
273
289
|
- lib/chicago/etl/tasks.rb
|
|
@@ -287,6 +303,7 @@ files:
|
|
|
287
303
|
- spec/etl/define_dimension_stage_spec.rb
|
|
288
304
|
- spec/etl/define_stage_spec.rb
|
|
289
305
|
- spec/etl/etl_batch_id_dataset_filter.rb
|
|
306
|
+
- spec/etl/execution_wrapper_spec.rb
|
|
290
307
|
- spec/etl/filter_spec.rb
|
|
291
308
|
- spec/etl/key_builder_spec.rb
|
|
292
309
|
- spec/etl/load_dataset_builder_spec.rb
|
|
@@ -300,6 +317,7 @@ files:
|
|
|
300
317
|
- spec/etl/screens/out_of_bounds_spec.rb
|
|
301
318
|
- spec/etl/sequel/dependant_tables_spec.rb
|
|
302
319
|
- spec/etl/sequel/filter_to_etl_batch_spec.rb
|
|
320
|
+
- spec/etl/stage_name_spec.rb
|
|
303
321
|
- spec/etl/stage_spec.rb
|
|
304
322
|
- spec/etl/table_builder_spec.rb
|
|
305
323
|
- spec/etl/task_spec.rb
|
|
@@ -311,37 +329,31 @@ files:
|
|
|
311
329
|
- spec/etl/transformations_spec.rb
|
|
312
330
|
- spec/spec_helper.rb
|
|
313
331
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
|
314
|
-
licenses:
|
|
332
|
+
licenses:
|
|
315
333
|
- MIT
|
|
316
334
|
post_install_message:
|
|
317
335
|
rdoc_options: []
|
|
318
|
-
|
|
319
|
-
require_paths:
|
|
336
|
+
require_paths:
|
|
320
337
|
- lib
|
|
321
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
|
338
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
322
339
|
none: false
|
|
323
|
-
requirements:
|
|
324
|
-
- -
|
|
325
|
-
- !ruby/object:Gem::Version
|
|
326
|
-
|
|
327
|
-
segments:
|
|
340
|
+
requirements:
|
|
341
|
+
- - ! '>='
|
|
342
|
+
- !ruby/object:Gem::Version
|
|
343
|
+
version: '0'
|
|
344
|
+
segments:
|
|
328
345
|
- 0
|
|
329
|
-
|
|
330
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
346
|
+
hash: -2294158408277347233
|
|
347
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
331
348
|
none: false
|
|
332
|
-
requirements:
|
|
333
|
-
- -
|
|
334
|
-
- !ruby/object:Gem::Version
|
|
335
|
-
|
|
336
|
-
segments:
|
|
337
|
-
- 0
|
|
338
|
-
version: "0"
|
|
349
|
+
requirements:
|
|
350
|
+
- - ! '>='
|
|
351
|
+
- !ruby/object:Gem::Version
|
|
352
|
+
version: '0'
|
|
339
353
|
requirements: []
|
|
340
|
-
|
|
341
354
|
rubyforge_project:
|
|
342
355
|
rubygems_version: 1.8.25
|
|
343
356
|
signing_key:
|
|
344
357
|
specification_version: 3
|
|
345
358
|
summary: Chicago ETL
|
|
346
359
|
test_files: []
|
|
347
|
-
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
module Chicago
|
|
2
|
-
module ETL
|
|
3
|
-
# Allows deferring constructing a DatasetSource until extract
|
|
4
|
-
# time, so that it can be filtered to an ETL batch appropriately.
|
|
5
|
-
class DatasetBatchStage < Stage
|
|
6
|
-
# Executes this ETL stage.
|
|
7
|
-
#
|
|
8
|
-
# Configures the dataset and flows rows into the pipeline.
|
|
9
|
-
def execute(etl_batch, reextract=false)
|
|
10
|
-
if reextract && sink(:error) && !truncate_pre_load?
|
|
11
|
-
sink(:error).truncate
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
sink(:default).set_constant_values(:_inserted_at => Time.now)
|
|
15
|
-
super
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
end
|