chicago-etl 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +2 -2
- data/lib/chicago/etl/dataset_batch_stage.rb +1 -13
- data/lib/chicago/etl/pipeline.rb +10 -6
- data/lib/chicago/etl/stage.rb +7 -0
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.2.0"
|
8
|
+
s.version = "0.2.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-11-
|
12
|
+
s.date = "2013-11-19"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/chicago/etl/dataset_batch_stage.rb
CHANGED
@@ -1,25 +1,13 @@
|
|
1
1
|
module Chicago
|
2
2
|
module ETL
|
3
|
-
# Links a PipelineStage to a Dataset.
|
4
|
-
#
|
5
3
|
# Allows deferring constructing a DatasetSource until extract
|
6
4
|
# time, so that it can be filtered to an ETL batch appropriately.
|
7
5
|
class DatasetBatchStage < Stage
|
8
|
-
attr_reader :name
|
9
|
-
|
10
|
-
def initialize(name, options={})
|
11
|
-
super
|
12
|
-
@filter_strategy = options[:filter_strategy] ||
|
13
|
-
lambda { |dataset, etl_batch| @source.filter_to_etl_batch(etl_batch)}
|
14
|
-
end
|
15
|
-
|
16
6
|
# Executes this ETL stage.
|
17
7
|
#
|
18
8
|
# Configures the dataset and flows rows into the pipeline.
|
19
9
|
def execute(etl_batch, reextract=false)
|
20
|
-
if
|
21
|
-
sinks.each {|sink| sink.truncate }
|
22
|
-
elsif reextract && sink(:error)
|
10
|
+
if reextract && sink(:error) && !truncate_pre_load?
|
23
11
|
sink(:error).truncate
|
24
12
|
end
|
25
13
|
|
data/lib/chicago/etl/pipeline.rb
CHANGED
@@ -65,13 +65,17 @@ module Chicago
|
|
65
65
|
pipeline do
|
66
66
|
end
|
67
67
|
end
|
68
|
-
DatasetBatchStage.new(name,
|
69
|
-
:source => @dataset,
|
70
|
-
:transformations => @sinks_and_transformations[:transformations],
|
71
|
-
:sinks => @sinks_and_transformations[:sinks],
|
72
|
-
:filter_strategy => @filter_strategy,
|
73
|
-
:truncate_pre_load => @truncate_pre_load)
|
74
68
|
|
69
|
+
@filter_strategy ||= lambda {|dataset, etl_batch|
|
70
|
+
dataset.filter_to_etl_batch(etl_batch)
|
71
|
+
}
|
72
|
+
|
73
|
+
DatasetBatchStage.new(name,
|
74
|
+
:source => @dataset,
|
75
|
+
:transformations => @sinks_and_transformations[:transformations],
|
76
|
+
:sinks => @sinks_and_transformations[:sinks],
|
77
|
+
:filter_strategy => @filter_strategy,
|
78
|
+
:truncate_pre_load => @truncate_pre_load)
|
75
79
|
end
|
76
80
|
|
77
81
|
protected
|
data/lib/chicago/etl/stage.rb
CHANGED
@@ -18,11 +18,18 @@ module Chicago
|
|
18
18
|
@transformations = options[:transformations] || []
|
19
19
|
@filter_strategy = options[:filter_strategy] ||
|
20
20
|
lambda {|source, _| source }
|
21
|
+
@truncate_pre_load = !!options[:truncate_pre_load]
|
21
22
|
|
22
23
|
validate_arguments
|
23
24
|
end
|
25
|
+
|
26
|
+
# Returns true if the sinks should be truncated pre-load.
|
27
|
+
def truncate_pre_load?
|
28
|
+
@truncate_pre_load
|
29
|
+
end
|
24
30
|
|
25
31
|
def execute(etl_batch, reextract=false)
|
32
|
+
sinks.each {|sink| sink.truncate } if truncate_pre_load?
|
26
33
|
transform_and_load filtered_source(etl_batch, reextract)
|
27
34
|
end
|
28
35
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 0.2.0
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Roland Swingler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-11-
|
18
|
+
date: 2013-11-19 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|