chicago-etl 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.2.0"
8
+ s.version = "0.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-18"
12
+ s.date = "2013-11-19"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -1,25 +1,13 @@
1
1
  module Chicago
2
2
  module ETL
3
- # Links a PipelineStage to a Dataset.
4
- #
5
3
  # Allows deferring constructing a DatasetSource until extract
6
4
  # time, so that it can be filtered to an ETL batch appropriately.
7
5
  class DatasetBatchStage < Stage
8
- attr_reader :name
9
-
10
- def initialize(name, options={})
11
- super
12
- @filter_strategy = options[:filter_strategy] ||
13
- lambda { |dataset, etl_batch| @source.filter_to_etl_batch(etl_batch)}
14
- end
15
-
16
6
  # Executes this ETL stage.
17
7
  #
18
8
  # Configures the dataset and flows rows into the pipeline.
19
9
  def execute(etl_batch, reextract=false)
20
- if @truncate_pre_load
21
- sinks.each {|sink| sink.truncate }
22
- elsif reextract && sink(:error)
10
+ if reextract && sink(:error) && !truncate_pre_load?
23
11
  sink(:error).truncate
24
12
  end
25
13
 
@@ -65,13 +65,17 @@ module Chicago
65
65
  pipeline do
66
66
  end
67
67
  end
68
- DatasetBatchStage.new(name,
69
- :source => @dataset,
70
- :transformations => @sinks_and_transformations[:transformations],
71
- :sinks => @sinks_and_transformations[:sinks],
72
- :filter_strategy => @filter_strategy,
73
- :truncate_pre_load => @truncate_pre_load)
74
68
 
69
+ @filter_strategy ||= lambda {|dataset, etl_batch|
70
+ dataset.filter_to_etl_batch(etl_batch)
71
+ }
72
+
73
+ DatasetBatchStage.new(name,
74
+ :source => @dataset,
75
+ :transformations => @sinks_and_transformations[:transformations],
76
+ :sinks => @sinks_and_transformations[:sinks],
77
+ :filter_strategy => @filter_strategy,
78
+ :truncate_pre_load => @truncate_pre_load)
75
79
  end
76
80
 
77
81
  protected
@@ -18,11 +18,18 @@ module Chicago
18
18
  @transformations = options[:transformations] || []
19
19
  @filter_strategy = options[:filter_strategy] ||
20
20
  lambda {|source, _| source }
21
+ @truncate_pre_load = !!options[:truncate_pre_load]
21
22
 
22
23
  validate_arguments
23
24
  end
25
+
26
+ # Returns true if the sinks should be truncated pre-load.
27
+ def truncate_pre_load?
28
+ @truncate_pre_load
29
+ end
24
30
 
25
31
  def execute(etl_batch, reextract=false)
32
+ sinks.each {|sink| sink.truncate } if truncate_pre_load?
26
33
  transform_and_load filtered_source(etl_batch, reextract)
27
34
  end
28
35
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-11-18 00:00:00 Z
18
+ date: 2013-11-19 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement