chicago-etl 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.2.0"
8
+ s.version = "0.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-18"
12
+ s.date = "2013-11-19"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -1,25 +1,13 @@
1
1
  module Chicago
2
2
  module ETL
3
- # Links a PipelineStage to a Dataset.
4
- #
5
3
  # Allows deferring constructing a DatasetSource until extract
6
4
  # time, so that it can be filtered to an ETL batch appropriately.
7
5
  class DatasetBatchStage < Stage
8
- attr_reader :name
9
-
10
- def initialize(name, options={})
11
- super
12
- @filter_strategy = options[:filter_strategy] ||
13
- lambda { |dataset, etl_batch| @source.filter_to_etl_batch(etl_batch)}
14
- end
15
-
16
6
  # Executes this ETL stage.
17
7
  #
18
8
  # Configures the dataset and flows rows into the pipeline.
19
9
  def execute(etl_batch, reextract=false)
20
- if @truncate_pre_load
21
- sinks.each {|sink| sink.truncate }
22
- elsif reextract && sink(:error)
10
+ if reextract && sink(:error) && !truncate_pre_load?
23
11
  sink(:error).truncate
24
12
  end
25
13
 
@@ -65,13 +65,17 @@ module Chicago
65
65
  pipeline do
66
66
  end
67
67
  end
68
- DatasetBatchStage.new(name,
69
- :source => @dataset,
70
- :transformations => @sinks_and_transformations[:transformations],
71
- :sinks => @sinks_and_transformations[:sinks],
72
- :filter_strategy => @filter_strategy,
73
- :truncate_pre_load => @truncate_pre_load)
74
68
 
69
+ @filter_strategy ||= lambda {|dataset, etl_batch|
70
+ dataset.filter_to_etl_batch(etl_batch)
71
+ }
72
+
73
+ DatasetBatchStage.new(name,
74
+ :source => @dataset,
75
+ :transformations => @sinks_and_transformations[:transformations],
76
+ :sinks => @sinks_and_transformations[:sinks],
77
+ :filter_strategy => @filter_strategy,
78
+ :truncate_pre_load => @truncate_pre_load)
75
79
  end
76
80
 
77
81
  protected
@@ -18,11 +18,18 @@ module Chicago
18
18
  @transformations = options[:transformations] || []
19
19
  @filter_strategy = options[:filter_strategy] ||
20
20
  lambda {|source, _| source }
21
+ @truncate_pre_load = !!options[:truncate_pre_load]
21
22
 
22
23
  validate_arguments
23
24
  end
25
+
26
+ # Returns true if the sinks should be truncated pre-load.
27
+ def truncate_pre_load?
28
+ @truncate_pre_load
29
+ end
24
30
 
25
31
  def execute(etl_batch, reextract=false)
32
+ sinks.each {|sink| sink.truncate } if truncate_pre_load?
26
33
  transform_and_load filtered_source(etl_batch, reextract)
27
34
  end
28
35
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 0
10
- version: 0.2.0
9
+ - 1
10
+ version: 0.2.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-11-18 00:00:00 Z
18
+ date: 2013-11-19 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement