chicago-etl 0.2.7 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZjJjZDBlOTdlMDlmZTZhNzAyM2Q1N2YxZTkxY2Q0YjA2ZThhNGU2OA==
4
+ NmMzMzcxMGE2ZjhmOTU5OTAyNmIzMDNiYjdmY2E5OGRjYTdiNDExYw==
5
5
  data.tar.gz: !binary |-
6
- NWY2M2Q1ZWUyMTFlNzY0NGM3MTBhMDM1MjZiMjlmYjI4OTdhNjM3MA==
6
+ OGM1OGM3NzBmMjA3OTA5NWQxNjcxMDQ3NDIzMmNkY2ZkYzI4MTBlNg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YzA1MzI3MGFlMjc1ZmMyODc3MjJkODlhZDRjNzU1ODQ4MTdmNDhhZjFhYzg3
10
- N2Q3NDY4ZmI2NmI1OTg1YTdkNjBmYjQxOGVjNmYwMzEyMTg0YzZiODA5MmQ2
11
- NjcxZTk1NzliMWI3MWZlMWQyM2M5NjFmMzU1NjNhMjhmYThhMTY=
9
+ MjhhODU2MDU0MmZmM2NmMzczMDEyZGI2MmVjMDAzYTI3MTEzYjZjMjZhOTI5
10
+ ODM2Zjc2YzYyY2Q3MGUzZWI4NjYxYzQ0ZDA1NGY2OThmNWM1OTBhMjI0N2Nj
11
+ NDIwYjRjMGI1YzI4NmJkNGIzOTRmMzk5OTk3YTY4ZmM2MDQ5NzM=
12
12
  data.tar.gz: !binary |-
13
- M2Y4ZDY0NjVhY2U1NTEwZmQxOGYzMjM5NGViMTE2MWIyZDIxZGYzOTU3ZGE0
14
- NTkyODU5YWMzMzkwNTMxODg0ZWRkOGZiZDUyMWNmNzVlOWYxMTBlOTM3NWYx
15
- NDkzNzdmYzQyY2E0MWI4NzdhZGRlNzdlYTYyYTgzOWFiYzBkOWE=
13
+ N2RlM2JlYjc0MTRjMmRlMjhkMDUyYTlhZWZjMzU2YTA0YWI1M2FkYzFmNjEz
14
+ YzU5MDE1MTJjNGU4Mzc4ZWQzNzIxYzM3OTI5NzA3YTAzNTQ0ODhkODU2Njc1
15
+ ZTZmMWUyOWRiYjNhNGI5ZGM4YzFlZmNmYzRiZWQ0MWIwNmNkMTQ=
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.7
1
+ 0.3.0
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: chicago-etl 0.2.7 ruby lib
5
+ # stub: chicago-etl 0.3.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "chicago-etl"
9
- s.version = "0.2.7"
9
+ s.version = "0.3.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Roland Swingler"]
14
- s.date = "2014-09-01"
14
+ s.date = "2014-09-08"
15
15
  s.description = "ETL tools for Chicago"
16
16
  s.email = "roland.swingler@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -27,7 +27,6 @@ Gem::Specification.new do |s|
27
27
  "Rakefile",
28
28
  "VERSION",
29
29
  "chicago-etl.gemspec",
30
- "chicago-flow.gemspec",
31
30
  "lib/chicago-etl.rb",
32
31
  "lib/chicago/etl.rb",
33
32
  "lib/chicago/etl/array_sink.rb",
@@ -22,7 +22,11 @@ module Chicago
22
22
  #
23
23
  # This should be used in preference to new or create.
24
24
  def instance
25
- (last_batch.nil? || last_batch.finished?) ? new : last_batch
25
+ if last_batch.nil? || last_batch.finished? || last_batch.started_at.to_date < Date.today
26
+ new
27
+ else
28
+ last_batch
29
+ end
26
30
  end
27
31
 
28
32
  # Returns the last batch run, or nil if this is the first batch.
@@ -57,6 +61,21 @@ module Chicago
57
61
  !!@reextract
58
62
  end
59
63
 
64
+ def extract_from
65
+ return if reextracting?
66
+ value = self.class.dataset.
67
+ where(:state => "Finished").
68
+ where {|r| r.id < id }.
69
+ select(:max.sql_function(:extracted_to).as(:extracted_to)).
70
+ single_value
71
+
72
+ if value && value.to_date == Date.today
73
+ (value.to_date - 1).to_time
74
+ else
75
+ value
76
+ end
77
+ end
78
+
60
79
  # Deprecated.
61
80
  #
62
81
  # @deprecated Use perform_task instead
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'date'
2
3
 
3
4
  describe Chicago::ETL::Batch do
4
5
  before :each do
@@ -55,15 +56,86 @@ describe Chicago::ETL::Batch do
55
56
  batch.should be_in_error
56
57
  end
57
58
 
58
- it "should not return a new batch if the last batch was not finished" do
59
- batch = ETL::Batch.instance.start
60
- ETL::Batch.instance == batch
59
+ it "returns nil from extract_from when re-extracting" do
60
+ batch = ETL::Batch.instance
61
+ batch.reextract
62
+ batch.start
63
+ expect(batch.extract_from).to be_nil
61
64
  end
62
65
 
63
- it "should not return a new batch if the last batch ended in error" do
64
- batch = ETL::Batch.instance.start
65
- batch.error
66
- ETL::Batch.instance.should == batch
66
+ it "returns nil from extract_from when the first batch" do
67
+ batch = ETL::Batch.instance
68
+ batch.start
69
+ expect(batch.extract_from).to be_nil
70
+ end
71
+
72
+ it "returns the previous finised batch's extracted_to as extract_from" do
73
+ Timecop.freeze(2014, 01, 6, 0, 0, 0) {
74
+ ETL::Batch.new.start.finish
75
+ }
76
+
77
+ Timecop.freeze(2014, 01, 10, 0, 0, 0) {
78
+ ETL::Batch.new.start.finish
79
+ }
80
+
81
+ ETL::Batch.new.start.error
82
+
83
+ batch = ETL::Batch.new.start
84
+ expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
85
+ end
86
+
87
+ it "returns the previous finised batch's extracted_to as extract_from" do
88
+ Timecop.freeze(2014, 01, 6, 0, 0, 0) {
89
+ ETL::Batch.new.start.finish
90
+ }
91
+
92
+ Timecop.freeze(2014, 01, 8, 0, 0, 0) {
93
+ ETL::Batch.new.start.error
94
+ }
95
+
96
+ Timecop.freeze(2014, 01, 10, 0, 0, 0) {
97
+ ETL::Batch.new.start.finish
98
+ }
99
+
100
+ batch = ETL::Batch.new.start
101
+ expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
102
+ end
103
+
104
+ it "returns yesterday, rather than extract_from if extract_from is today" do
105
+ Timecop.freeze(2014, 01, 6, 0, 0, 0)
106
+
107
+ ETL::Batch.new.start.finish
108
+
109
+ batch = ETL::Batch.new.start
110
+ expect(batch.extract_from).to eql(Time.local(2014,1,5,0,0,0))
111
+ end
112
+
113
+ context "when rerun in the same day" do
114
+ it "should not return a new batch if the last batch was not finished" do
115
+ batch = ETL::Batch.instance.start
116
+ expect(ETL::Batch.instance).to eql(batch)
117
+ end
118
+
119
+ it "should not return a new batch if the last batch ended in error" do
120
+ batch = ETL::Batch.instance.start
121
+ batch.error
122
+ ETL::Batch.instance.should == batch
123
+ end
124
+ end
125
+
126
+ context "when rerun a day later" do
127
+ it "returns a new batch when the previous batch was unfinished" do
128
+ batch = ETL::Batch.instance.start
129
+ Timecop.freeze(Date.today + 1)
130
+ expect(ETL::Batch.instance).to_not eql(batch)
131
+ end
132
+
133
+ it "returns a new batch when the previous batch was in error" do
134
+ batch = ETL::Batch.instance.start
135
+ batch.error
136
+ Timecop.freeze(Date.today + 1)
137
+ expect(ETL::Batch.instance).to_not eql(batch)
138
+ end
67
139
  end
68
140
 
69
141
  it "should create a log in tmp/batches/1/log" do
@@ -22,4 +22,7 @@ end
22
22
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
23
23
 
24
24
  RSpec.configure do |config|
25
+ config.after :each do
26
+ Timecop.return
27
+ end
25
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roland Swingler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-01 00:00:00.000000000 Z
11
+ date: 2014-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chicagowarehouse
@@ -200,7 +200,6 @@ files:
200
200
  - Rakefile
201
201
  - VERSION
202
202
  - chicago-etl.gemspec
203
- - chicago-flow.gemspec
204
203
  - lib/chicago-etl.rb
205
204
  - lib/chicago/etl.rb
206
205
  - lib/chicago/etl/array_sink.rb
@@ -1,92 +0,0 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = "chicago-flow"
8
- s.version = "0.0.1"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Roland Swingler"]
12
- s.date = "2013-06-05"
13
- s.description = "Dataflow-style processing for hash-like rows"
14
- s.email = "roland.swingler@gmail.com"
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".rspec",
22
- "Gemfile",
23
- "LICENSE.txt",
24
- "README.rdoc",
25
- "Rakefile",
26
- "VERSION",
27
- "lib/chicago/flow.rb",
28
- "lib/chicago/flow/array_sink.rb",
29
- "lib/chicago/flow/array_source.rb",
30
- "lib/chicago/flow/dataset_source.rb",
31
- "lib/chicago/flow/filter.rb",
32
- "lib/chicago/flow/mysql.rb",
33
- "lib/chicago/flow/mysql_file_serializer.rb",
34
- "lib/chicago/flow/mysql_file_sink.rb",
35
- "lib/chicago/flow/pipeline_endpoint.rb",
36
- "lib/chicago/flow/pipeline_stage.rb",
37
- "lib/chicago/flow/sink.rb",
38
- "lib/chicago/flow/transformation.rb",
39
- "lib/chicago/flow/transformation_chain.rb",
40
- "spec/array_sink_spec.rb",
41
- "spec/array_source_spec.rb",
42
- "spec/database.yml.dist",
43
- "spec/dataset_source_spec.rb",
44
- "spec/filter_spec.rb",
45
- "spec/mysql_file_serializer_spec.rb",
46
- "spec/mysql_file_sink_spec.rb",
47
- "spec/mysql_integration_spec.rb",
48
- "spec/pipeline_stage_spec.rb",
49
- "spec/spec_helper.rb",
50
- "spec/transformation_chain_spec.rb",
51
- "spec/transformation_spec.rb"
52
- ]
53
- s.homepage = "http://github.com/notonthehighstreet/chicago-flow"
54
- s.licenses = ["MIT"]
55
- s.require_paths = ["lib"]
56
- s.rubygems_version = "1.8.24"
57
- s.summary = "Dataflow-style processing for hash-like rows"
58
-
59
- if s.respond_to? :specification_version then
60
- s.specification_version = 3
61
-
62
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
63
- s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
64
- s.add_runtime_dependency(%q<sequel>, [">= 0"])
65
- s.add_runtime_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
66
- s.add_runtime_dependency(%q<sequel_fast_columns>, [">= 0"])
67
- s.add_development_dependency(%q<mysql>, ["= 2.8.1"])
68
- s.add_development_dependency(%q<rspec>, ["~> 2"])
69
- s.add_development_dependency(%q<bundler>, ["~> 1"])
70
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
71
- else
72
- s.add_dependency(%q<fastercsv>, [">= 0"])
73
- s.add_dependency(%q<sequel>, [">= 0"])
74
- s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
75
- s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
76
- s.add_dependency(%q<mysql>, ["= 2.8.1"])
77
- s.add_dependency(%q<rspec>, ["~> 2"])
78
- s.add_dependency(%q<bundler>, ["~> 1"])
79
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
80
- end
81
- else
82
- s.add_dependency(%q<fastercsv>, [">= 0"])
83
- s.add_dependency(%q<sequel>, [">= 0"])
84
- s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
85
- s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
86
- s.add_dependency(%q<mysql>, ["= 2.8.1"])
87
- s.add_dependency(%q<rspec>, ["~> 2"])
88
- s.add_dependency(%q<bundler>, ["~> 1"])
89
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
90
- end
91
- end
92
-