chicago-etl 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZjJjZDBlOTdlMDlmZTZhNzAyM2Q1N2YxZTkxY2Q0YjA2ZThhNGU2OA==
4
+ NmMzMzcxMGE2ZjhmOTU5OTAyNmIzMDNiYjdmY2E5OGRjYTdiNDExYw==
5
5
  data.tar.gz: !binary |-
6
- NWY2M2Q1ZWUyMTFlNzY0NGM3MTBhMDM1MjZiMjlmYjI4OTdhNjM3MA==
6
+ OGM1OGM3NzBmMjA3OTA5NWQxNjcxMDQ3NDIzMmNkY2ZkYzI4MTBlNg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YzA1MzI3MGFlMjc1ZmMyODc3MjJkODlhZDRjNzU1ODQ4MTdmNDhhZjFhYzg3
10
- N2Q3NDY4ZmI2NmI1OTg1YTdkNjBmYjQxOGVjNmYwMzEyMTg0YzZiODA5MmQ2
11
- NjcxZTk1NzliMWI3MWZlMWQyM2M5NjFmMzU1NjNhMjhmYThhMTY=
9
+ MjhhODU2MDU0MmZmM2NmMzczMDEyZGI2MmVjMDAzYTI3MTEzYjZjMjZhOTI5
10
+ ODM2Zjc2YzYyY2Q3MGUzZWI4NjYxYzQ0ZDA1NGY2OThmNWM1OTBhMjI0N2Nj
11
+ NDIwYjRjMGI1YzI4NmJkNGIzOTRmMzk5OTk3YTY4ZmM2MDQ5NzM=
12
12
  data.tar.gz: !binary |-
13
- M2Y4ZDY0NjVhY2U1NTEwZmQxOGYzMjM5NGViMTE2MWIyZDIxZGYzOTU3ZGE0
14
- NTkyODU5YWMzMzkwNTMxODg0ZWRkOGZiZDUyMWNmNzVlOWYxMTBlOTM3NWYx
15
- NDkzNzdmYzQyY2E0MWI4NzdhZGRlNzdlYTYyYTgzOWFiYzBkOWE=
13
+ N2RlM2JlYjc0MTRjMmRlMjhkMDUyYTlhZWZjMzU2YTA0YWI1M2FkYzFmNjEz
14
+ YzU5MDE1MTJjNGU4Mzc4ZWQzNzIxYzM3OTI5NzA3YTAzNTQ0ODhkODU2Njc1
15
+ ZTZmMWUyOWRiYjNhNGI5ZGM4YzFlZmNmYzRiZWQ0MWIwNmNkMTQ=
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.7
1
+ 0.3.0
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: chicago-etl 0.2.7 ruby lib
5
+ # stub: chicago-etl 0.3.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "chicago-etl"
9
- s.version = "0.2.7"
9
+ s.version = "0.3.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Roland Swingler"]
14
- s.date = "2014-09-01"
14
+ s.date = "2014-09-08"
15
15
  s.description = "ETL tools for Chicago"
16
16
  s.email = "roland.swingler@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -27,7 +27,6 @@ Gem::Specification.new do |s|
27
27
  "Rakefile",
28
28
  "VERSION",
29
29
  "chicago-etl.gemspec",
30
- "chicago-flow.gemspec",
31
30
  "lib/chicago-etl.rb",
32
31
  "lib/chicago/etl.rb",
33
32
  "lib/chicago/etl/array_sink.rb",
@@ -22,7 +22,11 @@ module Chicago
22
22
  #
23
23
  # This should be used in preference to new or create.
24
24
  def instance
25
- (last_batch.nil? || last_batch.finished?) ? new : last_batch
25
+ if last_batch.nil? || last_batch.finished? || last_batch.started_at.to_date < Date.today
26
+ new
27
+ else
28
+ last_batch
29
+ end
26
30
  end
27
31
 
28
32
  # Returns the last batch run, or nil if this is the first batch.
@@ -57,6 +61,21 @@ module Chicago
57
61
  !!@reextract
58
62
  end
59
63
 
64
+ def extract_from
65
+ return if reextracting?
66
+ value = self.class.dataset.
67
+ where(:state => "Finished").
68
+ where {|r| r.id < id }.
69
+ select(:max.sql_function(:extracted_to).as(:extracted_to)).
70
+ single_value
71
+
72
+ if value && value.to_date == Date.today
73
+ (value.to_date - 1).to_time
74
+ else
75
+ value
76
+ end
77
+ end
78
+
60
79
  # Deprecated.
61
80
  #
62
81
  # @deprecated Use perform_task instead
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'date'
2
3
 
3
4
  describe Chicago::ETL::Batch do
4
5
  before :each do
@@ -55,15 +56,86 @@ describe Chicago::ETL::Batch do
55
56
  batch.should be_in_error
56
57
  end
57
58
 
58
- it "should not return a new batch if the last batch was not finished" do
59
- batch = ETL::Batch.instance.start
60
- ETL::Batch.instance == batch
59
+ it "returns nil from extract_from when re-extracting" do
60
+ batch = ETL::Batch.instance
61
+ batch.reextract
62
+ batch.start
63
+ expect(batch.extract_from).to be_nil
61
64
  end
62
65
 
63
- it "should not return a new batch if the last batch ended in error" do
64
- batch = ETL::Batch.instance.start
65
- batch.error
66
- ETL::Batch.instance.should == batch
66
+ it "returns nil from extract_from when the first batch" do
67
+ batch = ETL::Batch.instance
68
+ batch.start
69
+ expect(batch.extract_from).to be_nil
70
+ end
71
+
72
+ it "returns the previous finised batch's extracted_to as extract_from" do
73
+ Timecop.freeze(2014, 01, 6, 0, 0, 0) {
74
+ ETL::Batch.new.start.finish
75
+ }
76
+
77
+ Timecop.freeze(2014, 01, 10, 0, 0, 0) {
78
+ ETL::Batch.new.start.finish
79
+ }
80
+
81
+ ETL::Batch.new.start.error
82
+
83
+ batch = ETL::Batch.new.start
84
+ expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
85
+ end
86
+
87
+ it "returns the previous finised batch's extracted_to as extract_from" do
88
+ Timecop.freeze(2014, 01, 6, 0, 0, 0) {
89
+ ETL::Batch.new.start.finish
90
+ }
91
+
92
+ Timecop.freeze(2014, 01, 8, 0, 0, 0) {
93
+ ETL::Batch.new.start.error
94
+ }
95
+
96
+ Timecop.freeze(2014, 01, 10, 0, 0, 0) {
97
+ ETL::Batch.new.start.finish
98
+ }
99
+
100
+ batch = ETL::Batch.new.start
101
+ expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
102
+ end
103
+
104
+ it "returns yesterday, rather than extract_from if extract_from is today" do
105
+ Timecop.freeze(2014, 01, 6, 0, 0, 0)
106
+
107
+ ETL::Batch.new.start.finish
108
+
109
+ batch = ETL::Batch.new.start
110
+ expect(batch.extract_from).to eql(Time.local(2014,1,5,0,0,0))
111
+ end
112
+
113
+ context "when rerun in the same day" do
114
+ it "should not return a new batch if the last batch was not finished" do
115
+ batch = ETL::Batch.instance.start
116
+ expect(ETL::Batch.instance).to eql(batch)
117
+ end
118
+
119
+ it "should not return a new batch if the last batch ended in error" do
120
+ batch = ETL::Batch.instance.start
121
+ batch.error
122
+ ETL::Batch.instance.should == batch
123
+ end
124
+ end
125
+
126
+ context "when rerun a day later" do
127
+ it "returns a new batch when the previous batch was unfinished" do
128
+ batch = ETL::Batch.instance.start
129
+ Timecop.freeze(Date.today + 1)
130
+ expect(ETL::Batch.instance).to_not eql(batch)
131
+ end
132
+
133
+ it "returns a new batch when the previous batch was in error" do
134
+ batch = ETL::Batch.instance.start
135
+ batch.error
136
+ Timecop.freeze(Date.today + 1)
137
+ expect(ETL::Batch.instance).to_not eql(batch)
138
+ end
67
139
  end
68
140
 
69
141
  it "should create a log in tmp/batches/1/log" do
@@ -22,4 +22,7 @@ end
22
22
  Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
23
23
 
24
24
  RSpec.configure do |config|
25
+ config.after :each do
26
+ Timecop.return
27
+ end
25
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roland Swingler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-01 00:00:00.000000000 Z
11
+ date: 2014-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chicagowarehouse
@@ -200,7 +200,6 @@ files:
200
200
  - Rakefile
201
201
  - VERSION
202
202
  - chicago-etl.gemspec
203
- - chicago-flow.gemspec
204
203
  - lib/chicago-etl.rb
205
204
  - lib/chicago/etl.rb
206
205
  - lib/chicago/etl/array_sink.rb
@@ -1,92 +0,0 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = "chicago-flow"
8
- s.version = "0.0.1"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Roland Swingler"]
12
- s.date = "2013-06-05"
13
- s.description = "Dataflow-style processing for hash-like rows"
14
- s.email = "roland.swingler@gmail.com"
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".rspec",
22
- "Gemfile",
23
- "LICENSE.txt",
24
- "README.rdoc",
25
- "Rakefile",
26
- "VERSION",
27
- "lib/chicago/flow.rb",
28
- "lib/chicago/flow/array_sink.rb",
29
- "lib/chicago/flow/array_source.rb",
30
- "lib/chicago/flow/dataset_source.rb",
31
- "lib/chicago/flow/filter.rb",
32
- "lib/chicago/flow/mysql.rb",
33
- "lib/chicago/flow/mysql_file_serializer.rb",
34
- "lib/chicago/flow/mysql_file_sink.rb",
35
- "lib/chicago/flow/pipeline_endpoint.rb",
36
- "lib/chicago/flow/pipeline_stage.rb",
37
- "lib/chicago/flow/sink.rb",
38
- "lib/chicago/flow/transformation.rb",
39
- "lib/chicago/flow/transformation_chain.rb",
40
- "spec/array_sink_spec.rb",
41
- "spec/array_source_spec.rb",
42
- "spec/database.yml.dist",
43
- "spec/dataset_source_spec.rb",
44
- "spec/filter_spec.rb",
45
- "spec/mysql_file_serializer_spec.rb",
46
- "spec/mysql_file_sink_spec.rb",
47
- "spec/mysql_integration_spec.rb",
48
- "spec/pipeline_stage_spec.rb",
49
- "spec/spec_helper.rb",
50
- "spec/transformation_chain_spec.rb",
51
- "spec/transformation_spec.rb"
52
- ]
53
- s.homepage = "http://github.com/notonthehighstreet/chicago-flow"
54
- s.licenses = ["MIT"]
55
- s.require_paths = ["lib"]
56
- s.rubygems_version = "1.8.24"
57
- s.summary = "Dataflow-style processing for hash-like rows"
58
-
59
- if s.respond_to? :specification_version then
60
- s.specification_version = 3
61
-
62
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
63
- s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
64
- s.add_runtime_dependency(%q<sequel>, [">= 0"])
65
- s.add_runtime_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
66
- s.add_runtime_dependency(%q<sequel_fast_columns>, [">= 0"])
67
- s.add_development_dependency(%q<mysql>, ["= 2.8.1"])
68
- s.add_development_dependency(%q<rspec>, ["~> 2"])
69
- s.add_development_dependency(%q<bundler>, ["~> 1"])
70
- s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
71
- else
72
- s.add_dependency(%q<fastercsv>, [">= 0"])
73
- s.add_dependency(%q<sequel>, [">= 0"])
74
- s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
75
- s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
76
- s.add_dependency(%q<mysql>, ["= 2.8.1"])
77
- s.add_dependency(%q<rspec>, ["~> 2"])
78
- s.add_dependency(%q<bundler>, ["~> 1"])
79
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
80
- end
81
- else
82
- s.add_dependency(%q<fastercsv>, [">= 0"])
83
- s.add_dependency(%q<sequel>, [">= 0"])
84
- s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
85
- s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
86
- s.add_dependency(%q<mysql>, ["= 2.8.1"])
87
- s.add_dependency(%q<rspec>, ["~> 2"])
88
- s.add_dependency(%q<bundler>, ["~> 1"])
89
- s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
90
- end
91
- end
92
-