chicago-etl 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/VERSION +1 -1
- data/chicago-etl.gemspec +3 -4
- data/lib/chicago/etl/batch.rb +20 -1
- data/spec/etl/batch_spec.rb +79 -7
- data/spec/spec_helper.rb +3 -0
- metadata +2 -3
- data/chicago-flow.gemspec +0 -92
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NmMzMzcxMGE2ZjhmOTU5OTAyNmIzMDNiYjdmY2E5OGRjYTdiNDExYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OGM1OGM3NzBmMjA3OTA5NWQxNjcxMDQ3NDIzMmNkY2ZkYzI4MTBlNg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MjhhODU2MDU0MmZmM2NmMzczMDEyZGI2MmVjMDAzYTI3MTEzYjZjMjZhOTI5
|
10
|
+
ODM2Zjc2YzYyY2Q3MGUzZWI4NjYxYzQ0ZDA1NGY2OThmNWM1OTBhMjI0N2Nj
|
11
|
+
NDIwYjRjMGI1YzI4NmJkNGIzOTRmMzk5OTk3YTY4ZmM2MDQ5NzM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
N2RlM2JlYjc0MTRjMmRlMjhkMDUyYTlhZWZjMzU2YTA0YWI1M2FkYzFmNjEz
|
14
|
+
YzU5MDE1MTJjNGU4Mzc4ZWQzNzIxYzM3OTI5NzA3YTAzNTQ0ODhkODU2Njc1
|
15
|
+
ZTZmMWUyOWRiYjNhNGI5ZGM4YzFlZmNmYzRiZWQ0MWIwNmNkMTQ=
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.7
|
1
|
+
0.3.0
|
data/chicago-etl.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: chicago-etl 0.2.7 ruby lib
|
5
|
+
# stub: chicago-etl 0.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "chicago-etl"
|
9
|
-
s.version = "0.2.7"
|
9
|
+
s.version = "0.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Roland Swingler"]
|
14
|
-
s.date = "2014-09-
|
14
|
+
s.date = "2014-09-08"
|
15
15
|
s.description = "ETL tools for Chicago"
|
16
16
|
s.email = "roland.swingler@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -27,7 +27,6 @@ Gem::Specification.new do |s|
|
|
27
27
|
"Rakefile",
|
28
28
|
"VERSION",
|
29
29
|
"chicago-etl.gemspec",
|
30
|
-
"chicago-flow.gemspec",
|
31
30
|
"lib/chicago-etl.rb",
|
32
31
|
"lib/chicago/etl.rb",
|
33
32
|
"lib/chicago/etl/array_sink.rb",
|
data/lib/chicago/etl/batch.rb
CHANGED
@@ -22,7 +22,11 @@ module Chicago
|
|
22
22
|
#
|
23
23
|
# This should be used in preference to new or create.
|
24
24
|
def instance
|
25
|
-
|
25
|
+
if last_batch.nil? || last_batch.finished? || last_batch.started_at.to_date < Date.today
|
26
|
+
new
|
27
|
+
else
|
28
|
+
last_batch
|
29
|
+
end
|
26
30
|
end
|
27
31
|
|
28
32
|
# Returns the last batch run, or nil if this is the first batch.
|
@@ -57,6 +61,21 @@ module Chicago
|
|
57
61
|
!!@reextract
|
58
62
|
end
|
59
63
|
|
64
|
+
def extract_from
|
65
|
+
return if reextracting?
|
66
|
+
value = self.class.dataset.
|
67
|
+
where(:state => "Finished").
|
68
|
+
where {|r| r.id < id }.
|
69
|
+
select(:max.sql_function(:extracted_to).as(:extracted_to)).
|
70
|
+
single_value
|
71
|
+
|
72
|
+
if value && value.to_date == Date.today
|
73
|
+
(value.to_date - 1).to_time
|
74
|
+
else
|
75
|
+
value
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
60
79
|
# Deprecated.
|
61
80
|
#
|
62
81
|
# @deprecated Use perform_task instead
|
data/spec/etl/batch_spec.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'date'
|
2
3
|
|
3
4
|
describe Chicago::ETL::Batch do
|
4
5
|
before :each do
|
@@ -55,15 +56,86 @@ describe Chicago::ETL::Batch do
|
|
55
56
|
batch.should be_in_error
|
56
57
|
end
|
57
58
|
|
58
|
-
it "
|
59
|
-
batch = ETL::Batch.instance
|
60
|
-
|
59
|
+
it "returns nil from extract_from when re-extracting" do
|
60
|
+
batch = ETL::Batch.instance
|
61
|
+
batch.reextract
|
62
|
+
batch.start
|
63
|
+
expect(batch.extract_from).to be_nil
|
61
64
|
end
|
62
65
|
|
63
|
-
it "
|
64
|
-
batch = ETL::Batch.instance
|
65
|
-
batch.
|
66
|
-
|
66
|
+
it "returns nil from extract_from when the first batch" do
|
67
|
+
batch = ETL::Batch.instance
|
68
|
+
batch.start
|
69
|
+
expect(batch.extract_from).to be_nil
|
70
|
+
end
|
71
|
+
|
72
|
+
it "returns the previous finised batch's extracted_to as extract_from" do
|
73
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0) {
|
74
|
+
ETL::Batch.new.start.finish
|
75
|
+
}
|
76
|
+
|
77
|
+
Timecop.freeze(2014, 01, 10, 0, 0, 0) {
|
78
|
+
ETL::Batch.new.start.finish
|
79
|
+
}
|
80
|
+
|
81
|
+
ETL::Batch.new.start.error
|
82
|
+
|
83
|
+
batch = ETL::Batch.new.start
|
84
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
|
85
|
+
end
|
86
|
+
|
87
|
+
it "returns the previous finised batch's extracted_to as extract_from" do
|
88
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0) {
|
89
|
+
ETL::Batch.new.start.finish
|
90
|
+
}
|
91
|
+
|
92
|
+
Timecop.freeze(2014, 01, 8, 0, 0, 0) {
|
93
|
+
ETL::Batch.new.start.error
|
94
|
+
}
|
95
|
+
|
96
|
+
Timecop.freeze(2014, 01, 10, 0, 0, 0) {
|
97
|
+
ETL::Batch.new.start.finish
|
98
|
+
}
|
99
|
+
|
100
|
+
batch = ETL::Batch.new.start
|
101
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
|
102
|
+
end
|
103
|
+
|
104
|
+
it "returns yesterday, rather than extract_from if extract_from is today" do
|
105
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0)
|
106
|
+
|
107
|
+
ETL::Batch.new.start.finish
|
108
|
+
|
109
|
+
batch = ETL::Batch.new.start
|
110
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,5,0,0,0))
|
111
|
+
end
|
112
|
+
|
113
|
+
context "when rerun in the same day" do
|
114
|
+
it "should not return a new batch if the last batch was not finished" do
|
115
|
+
batch = ETL::Batch.instance.start
|
116
|
+
expect(ETL::Batch.instance).to eql(batch)
|
117
|
+
end
|
118
|
+
|
119
|
+
it "should not return a new batch if the last batch ended in error" do
|
120
|
+
batch = ETL::Batch.instance.start
|
121
|
+
batch.error
|
122
|
+
ETL::Batch.instance.should == batch
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
context "when rerun a day later" do
|
127
|
+
it "returns a new batch when the previous batch was unfinished" do
|
128
|
+
batch = ETL::Batch.instance.start
|
129
|
+
Timecop.freeze(Date.today + 1)
|
130
|
+
expect(ETL::Batch.instance).to_not eql(batch)
|
131
|
+
end
|
132
|
+
|
133
|
+
it "returns a new batch when the previous batch was in error" do
|
134
|
+
batch = ETL::Batch.instance.start
|
135
|
+
batch.error
|
136
|
+
Timecop.freeze(Date.today + 1)
|
137
|
+
expect(ETL::Batch.instance).to_not eql(batch)
|
138
|
+
end
|
67
139
|
end
|
68
140
|
|
69
141
|
it "should create a log in tmp/batches/1/log" do
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roland Swingler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chicagowarehouse
|
@@ -200,7 +200,6 @@ files:
|
|
200
200
|
- Rakefile
|
201
201
|
- VERSION
|
202
202
|
- chicago-etl.gemspec
|
203
|
-
- chicago-flow.gemspec
|
204
203
|
- lib/chicago-etl.rb
|
205
204
|
- lib/chicago/etl.rb
|
206
205
|
- lib/chicago/etl/array_sink.rb
|
data/chicago-flow.gemspec
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = "chicago-flow"
|
8
|
-
s.version = "0.0.1"
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-06-05"
|
13
|
-
s.description = "Dataflow-style processing for hash-like rows"
|
14
|
-
s.email = "roland.swingler@gmail.com"
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE.txt",
|
17
|
-
"README.rdoc"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".rspec",
|
22
|
-
"Gemfile",
|
23
|
-
"LICENSE.txt",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"VERSION",
|
27
|
-
"lib/chicago/flow.rb",
|
28
|
-
"lib/chicago/flow/array_sink.rb",
|
29
|
-
"lib/chicago/flow/array_source.rb",
|
30
|
-
"lib/chicago/flow/dataset_source.rb",
|
31
|
-
"lib/chicago/flow/filter.rb",
|
32
|
-
"lib/chicago/flow/mysql.rb",
|
33
|
-
"lib/chicago/flow/mysql_file_serializer.rb",
|
34
|
-
"lib/chicago/flow/mysql_file_sink.rb",
|
35
|
-
"lib/chicago/flow/pipeline_endpoint.rb",
|
36
|
-
"lib/chicago/flow/pipeline_stage.rb",
|
37
|
-
"lib/chicago/flow/sink.rb",
|
38
|
-
"lib/chicago/flow/transformation.rb",
|
39
|
-
"lib/chicago/flow/transformation_chain.rb",
|
40
|
-
"spec/array_sink_spec.rb",
|
41
|
-
"spec/array_source_spec.rb",
|
42
|
-
"spec/database.yml.dist",
|
43
|
-
"spec/dataset_source_spec.rb",
|
44
|
-
"spec/filter_spec.rb",
|
45
|
-
"spec/mysql_file_serializer_spec.rb",
|
46
|
-
"spec/mysql_file_sink_spec.rb",
|
47
|
-
"spec/mysql_integration_spec.rb",
|
48
|
-
"spec/pipeline_stage_spec.rb",
|
49
|
-
"spec/spec_helper.rb",
|
50
|
-
"spec/transformation_chain_spec.rb",
|
51
|
-
"spec/transformation_spec.rb"
|
52
|
-
]
|
53
|
-
s.homepage = "http://github.com/notonthehighstreet/chicago-flow"
|
54
|
-
s.licenses = ["MIT"]
|
55
|
-
s.require_paths = ["lib"]
|
56
|
-
s.rubygems_version = "1.8.24"
|
57
|
-
s.summary = "Dataflow-style processing for hash-like rows"
|
58
|
-
|
59
|
-
if s.respond_to? :specification_version then
|
60
|
-
s.specification_version = 3
|
61
|
-
|
62
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
63
|
-
s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
|
64
|
-
s.add_runtime_dependency(%q<sequel>, [">= 0"])
|
65
|
-
s.add_runtime_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
66
|
-
s.add_runtime_dependency(%q<sequel_fast_columns>, [">= 0"])
|
67
|
-
s.add_development_dependency(%q<mysql>, ["= 2.8.1"])
|
68
|
-
s.add_development_dependency(%q<rspec>, ["~> 2"])
|
69
|
-
s.add_development_dependency(%q<bundler>, ["~> 1"])
|
70
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
71
|
-
else
|
72
|
-
s.add_dependency(%q<fastercsv>, [">= 0"])
|
73
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
74
|
-
s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
75
|
-
s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
|
76
|
-
s.add_dependency(%q<mysql>, ["= 2.8.1"])
|
77
|
-
s.add_dependency(%q<rspec>, ["~> 2"])
|
78
|
-
s.add_dependency(%q<bundler>, ["~> 1"])
|
79
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
80
|
-
end
|
81
|
-
else
|
82
|
-
s.add_dependency(%q<fastercsv>, [">= 0"])
|
83
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
84
|
-
s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
85
|
-
s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
|
86
|
-
s.add_dependency(%q<mysql>, ["= 2.8.1"])
|
87
|
-
s.add_dependency(%q<rspec>, ["~> 2"])
|
88
|
-
s.add_dependency(%q<bundler>, ["~> 1"])
|
89
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|