chicago-etl 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/VERSION +1 -1
- data/chicago-etl.gemspec +3 -4
- data/lib/chicago/etl/batch.rb +20 -1
- data/spec/etl/batch_spec.rb +79 -7
- data/spec/spec_helper.rb +3 -0
- metadata +2 -3
- data/chicago-flow.gemspec +0 -92
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NmMzMzcxMGE2ZjhmOTU5OTAyNmIzMDNiYjdmY2E5OGRjYTdiNDExYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OGM1OGM3NzBmMjA3OTA5NWQxNjcxMDQ3NDIzMmNkY2ZkYzI4MTBlNg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MjhhODU2MDU0MmZmM2NmMzczMDEyZGI2MmVjMDAzYTI3MTEzYjZjMjZhOTI5
|
10
|
+
ODM2Zjc2YzYyY2Q3MGUzZWI4NjYxYzQ0ZDA1NGY2OThmNWM1OTBhMjI0N2Nj
|
11
|
+
NDIwYjRjMGI1YzI4NmJkNGIzOTRmMzk5OTk3YTY4ZmM2MDQ5NzM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
N2RlM2JlYjc0MTRjMmRlMjhkMDUyYTlhZWZjMzU2YTA0YWI1M2FkYzFmNjEz
|
14
|
+
YzU5MDE1MTJjNGU4Mzc4ZWQzNzIxYzM3OTI5NzA3YTAzNTQ0ODhkODU2Njc1
|
15
|
+
ZTZmMWUyOWRiYjNhNGI5ZGM4YzFlZmNmYzRiZWQ0MWIwNmNkMTQ=
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.7
|
1
|
+
0.3.0
|
data/chicago-etl.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: chicago-etl 0.2.7 ruby lib
|
5
|
+
# stub: chicago-etl 0.3.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "chicago-etl"
|
9
|
-
s.version = "0.2.7"
|
9
|
+
s.version = "0.3.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Roland Swingler"]
|
14
|
-
s.date = "2014-09-
|
14
|
+
s.date = "2014-09-08"
|
15
15
|
s.description = "ETL tools for Chicago"
|
16
16
|
s.email = "roland.swingler@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -27,7 +27,6 @@ Gem::Specification.new do |s|
|
|
27
27
|
"Rakefile",
|
28
28
|
"VERSION",
|
29
29
|
"chicago-etl.gemspec",
|
30
|
-
"chicago-flow.gemspec",
|
31
30
|
"lib/chicago-etl.rb",
|
32
31
|
"lib/chicago/etl.rb",
|
33
32
|
"lib/chicago/etl/array_sink.rb",
|
data/lib/chicago/etl/batch.rb
CHANGED
@@ -22,7 +22,11 @@ module Chicago
|
|
22
22
|
#
|
23
23
|
# This should be used in preference to new or create.
|
24
24
|
def instance
|
25
|
-
|
25
|
+
if last_batch.nil? || last_batch.finished? || last_batch.started_at.to_date < Date.today
|
26
|
+
new
|
27
|
+
else
|
28
|
+
last_batch
|
29
|
+
end
|
26
30
|
end
|
27
31
|
|
28
32
|
# Returns the last batch run, or nil if this is the first batch.
|
@@ -57,6 +61,21 @@ module Chicago
|
|
57
61
|
!!@reextract
|
58
62
|
end
|
59
63
|
|
64
|
+
def extract_from
|
65
|
+
return if reextracting?
|
66
|
+
value = self.class.dataset.
|
67
|
+
where(:state => "Finished").
|
68
|
+
where {|r| r.id < id }.
|
69
|
+
select(:max.sql_function(:extracted_to).as(:extracted_to)).
|
70
|
+
single_value
|
71
|
+
|
72
|
+
if value && value.to_date == Date.today
|
73
|
+
(value.to_date - 1).to_time
|
74
|
+
else
|
75
|
+
value
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
60
79
|
# Deprecated.
|
61
80
|
#
|
62
81
|
# @deprecated Use perform_task instead
|
data/spec/etl/batch_spec.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'date'
|
2
3
|
|
3
4
|
describe Chicago::ETL::Batch do
|
4
5
|
before :each do
|
@@ -55,15 +56,86 @@ describe Chicago::ETL::Batch do
|
|
55
56
|
batch.should be_in_error
|
56
57
|
end
|
57
58
|
|
58
|
-
it "
|
59
|
-
batch = ETL::Batch.instance
|
60
|
-
|
59
|
+
it "returns nil from extract_from when re-extracting" do
|
60
|
+
batch = ETL::Batch.instance
|
61
|
+
batch.reextract
|
62
|
+
batch.start
|
63
|
+
expect(batch.extract_from).to be_nil
|
61
64
|
end
|
62
65
|
|
63
|
-
it "
|
64
|
-
batch = ETL::Batch.instance
|
65
|
-
batch.
|
66
|
-
|
66
|
+
it "returns nil from extract_from when the first batch" do
|
67
|
+
batch = ETL::Batch.instance
|
68
|
+
batch.start
|
69
|
+
expect(batch.extract_from).to be_nil
|
70
|
+
end
|
71
|
+
|
72
|
+
it "returns the previous finised batch's extracted_to as extract_from" do
|
73
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0) {
|
74
|
+
ETL::Batch.new.start.finish
|
75
|
+
}
|
76
|
+
|
77
|
+
Timecop.freeze(2014, 01, 10, 0, 0, 0) {
|
78
|
+
ETL::Batch.new.start.finish
|
79
|
+
}
|
80
|
+
|
81
|
+
ETL::Batch.new.start.error
|
82
|
+
|
83
|
+
batch = ETL::Batch.new.start
|
84
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
|
85
|
+
end
|
86
|
+
|
87
|
+
it "returns the previous finised batch's extracted_to as extract_from" do
|
88
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0) {
|
89
|
+
ETL::Batch.new.start.finish
|
90
|
+
}
|
91
|
+
|
92
|
+
Timecop.freeze(2014, 01, 8, 0, 0, 0) {
|
93
|
+
ETL::Batch.new.start.error
|
94
|
+
}
|
95
|
+
|
96
|
+
Timecop.freeze(2014, 01, 10, 0, 0, 0) {
|
97
|
+
ETL::Batch.new.start.finish
|
98
|
+
}
|
99
|
+
|
100
|
+
batch = ETL::Batch.new.start
|
101
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,10,0,0,0))
|
102
|
+
end
|
103
|
+
|
104
|
+
it "returns yesterday, rather than extract_from if extract_from is today" do
|
105
|
+
Timecop.freeze(2014, 01, 6, 0, 0, 0)
|
106
|
+
|
107
|
+
ETL::Batch.new.start.finish
|
108
|
+
|
109
|
+
batch = ETL::Batch.new.start
|
110
|
+
expect(batch.extract_from).to eql(Time.local(2014,1,5,0,0,0))
|
111
|
+
end
|
112
|
+
|
113
|
+
context "when rerun in the same day" do
|
114
|
+
it "should not return a new batch if the last batch was not finished" do
|
115
|
+
batch = ETL::Batch.instance.start
|
116
|
+
expect(ETL::Batch.instance).to eql(batch)
|
117
|
+
end
|
118
|
+
|
119
|
+
it "should not return a new batch if the last batch ended in error" do
|
120
|
+
batch = ETL::Batch.instance.start
|
121
|
+
batch.error
|
122
|
+
ETL::Batch.instance.should == batch
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
context "when rerun a day later" do
|
127
|
+
it "returns a new batch when the previous batch was unfinished" do
|
128
|
+
batch = ETL::Batch.instance.start
|
129
|
+
Timecop.freeze(Date.today + 1)
|
130
|
+
expect(ETL::Batch.instance).to_not eql(batch)
|
131
|
+
end
|
132
|
+
|
133
|
+
it "returns a new batch when the previous batch was in error" do
|
134
|
+
batch = ETL::Batch.instance.start
|
135
|
+
batch.error
|
136
|
+
Timecop.freeze(Date.today + 1)
|
137
|
+
expect(ETL::Batch.instance).to_not eql(batch)
|
138
|
+
end
|
67
139
|
end
|
68
140
|
|
69
141
|
it "should create a log in tmp/batches/1/log" do
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roland Swingler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: chicagowarehouse
|
@@ -200,7 +200,6 @@ files:
|
|
200
200
|
- Rakefile
|
201
201
|
- VERSION
|
202
202
|
- chicago-etl.gemspec
|
203
|
-
- chicago-flow.gemspec
|
204
203
|
- lib/chicago-etl.rb
|
205
204
|
- lib/chicago/etl.rb
|
206
205
|
- lib/chicago/etl/array_sink.rb
|
data/chicago-flow.gemspec
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = "chicago-flow"
|
8
|
-
s.version = "0.0.1"
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-06-05"
|
13
|
-
s.description = "Dataflow-style processing for hash-like rows"
|
14
|
-
s.email = "roland.swingler@gmail.com"
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE.txt",
|
17
|
-
"README.rdoc"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".rspec",
|
22
|
-
"Gemfile",
|
23
|
-
"LICENSE.txt",
|
24
|
-
"README.rdoc",
|
25
|
-
"Rakefile",
|
26
|
-
"VERSION",
|
27
|
-
"lib/chicago/flow.rb",
|
28
|
-
"lib/chicago/flow/array_sink.rb",
|
29
|
-
"lib/chicago/flow/array_source.rb",
|
30
|
-
"lib/chicago/flow/dataset_source.rb",
|
31
|
-
"lib/chicago/flow/filter.rb",
|
32
|
-
"lib/chicago/flow/mysql.rb",
|
33
|
-
"lib/chicago/flow/mysql_file_serializer.rb",
|
34
|
-
"lib/chicago/flow/mysql_file_sink.rb",
|
35
|
-
"lib/chicago/flow/pipeline_endpoint.rb",
|
36
|
-
"lib/chicago/flow/pipeline_stage.rb",
|
37
|
-
"lib/chicago/flow/sink.rb",
|
38
|
-
"lib/chicago/flow/transformation.rb",
|
39
|
-
"lib/chicago/flow/transformation_chain.rb",
|
40
|
-
"spec/array_sink_spec.rb",
|
41
|
-
"spec/array_source_spec.rb",
|
42
|
-
"spec/database.yml.dist",
|
43
|
-
"spec/dataset_source_spec.rb",
|
44
|
-
"spec/filter_spec.rb",
|
45
|
-
"spec/mysql_file_serializer_spec.rb",
|
46
|
-
"spec/mysql_file_sink_spec.rb",
|
47
|
-
"spec/mysql_integration_spec.rb",
|
48
|
-
"spec/pipeline_stage_spec.rb",
|
49
|
-
"spec/spec_helper.rb",
|
50
|
-
"spec/transformation_chain_spec.rb",
|
51
|
-
"spec/transformation_spec.rb"
|
52
|
-
]
|
53
|
-
s.homepage = "http://github.com/notonthehighstreet/chicago-flow"
|
54
|
-
s.licenses = ["MIT"]
|
55
|
-
s.require_paths = ["lib"]
|
56
|
-
s.rubygems_version = "1.8.24"
|
57
|
-
s.summary = "Dataflow-style processing for hash-like rows"
|
58
|
-
|
59
|
-
if s.respond_to? :specification_version then
|
60
|
-
s.specification_version = 3
|
61
|
-
|
62
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
63
|
-
s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
|
64
|
-
s.add_runtime_dependency(%q<sequel>, [">= 0"])
|
65
|
-
s.add_runtime_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
66
|
-
s.add_runtime_dependency(%q<sequel_fast_columns>, [">= 0"])
|
67
|
-
s.add_development_dependency(%q<mysql>, ["= 2.8.1"])
|
68
|
-
s.add_development_dependency(%q<rspec>, ["~> 2"])
|
69
|
-
s.add_development_dependency(%q<bundler>, ["~> 1"])
|
70
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
71
|
-
else
|
72
|
-
s.add_dependency(%q<fastercsv>, [">= 0"])
|
73
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
74
|
-
s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
75
|
-
s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
|
76
|
-
s.add_dependency(%q<mysql>, ["= 2.8.1"])
|
77
|
-
s.add_dependency(%q<rspec>, ["~> 2"])
|
78
|
-
s.add_dependency(%q<bundler>, ["~> 1"])
|
79
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
80
|
-
end
|
81
|
-
else
|
82
|
-
s.add_dependency(%q<fastercsv>, [">= 0"])
|
83
|
-
s.add_dependency(%q<sequel>, [">= 0"])
|
84
|
-
s.add_dependency(%q<sequel_load_data_infile>, [">= 0.0.2"])
|
85
|
-
s.add_dependency(%q<sequel_fast_columns>, [">= 0"])
|
86
|
-
s.add_dependency(%q<mysql>, ["= 2.8.1"])
|
87
|
-
s.add_dependency(%q<rspec>, ["~> 2"])
|
88
|
-
s.add_dependency(%q<bundler>, ["~> 1"])
|
89
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|