chicago-etl 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/chicago-etl.gemspec +2 -2
- data/lib/chicago/etl/batch.rb +21 -0
- data/lib/chicago/etl/transformations/deduplicate_rows.rb +30 -5
- metadata +184 -193
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.1
|
1
|
+
0.1.2
|
data/chicago-etl.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chicago-etl"
|
8
|
-
s.version = "0.1.1"
|
8
|
+
s.version = "0.1.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roland Swingler"]
|
12
|
-
s.date = "2013-11-
|
12
|
+
s.date = "2013-11-13"
|
13
13
|
s.description = "ETL tools for Chicago"
|
14
14
|
s.email = "roland.swingler@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/chicago/etl/batch.rb
CHANGED
@@ -31,6 +31,27 @@ module Chicago
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# Deprecated.
|
35
|
+
#
|
36
|
+
# @deprecated Use perform_task instead
|
37
|
+
def load(task_name, &block)
|
38
|
+
perform_task(:load, task_name, &block)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Deprecated.
|
42
|
+
#
|
43
|
+
# @deprecated Use perform_task instead
|
44
|
+
def transform(task_name, &block)
|
45
|
+
perform_task(:extract, task_name, &block)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Deprecated.
|
49
|
+
#
|
50
|
+
# @deprecated Use perform_task instead
|
51
|
+
def extract(task_name, &block)
|
52
|
+
perform_task(:extract, task_name, &block)
|
53
|
+
end
|
54
|
+
|
34
55
|
# Performs a named task if it hasn't already run successfully in
|
35
56
|
# this batch.
|
36
57
|
def perform_task(stage, task_name, &block)
|
@@ -3,7 +3,7 @@ module Chicago
|
|
3
3
|
class DeduplicateRows < Chicago::Flow::Transformation
|
4
4
|
def process_row(row)
|
5
5
|
if @working_row.nil?
|
6
|
-
@working_row = row
|
6
|
+
@working_row = new_row(row)
|
7
7
|
return
|
8
8
|
elsif same_row?(row)
|
9
9
|
@working_row = merge_rows(row)
|
@@ -14,25 +14,50 @@ module Chicago
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def flush
|
17
|
-
@working_row.nil? ? [] : [@working_row]
|
17
|
+
@working_row.nil? ? [] : [return_row(@working_row)]
|
18
18
|
end
|
19
19
|
|
20
20
|
protected
|
21
21
|
|
22
|
-
|
22
|
+
# Returns the current working row.
|
23
|
+
attr_accessor :working_row
|
23
24
|
|
24
25
|
# This should be implemented by clients
|
25
26
|
def merge_rows(row)
|
27
|
+
row
|
26
28
|
end
|
27
29
|
|
28
|
-
#
|
30
|
+
# Called for every row to determine whether the row is part of
|
31
|
+
# the same group as the current working row.
|
32
|
+
#
|
33
|
+
# This should be implemented by clients. By default, all rows
|
34
|
+
# are considered different.
|
29
35
|
def same_row?(row)
|
30
36
|
end
|
31
37
|
|
38
|
+
# Called whenever a new row is detected.
|
39
|
+
#
|
40
|
+
# Default behavior is to return the row unmodified - this may be
|
41
|
+
# overridden by clients - if it is then the method should return
|
42
|
+
# a row.
|
43
|
+
def new_row(row)
|
44
|
+
row
|
45
|
+
end
|
46
|
+
|
47
|
+
# Called whenever a row is about to be returned downstream.
|
48
|
+
#
|
49
|
+
# Default behavior is to return the row unmodified - this may be
|
50
|
+
# overridden by clients - if it is then the method should return
|
51
|
+
# a row.
|
52
|
+
def return_row(row)
|
53
|
+
row
|
54
|
+
end
|
55
|
+
|
32
56
|
private
|
33
57
|
|
34
58
|
def assign_new_row_and_return_old_row(row)
|
35
|
-
row
|
59
|
+
row = return_row(@working_row)
|
60
|
+
@working_row = new_row(row)
|
36
61
|
row
|
37
62
|
end
|
38
63
|
end
|
metadata
CHANGED
@@ -1,248 +1,233 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: chicago-etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Roland Swingler
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
+
|
18
|
+
date: 2013-11-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
17
22
|
none: false
|
18
|
-
requirements:
|
23
|
+
requirements:
|
19
24
|
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
hash: 3
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
- 4
|
30
|
+
version: "0.4"
|
31
|
+
requirement: *id001
|
22
32
|
type: :runtime
|
23
33
|
prerelease: false
|
24
|
-
|
34
|
+
name: chicagowarehouse
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
25
37
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 3
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
version: "0"
|
45
|
+
requirement: *id002
|
46
|
+
type: :runtime
|
47
|
+
prerelease: false
|
31
48
|
name: fastercsv
|
32
|
-
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
33
51
|
none: false
|
34
|
-
requirements:
|
35
|
-
- -
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
requirement: *id003
|
38
60
|
type: :runtime
|
39
61
|
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ! '>='
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: '0'
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
62
|
name: sequel
|
48
|
-
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
49
65
|
none: false
|
50
|
-
requirements:
|
51
|
-
- -
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
hash: 27
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
- 0
|
73
|
+
- 2
|
74
|
+
version: 0.0.2
|
75
|
+
requirement: *id004
|
54
76
|
type: :runtime
|
55
77
|
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ! '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
78
|
name: sequel_load_data_infile
|
64
|
-
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
65
81
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
requirement: *id005
|
70
90
|
type: :runtime
|
71
91
|
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ! '>='
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: 0.0.2
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
92
|
name: sequel_fast_columns
|
80
|
-
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
81
95
|
none: false
|
82
|
-
requirements:
|
83
|
-
- -
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
|
86
|
-
|
96
|
+
requirements:
|
97
|
+
- - ~>
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
hash: 7
|
100
|
+
segments:
|
101
|
+
- 2
|
102
|
+
version: "2"
|
103
|
+
requirement: *id006
|
104
|
+
type: :development
|
87
105
|
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ! '>='
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
106
|
name: rspec
|
96
|
-
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
97
109
|
none: false
|
98
|
-
requirements:
|
99
|
-
- -
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 3
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
requirement: *id007
|
102
118
|
type: :development
|
103
119
|
prerelease: false
|
104
|
-
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - ~>
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: '2'
|
110
|
-
- !ruby/object:Gem::Dependency
|
111
120
|
name: timecop
|
112
|
-
|
121
|
+
- !ruby/object:Gem::Dependency
|
122
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
113
123
|
none: false
|
114
|
-
requirements:
|
115
|
-
- -
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
hash: 3
|
128
|
+
segments:
|
129
|
+
- 0
|
130
|
+
version: "0"
|
131
|
+
requirement: *id008
|
118
132
|
type: :development
|
119
133
|
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
|
-
requirements:
|
123
|
-
- - ! '>='
|
124
|
-
- !ruby/object:Gem::Version
|
125
|
-
version: '0'
|
126
|
-
- !ruby/object:Gem::Dependency
|
127
134
|
name: yard
|
128
|
-
|
135
|
+
- !ruby/object:Gem::Dependency
|
136
|
+
version_requirements: &id009 !ruby/object:Gem::Requirement
|
129
137
|
none: false
|
130
|
-
requirements:
|
131
|
-
- -
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
hash: 3
|
142
|
+
segments:
|
143
|
+
- 0
|
144
|
+
version: "0"
|
145
|
+
requirement: *id009
|
134
146
|
type: :development
|
135
147
|
prerelease: false
|
136
|
-
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - ! '>='
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: '0'
|
142
|
-
- !ruby/object:Gem::Dependency
|
143
148
|
name: flog
|
144
|
-
|
149
|
+
- !ruby/object:Gem::Dependency
|
150
|
+
version_requirements: &id010 !ruby/object:Gem::Requirement
|
145
151
|
none: false
|
146
|
-
requirements:
|
147
|
-
- -
|
148
|
-
- !ruby/object:Gem::Version
|
149
|
-
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
hash: 3
|
156
|
+
segments:
|
157
|
+
- 0
|
158
|
+
version: "0"
|
159
|
+
requirement: *id010
|
150
160
|
type: :development
|
151
161
|
prerelease: false
|
152
|
-
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
|
-
requirements:
|
155
|
-
- - ! '>='
|
156
|
-
- !ruby/object:Gem::Version
|
157
|
-
version: '0'
|
158
|
-
- !ruby/object:Gem::Dependency
|
159
162
|
name: simplecov
|
160
|
-
|
163
|
+
- !ruby/object:Gem::Dependency
|
164
|
+
version_requirements: &id011 !ruby/object:Gem::Requirement
|
161
165
|
none: false
|
162
|
-
requirements:
|
163
|
-
- -
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
|
166
|
+
requirements:
|
167
|
+
- - ">="
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
hash: 3
|
170
|
+
segments:
|
171
|
+
- 0
|
172
|
+
version: "0"
|
173
|
+
requirement: *id011
|
166
174
|
type: :development
|
167
175
|
prerelease: false
|
168
|
-
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
|
-
requirements:
|
171
|
-
- - ! '>='
|
172
|
-
- !ruby/object:Gem::Version
|
173
|
-
version: '0'
|
174
|
-
- !ruby/object:Gem::Dependency
|
175
176
|
name: ZenTest
|
176
|
-
|
177
|
+
- !ruby/object:Gem::Dependency
|
178
|
+
version_requirements: &id012 !ruby/object:Gem::Requirement
|
177
179
|
none: false
|
178
|
-
requirements:
|
179
|
-
- -
|
180
|
-
- !ruby/object:Gem::Version
|
181
|
-
|
180
|
+
requirements:
|
181
|
+
- - "="
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
hash: 45
|
184
|
+
segments:
|
185
|
+
- 2
|
186
|
+
- 8
|
187
|
+
- 1
|
188
|
+
version: 2.8.1
|
189
|
+
requirement: *id012
|
182
190
|
type: :development
|
183
191
|
prerelease: false
|
184
|
-
version_requirements: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
|
-
requirements:
|
187
|
-
- - ! '>='
|
188
|
-
- !ruby/object:Gem::Version
|
189
|
-
version: '0'
|
190
|
-
- !ruby/object:Gem::Dependency
|
191
192
|
name: mysql
|
192
|
-
|
193
|
+
- !ruby/object:Gem::Dependency
|
194
|
+
version_requirements: &id013 !ruby/object:Gem::Requirement
|
193
195
|
none: false
|
194
|
-
requirements:
|
195
|
-
- -
|
196
|
-
- !ruby/object:Gem::Version
|
197
|
-
|
196
|
+
requirements:
|
197
|
+
- - ~>
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
hash: 1
|
200
|
+
segments:
|
201
|
+
- 1
|
202
|
+
version: "1"
|
203
|
+
requirement: *id013
|
198
204
|
type: :development
|
199
205
|
prerelease: false
|
200
|
-
version_requirements: !ruby/object:Gem::Requirement
|
201
|
-
none: false
|
202
|
-
requirements:
|
203
|
-
- - '='
|
204
|
-
- !ruby/object:Gem::Version
|
205
|
-
version: 2.8.1
|
206
|
-
- !ruby/object:Gem::Dependency
|
207
206
|
name: bundler
|
208
|
-
|
207
|
+
- !ruby/object:Gem::Dependency
|
208
|
+
version_requirements: &id014 !ruby/object:Gem::Requirement
|
209
209
|
none: false
|
210
|
-
requirements:
|
211
|
-
- -
|
212
|
-
- !ruby/object:Gem::Version
|
213
|
-
|
210
|
+
requirements:
|
211
|
+
- - ">="
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
hash: 3
|
214
|
+
segments:
|
215
|
+
- 0
|
216
|
+
version: "0"
|
217
|
+
requirement: *id014
|
214
218
|
type: :development
|
215
219
|
prerelease: false
|
216
|
-
version_requirements: !ruby/object:Gem::Requirement
|
217
|
-
none: false
|
218
|
-
requirements:
|
219
|
-
- - ~>
|
220
|
-
- !ruby/object:Gem::Version
|
221
|
-
version: '1'
|
222
|
-
- !ruby/object:Gem::Dependency
|
223
220
|
name: jeweler
|
224
|
-
requirement: !ruby/object:Gem::Requirement
|
225
|
-
none: false
|
226
|
-
requirements:
|
227
|
-
- - ! '>='
|
228
|
-
- !ruby/object:Gem::Version
|
229
|
-
version: '0'
|
230
|
-
type: :development
|
231
|
-
prerelease: false
|
232
|
-
version_requirements: !ruby/object:Gem::Requirement
|
233
|
-
none: false
|
234
|
-
requirements:
|
235
|
-
- - ! '>='
|
236
|
-
- !ruby/object:Gem::Version
|
237
|
-
version: '0'
|
238
221
|
description: ETL tools for Chicago
|
239
222
|
email: roland.swingler@gmail.com
|
240
223
|
executables: []
|
224
|
+
|
241
225
|
extensions: []
|
242
|
-
|
226
|
+
|
227
|
+
extra_rdoc_files:
|
243
228
|
- LICENSE.txt
|
244
229
|
- README.rdoc
|
245
|
-
files:
|
230
|
+
files:
|
246
231
|
- .document
|
247
232
|
- .rspec
|
248
233
|
- Gemfile
|
@@ -326,31 +311,37 @@ files:
|
|
326
311
|
- spec/flow/transformation_spec.rb
|
327
312
|
- spec/spec_helper.rb
|
328
313
|
homepage: http://github.com/notonthehighstreet/chicago-etl
|
329
|
-
licenses:
|
314
|
+
licenses:
|
330
315
|
- MIT
|
331
316
|
post_install_message:
|
332
317
|
rdoc_options: []
|
333
|
-
|
318
|
+
|
319
|
+
require_paths:
|
334
320
|
- lib
|
335
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
321
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
336
322
|
none: false
|
337
|
-
requirements:
|
338
|
-
- -
|
339
|
-
- !ruby/object:Gem::Version
|
340
|
-
|
341
|
-
segments:
|
323
|
+
requirements:
|
324
|
+
- - ">="
|
325
|
+
- !ruby/object:Gem::Version
|
326
|
+
hash: 3
|
327
|
+
segments:
|
342
328
|
- 0
|
343
|
-
|
344
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
329
|
+
version: "0"
|
330
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
345
331
|
none: false
|
346
|
-
requirements:
|
347
|
-
- -
|
348
|
-
- !ruby/object:Gem::Version
|
349
|
-
|
332
|
+
requirements:
|
333
|
+
- - ">="
|
334
|
+
- !ruby/object:Gem::Version
|
335
|
+
hash: 3
|
336
|
+
segments:
|
337
|
+
- 0
|
338
|
+
version: "0"
|
350
339
|
requirements: []
|
340
|
+
|
351
341
|
rubyforge_project:
|
352
342
|
rubygems_version: 1.8.25
|
353
343
|
signing_key:
|
354
344
|
specification_version: 3
|
355
345
|
summary: Chicago ETL
|
356
346
|
test_files: []
|
347
|
+
|