ETL 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +60 -29
- data/examples/basic_etl.rb +115 -0
- data/examples/iterator_etl.rb +209 -0
- data/lib/etl.rb +14 -2
- data/lib/etl/helpers.rb +1 -1
- data/lib/etl/version.rb +1 -1
- data/spec/etl_spec.rb +19 -0
- metadata +4 -2
data/README.md
CHANGED
@@ -35,6 +35,16 @@ To run a basic ETL that is composed of sequential SQL statements, start by
|
|
35
35
|
creating a new ETL instance:
|
36
36
|
|
37
37
|
```ruby
|
38
|
+
# setting connection at the class level
|
39
|
+
ETL.connection = connection
|
40
|
+
|
41
|
+
etl = ETL.new(description: "a description of what this ETL does")
|
42
|
+
```
|
43
|
+
|
44
|
+
or
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
# setting connection at the instance level
|
38
48
|
etl = ETL.new(description: "a description of what this ETL does",
|
39
49
|
connection: connection)
|
40
50
|
```
|
@@ -50,13 +60,12 @@ etl.config do |etl|
|
|
50
60
|
#
|
51
61
|
etl.query %[
|
52
62
|
CREATE TABLE IF NOT EXISTS some_database.some_destination_table (
|
53
|
-
|
54
|
-
created_date DATE NOT NULL
|
55
|
-
total_amount INT SIGNED NOT NULL
|
56
|
-
message VARCHAR(100) DEFAULT NULL
|
57
|
-
PRIMARY KEY (user_id)
|
58
|
-
KEY (
|
59
|
-
KEY (created_date)
|
63
|
+
user_id INT UNSIGNED NOT NULL
|
64
|
+
, created_date DATE NOT NULL
|
65
|
+
, total_amount INT SIGNED NOT NULL
|
66
|
+
, message VARCHAR(100) DEFAULT NULL
|
67
|
+
, PRIMARY KEY (user_id, created_date)
|
68
|
+
, KEY (created_date)
|
60
69
|
)]
|
61
70
|
end
|
62
71
|
|
@@ -81,8 +90,11 @@ etl.config do |etl|
|
|
81
90
|
# For example:
|
82
91
|
#
|
83
92
|
etl.query %[
|
84
|
-
REPLACE INTO some_database.some_destination_table
|
85
|
-
|
93
|
+
REPLACE INTO some_database.some_destination_table (
|
94
|
+
user_id
|
95
|
+
, created_date
|
96
|
+
, total_amount
|
97
|
+
) SELECT
|
86
98
|
user_id
|
87
99
|
, DATE(created_at) AS created_date
|
88
100
|
, SUM(amount) AS total_amount
|
@@ -90,7 +102,7 @@ etl.config do |etl|
|
|
90
102
|
some_database.some_source_table sst
|
91
103
|
GROUP BY
|
92
104
|
sst.user_id
|
93
|
-
, sst.
|
105
|
+
, DATE(sst.created_at)]
|
94
106
|
end
|
95
107
|
|
96
108
|
etl.after_etl do |etl|
|
@@ -142,13 +154,12 @@ etl.config do |etl|
|
|
142
154
|
#
|
143
155
|
etl.query %[
|
144
156
|
CREATE TABLE IF NOT EXISTS some_database.some_destination_table (
|
145
|
-
|
146
|
-
created_date DATE NOT NULL
|
147
|
-
total_amount INT SIGNED NOT NULL
|
148
|
-
message VARCHAR(100) DEFAULT NULL
|
149
|
-
PRIMARY KEY (user_id)
|
150
|
-
KEY (
|
151
|
-
KEY (created_date)
|
157
|
+
user_id INT UNSIGNED NOT NULL
|
158
|
+
, created_date DATE NOT NULL
|
159
|
+
, total_amount INT SIGNED NOT NULL
|
160
|
+
, message VARCHAR(100) DEFAULT NULL
|
161
|
+
, PRIMARY KEY (user_id, created_date)
|
162
|
+
, KEY (created_date)
|
152
163
|
)]
|
153
164
|
end
|
154
165
|
|
@@ -177,8 +188,11 @@ etl.config do |etl|
|
|
177
188
|
#
|
178
189
|
# As an example:
|
179
190
|
#
|
191
|
+
# Note that we cast the default date as a DATE. If we don't, it will be
|
192
|
+
# treated as a string and our iterator will fail under the hood when testing
|
193
|
+
# if it is complete.
|
180
194
|
res = etl.query %[
|
181
|
-
SELECT COALESCE(MAX(created_date), '
|
195
|
+
SELECT COALESCE(MAX(created_date), DATE('2010-01-01')) AS the_max
|
182
196
|
FROM some_database.some_destination_table]
|
183
197
|
|
184
198
|
res.to_a.first['the_max']
|
@@ -195,7 +209,7 @@ etl.config do |etl|
|
|
195
209
|
#
|
196
210
|
# As an example, to iterate 7 days at a time:
|
197
211
|
#
|
198
|
-
7
|
212
|
+
7
|
199
213
|
end
|
200
214
|
|
201
215
|
etl.stop do |etl|
|
@@ -237,24 +251,33 @@ etl.config do |etl|
|
|
237
251
|
# As a first example, to iterate over a set of ids:
|
238
252
|
#
|
239
253
|
# etl.query %[
|
240
|
-
# REPLACE INTO some_database.some_destination_table
|
241
|
-
#
|
242
|
-
#
|
243
|
-
# ,
|
254
|
+
# REPLACE INTO some_database.some_destination_table (
|
255
|
+
# created_date
|
256
|
+
# , user_id
|
257
|
+
# , total_amount
|
258
|
+
# ) SELECT
|
259
|
+
# DATE(sst.created_at) AS created_date
|
260
|
+
# , sst.user_id
|
261
|
+
# , SUM(sst.amount) AS total_amount
|
244
262
|
# FROM
|
245
263
|
# some_database.some_source_table sst
|
246
264
|
# WHERE
|
247
265
|
# sst.user_id > #{lbound} AND sst.user_id <= #{ubound}
|
248
266
|
# GROUP BY
|
249
|
-
#
|
267
|
+
# DATE(sst.created_at)
|
268
|
+
# , sst.user_id]
|
250
269
|
#
|
251
270
|
# To "window" a SQL query using dates:
|
252
271
|
#
|
253
272
|
etl.query %[
|
254
|
-
REPLACE INTO some_database.some_destination_table
|
255
|
-
|
256
|
-
|
257
|
-
,
|
273
|
+
REPLACE INTO some_database.some_destination_table (
|
274
|
+
created_date
|
275
|
+
, user_id
|
276
|
+
, total_amount
|
277
|
+
) SELECT
|
278
|
+
DATE(sst.created_at) AS created_date
|
279
|
+
, sst.user_id
|
280
|
+
, SUM(sst.amount) AS total_amount
|
258
281
|
FROM
|
259
282
|
some_database.some_source_table sst
|
260
283
|
WHERE
|
@@ -262,7 +285,8 @@ etl.config do |etl|
|
|
262
285
|
-- This is required when dealing with dates / datetimes
|
263
286
|
sst.created_at >= '#{lbound}' AND sst.created_at < '#{ubound}'
|
264
287
|
GROUP BY
|
265
|
-
|
288
|
+
DATE(sst.created_at)
|
289
|
+
, sst.user_id]
|
266
290
|
|
267
291
|
# Note that there is no sql sanitization here so there is *potential* for SQL
|
268
292
|
# injection. That being said you'll likely be using this gem in an internal
|
@@ -295,6 +319,13 @@ Note that `#etl` executes `#start` and `#stop` once and memoizes the result for
|
|
295
319
|
each. It then begins to iterate from what `#start` evaluated to up until what `#stop`
|
296
320
|
evaluated to, stepping by what `#step` evaluates to.
|
297
321
|
|
322
|
+
## Examples
|
323
|
+
|
324
|
+
There are two examples found in `./examples` that demonstrate the basic ETL and
|
325
|
+
iteration ETL. Each file uses the [mysql2](https://github.com/brianmario/mysql2)
|
326
|
+
gem and reads / writes data to localhost using the root user with no password.
|
327
|
+
Adjust as needed.
|
328
|
+
|
298
329
|
## Logger Details
|
299
330
|
|
300
331
|
A logger must support two methods: `#info` and `#warn`.
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require 'ETL'
|
3
|
+
|
4
|
+
connection = Mysql2::Client.new host: 'localhost',
|
5
|
+
username: 'root',
|
6
|
+
password: '',
|
7
|
+
database: 'some_database'
|
8
|
+
|
9
|
+
# set up the source database
|
10
|
+
connection.query %[
|
11
|
+
CREATE DATABASE IF NOT EXISTS some_database]
|
12
|
+
|
13
|
+
connection.query %[
|
14
|
+
CREATE TABLE IF NOT EXISTS some_database.some_source_table (
|
15
|
+
user_id INT NOT NULL
|
16
|
+
, created_at DATETIME NOT NULL
|
17
|
+
, amount INT NOT NULL)]
|
18
|
+
|
19
|
+
connection.query %[
|
20
|
+
TRUNCATE some_database.some_source_table]
|
21
|
+
|
22
|
+
connection.query %[
|
23
|
+
INSERT INTO some_database.some_source_table (
|
24
|
+
user_id
|
25
|
+
, created_at
|
26
|
+
, amount
|
27
|
+
) VALUES
|
28
|
+
(1, UTC_TIMESTAMP, 100)
|
29
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 200)
|
30
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 400)
|
31
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 600)
|
32
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, 600)
|
33
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, -100)
|
34
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, 200)
|
35
|
+
, (3, UTC_TIMESTAMP - INTERVAL 4 DAY, 200)]
|
36
|
+
|
37
|
+
# set up the ETL
|
38
|
+
etl = ETL.new(description: "a description of what this ETL does",
|
39
|
+
connection: connection)
|
40
|
+
|
41
|
+
# configure ETL
|
42
|
+
etl.config do |etl|
|
43
|
+
etl.ensure_destination do |etl|
|
44
|
+
# For most ETLs you may want to ensure that the destination exists, so the
|
45
|
+
# #ensure_destination block is ideally suited to fulfill this requirement.
|
46
|
+
#
|
47
|
+
# By way of example:
|
48
|
+
#
|
49
|
+
etl.query %[
|
50
|
+
CREATE TABLE IF NOT EXISTS some_database.some_destination_table (
|
51
|
+
user_id INT UNSIGNED NOT NULL
|
52
|
+
, created_date DATE NOT NULL
|
53
|
+
, total_amount INT SIGNED NOT NULL
|
54
|
+
, message VARCHAR(100) DEFAULT NULL
|
55
|
+
, PRIMARY KEY (user_id, created_date)
|
56
|
+
, KEY (created_date)
|
57
|
+
)]
|
58
|
+
end
|
59
|
+
|
60
|
+
etl.before_etl do |etl|
|
61
|
+
# All pre-ETL work is performed in this block.
|
62
|
+
#
|
63
|
+
# This can be thought of as a before-ETL hook that will fire only once. When
|
64
|
+
# you are not leveraging the ETL iteration capabilities, the value of this
|
65
|
+
# block vs the #etl block is not very clear. We will see how and when to
|
66
|
+
# leverage this block effectively when we introduce iteration.
|
67
|
+
#
|
68
|
+
# As an example, let's say we want to get rid of all entries that have an
|
69
|
+
# amount less than zero before moving on to our actual etl:
|
70
|
+
#
|
71
|
+
etl.query %[DELETE FROM some_database.some_source_table WHERE amount < 0]
|
72
|
+
end
|
73
|
+
|
74
|
+
etl.etl do |etl|
|
75
|
+
# Here is where the magic happens! This block contains the main ETL
|
76
|
+
# operation.
|
77
|
+
#
|
78
|
+
# For example:
|
79
|
+
#
|
80
|
+
etl.query %[
|
81
|
+
REPLACE INTO some_database.some_destination_table (
|
82
|
+
user_id
|
83
|
+
, created_date
|
84
|
+
, total_amount
|
85
|
+
) SELECT
|
86
|
+
sst.user_id
|
87
|
+
, DATE(sst.created_at) AS created_date
|
88
|
+
, SUM(sst.amount) AS total_amount
|
89
|
+
FROM
|
90
|
+
some_database.some_source_table sst
|
91
|
+
GROUP BY
|
92
|
+
sst.user_id
|
93
|
+
, DATE(sst.created_at)]
|
94
|
+
end
|
95
|
+
|
96
|
+
etl.after_etl do |etl|
|
97
|
+
# All post-ETL work is performed in this block.
|
98
|
+
#
|
99
|
+
# Again, to finish up with an example:
|
100
|
+
#
|
101
|
+
etl.query %[
|
102
|
+
UPDATE some_database.some_destination_table
|
103
|
+
SET message = "WOW"
|
104
|
+
WHERE total_amount > 100]
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# ship it
|
109
|
+
etl.run
|
110
|
+
|
111
|
+
puts %[
|
112
|
+
ETL complete. Now go have a look at some_database.some_destination_table
|
113
|
+
That was build from some_database.some_source_table using the above ETL configuration.
|
114
|
+
|
115
|
+
SELECT * FROM some_database.some_destination_table;]
|
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require 'ETL'
|
3
|
+
|
4
|
+
connection = Mysql2::Client.new host: 'localhost',
|
5
|
+
username: 'root',
|
6
|
+
password: '',
|
7
|
+
database: 'some_database'
|
8
|
+
|
9
|
+
# set up the source database:
|
10
|
+
connection.query %[
|
11
|
+
CREATE DATABASE IF NOT EXISTS some_database]
|
12
|
+
|
13
|
+
connection.query %[
|
14
|
+
CREATE TABLE IF NOT EXISTS some_database.some_source_table (
|
15
|
+
user_id INT NOT NULL
|
16
|
+
, created_at DATETIME NOT NULL
|
17
|
+
, amount INT NOT NULL)]
|
18
|
+
|
19
|
+
connection.query %[
|
20
|
+
TRUNCATE some_database.some_source_table]
|
21
|
+
|
22
|
+
connection.query %[
|
23
|
+
INSERT INTO some_database.some_source_table (
|
24
|
+
user_id
|
25
|
+
, created_at
|
26
|
+
, amount
|
27
|
+
) VALUES
|
28
|
+
(1, UTC_TIMESTAMP, 100)
|
29
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 200)
|
30
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 400)
|
31
|
+
, (2, UTC_TIMESTAMP - INTERVAL 3 DAY, 600)
|
32
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, 600)
|
33
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, -100)
|
34
|
+
, (3, UTC_TIMESTAMP - INTERVAL 3 DAY, 200)
|
35
|
+
, (3, UTC_TIMESTAMP - INTERVAL 4 DAY, 200)]
|
36
|
+
|
37
|
+
# set up the ETL
|
38
|
+
etl = ETL.new(description: "a description of what this ETL does",
|
39
|
+
connection: connection)
|
40
|
+
|
41
|
+
# configure it
|
42
|
+
etl.config do |etl|
|
43
|
+
etl.ensure_destination do |etl|
|
44
|
+
# For most ETLs you may want to ensure that the destination exists, so the
|
45
|
+
# #ensure_destination block is ideally suited to fulfill this requirement.
|
46
|
+
#
|
47
|
+
# By way of example:
|
48
|
+
#
|
49
|
+
etl.query %[
|
50
|
+
CREATE TABLE IF NOT EXISTS some_database.some_destination_table (
|
51
|
+
user_id INT UNSIGNED NOT NULL
|
52
|
+
, created_date DATE NOT NULL
|
53
|
+
, total_amount INT SIGNED NOT NULL
|
54
|
+
, message VARCHAR(100) DEFAULT NULL
|
55
|
+
, PRIMARY KEY (user_id, created_date)
|
56
|
+
, KEY (created_date)
|
57
|
+
)]
|
58
|
+
end
|
59
|
+
|
60
|
+
etl.before_etl do |etl|
|
61
|
+
# All pre-ETL work is performed in this block.
|
62
|
+
#
|
63
|
+
# Now that we are leveraging iteration the #before_etl block becomes
|
64
|
+
# more useful as a way to execute an operation once before we begin
|
65
|
+
# our iteration.
|
66
|
+
#
|
67
|
+
# As an example, let's say we want to get rid of all entries that have an
|
68
|
+
# amount less than zero before moving on to our actual etl:
|
69
|
+
#
|
70
|
+
etl.query %[
|
71
|
+
DELETE FROM some_database.some_source_table
|
72
|
+
WHERE amount < 0]
|
73
|
+
end
|
74
|
+
|
75
|
+
etl.start do |etl|
|
76
|
+
# This defines where the ETL should start. This can be a flat number
|
77
|
+
# or date, or even SQL / other code can be executed to produce a starting
|
78
|
+
# value.
|
79
|
+
#
|
80
|
+
# Usually, this is the last known entry for the destination table with
|
81
|
+
# some sensible default if the destination does not yet contain data.
|
82
|
+
#
|
83
|
+
# As an example:
|
84
|
+
#
|
85
|
+
# Note that we cast the default date as a DATE. If we don't, it will be
|
86
|
+
# treated as a string and our iterator will fail under the hood when testing
|
87
|
+
# if it is complete.
|
88
|
+
res = etl.query %[
|
89
|
+
SELECT COALESCE(MAX(created_date), DATE('2010-01-01')) AS the_max
|
90
|
+
FROM some_database.some_destination_table]
|
91
|
+
|
92
|
+
res.to_a.first['the_max']
|
93
|
+
end
|
94
|
+
|
95
|
+
etl.step do |etl|
|
96
|
+
# The step block defines the size of the iteration block. To iterate by
|
97
|
+
# ten records, the step block should be set to return 10.
|
98
|
+
#
|
99
|
+
# As an alternative example, to set the iteration to go 10,000 units
|
100
|
+
# at a time, the following value should be provided:
|
101
|
+
#
|
102
|
+
# 10_000 (Note: an underscore is used for readability)
|
103
|
+
#
|
104
|
+
# As an example, to iterate 7 days at a time:
|
105
|
+
#
|
106
|
+
7
|
107
|
+
end
|
108
|
+
|
109
|
+
etl.stop do |etl|
|
110
|
+
# The stop block defines when the iteration should halt.
|
111
|
+
# Again, this can be a flat value or code. Either way, one value *must* be
|
112
|
+
# returned.
|
113
|
+
#
|
114
|
+
# As a flat value:
|
115
|
+
#
|
116
|
+
# 1_000_000
|
117
|
+
#
|
118
|
+
# Or a date value:
|
119
|
+
#
|
120
|
+
# Time.now.to_date
|
121
|
+
#
|
122
|
+
# Or as a code example:
|
123
|
+
#
|
124
|
+
res = etl.query %[
|
125
|
+
SELECT DATE(MAX(created_at)) AS the_max
|
126
|
+
FROM some_database.some_source_table]
|
127
|
+
|
128
|
+
res.to_a.first['the_max']
|
129
|
+
end
|
130
|
+
|
131
|
+
etl.etl do |etl, lbound, ubound|
|
132
|
+
# The etl block is the main part of the framework. Note: there are
|
133
|
+
# two extra args with the iterator this time around: "lbound" and "ubound"
|
134
|
+
#
|
135
|
+
# "lbound" is the lower bound of the current iteration. When iterating
|
136
|
+
# from 0 to 10 and stepping by 2, the lbound would equal 2 on the
|
137
|
+
# second iteration.
|
138
|
+
#
|
139
|
+
# "ubound" is the upper bound of the current iteration. In continuing with the
|
140
|
+
# example above, when iterating from 0 to 10 and stepping by 2, the ubound would
|
141
|
+
# equal 4 on the second iteration.
|
142
|
+
#
|
143
|
+
# These args can be used to "window" SQL queries or other code operations.
|
144
|
+
#
|
145
|
+
# As a first example, to iterate over a set of ids:
|
146
|
+
#
|
147
|
+
# etl.query %[
|
148
|
+
# REPLACE INTO some_database.some_destination_table (
|
149
|
+
# created_date
|
150
|
+
# , user_id
|
151
|
+
# , total_amount
|
152
|
+
# ) SELECT
|
153
|
+
# DATE(sst.created_at) AS created_date
|
154
|
+
# , sst.user_id
|
155
|
+
# , SUM(sst.amount) AS total_amount
|
156
|
+
# FROM
|
157
|
+
# some_database.some_source_table sst
|
158
|
+
# WHERE
|
159
|
+
# sst.user_id > #{lbound} AND sst.user_id <= #{ubound}
|
160
|
+
# GROUP BY
|
161
|
+
# DATE(sst.created_at)
|
162
|
+
# , sst.user_id]
|
163
|
+
#
|
164
|
+
# To "window" a SQL query using dates:
|
165
|
+
#
|
166
|
+
etl.query %[
|
167
|
+
REPLACE INTO some_database.some_destination_table (
|
168
|
+
created_date
|
169
|
+
, user_id
|
170
|
+
, total_amount
|
171
|
+
) SELECT
|
172
|
+
DATE(sst.created_at) AS created_date
|
173
|
+
, sst.user_id
|
174
|
+
, SUM(sst.amount) AS total_amount
|
175
|
+
FROM
|
176
|
+
some_database.some_source_table sst
|
177
|
+
WHERE
|
178
|
+
-- Note the usage of quotes surrounding the lbound and ubound vars.
|
179
|
+
-- This is is required when dealing with dates / datetimes
|
180
|
+
sst.created_at >= '#{lbound}' AND sst.created_at < '#{ubound}'
|
181
|
+
GROUP BY
|
182
|
+
DATE(sst.created_at)
|
183
|
+
, sst.user_id]
|
184
|
+
|
185
|
+
# Note that there is no sql sanitization here so there is *potential* for SQL
|
186
|
+
# injection. That being said you'll likely be using this gem in an internal
|
187
|
+
# tool so hopefully your co-workers are not looking to sabotage your ETL
|
188
|
+
# pipeline. Just be aware of this and handle it as you see fit.
|
189
|
+
end
|
190
|
+
|
191
|
+
etl.after_etl do |etl|
|
192
|
+
# All post-ETL work is performed in this block.
|
193
|
+
#
|
194
|
+
# Again, to finish up with an example:
|
195
|
+
#
|
196
|
+
etl.query %[
|
197
|
+
UPDATE some_database.some_destination_table
|
198
|
+
SET message = "WOW"
|
199
|
+
WHERE total_amount > 100]
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
etl.run
|
204
|
+
|
205
|
+
puts %[
|
206
|
+
ETL complete. Now go have a look at some_database.some_destination_table
|
207
|
+
That was build from some_database.some_source_table using the above ETL configuration.
|
208
|
+
|
209
|
+
SELECT * FROM some_database.some_destination_table;]
|
data/lib/etl.rb
CHANGED
@@ -24,9 +24,21 @@ class ETL
|
|
24
24
|
:stop
|
25
25
|
]
|
26
26
|
|
27
|
+
def self.connection= connection
|
28
|
+
@connection = connection
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.connection
|
32
|
+
@connection
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.defaults
|
36
|
+
{connection: @connection}
|
37
|
+
end
|
38
|
+
|
27
39
|
def initialize attributes = {}
|
28
|
-
|
29
|
-
self.send "#{
|
40
|
+
self.class.defaults.merge(attributes).each do |key, value|
|
41
|
+
self.send "#{key}=", value
|
30
42
|
end
|
31
43
|
default_logger! unless attributes.keys.include?(:logger)
|
32
44
|
end
|
data/lib/etl/helpers.rb
CHANGED
@@ -23,7 +23,7 @@ class ETL
|
|
23
23
|
caster = ->(str) { "DATE(#{str})" }
|
24
24
|
end
|
25
25
|
|
26
|
-
max_sql_clause = "
|
26
|
+
max_sql_clause = "IFNULL(MAX(#{table}.#{column}), #{default_value})"
|
27
27
|
max_sql_clause = caster.(max_sql_clause) if caster
|
28
28
|
|
29
29
|
sql = <<-EOS
|
data/lib/etl/version.rb
CHANGED
data/spec/etl_spec.rb
CHANGED
@@ -38,6 +38,25 @@ end
|
|
38
38
|
describe ETL do
|
39
39
|
let(:logger) { nil }
|
40
40
|
|
41
|
+
describe ".connection=" do
|
42
|
+
let(:class_level_connection) { stub('class_level_connection') }
|
43
|
+
|
44
|
+
it "sets the #connection for all instances" do
|
45
|
+
ETL.connection = class_level_connection
|
46
|
+
etl = ETL.new
|
47
|
+
expect(etl.connection).to eq class_level_connection
|
48
|
+
end
|
49
|
+
|
50
|
+
it "allows instance-level overrides" do
|
51
|
+
instance_level_connection = stub('instance_level_connection')
|
52
|
+
ETL.connection = class_level_connection
|
53
|
+
etl_with_connection_override = ETL.new connection: instance_level_connection
|
54
|
+
etl = ETL.new
|
55
|
+
expect(etl.connection).to eq class_level_connection
|
56
|
+
expect(etl_with_connection_override.connection).to eq instance_level_connection
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
41
60
|
describe "#logger=" do
|
42
61
|
let(:etl) { described_class.new connection: stub }
|
43
62
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ETL
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -108,6 +108,8 @@ files:
|
|
108
108
|
- README.md
|
109
109
|
- Rakefile
|
110
110
|
- etl.gemspec
|
111
|
+
- examples/basic_etl.rb
|
112
|
+
- examples/iterator_etl.rb
|
111
113
|
- lib/etl.rb
|
112
114
|
- lib/etl/helpers.rb
|
113
115
|
- lib/etl/version.rb
|