retl 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +101 -30
- data/lib/retl/context.rb +7 -1
- data/lib/retl/handlers/path_handler.rb +18 -0
- data/lib/retl/path_builder.rb +5 -0
- data/lib/retl/transformation.rb +8 -2
- data/lib/retl/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1262b0170c7b1fdbd343d510a6e8bab6edb3bc01
|
4
|
+
data.tar.gz: 4eeafb6b53d22901171185786c297b3b18fbfcb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff134b73c4e49380cb63b6a2251d6d2a1823c8ff4c8c87fbae64860d0e96bfb4b740b3670a2858db00e1aac84ffdc7cb8251aeb93750e4544a563c5b4c2d0233
|
7
|
+
data.tar.gz: f02e88e10931ac4ff35e33223f539bb408e55faf2e9d5d7212fbeffd871a410f10fdc3f8df131b1b7be71f1370fea807839420f09e2cb4703b3b26e84bc429f1
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Change Log
|
2
|
+
|
3
|
+
### 0.0.4
|
4
|
+
|
5
|
+
- Bug Fix: fixes memoized fork results
|
6
|
+
- Bug Fix: fixes memoized transformation results
|
7
|
+
- New Feature: added the `path` step to include other paths
|
8
|
+
- New Feature: paths that include other paths can now resolve their dependencies
|
9
|
+
with a hash or a block
|
10
|
+
- Improvement: Dependencies that don't define a block will raise an
|
11
|
+
ArgumentError if the dependency is not resolved during transformation. The block
|
12
|
+
then becomes the default value of the dependency when it is not passed in.
|
13
|
+
- Specs: Added specs for all of the README examples.
|
14
|
+
|
15
|
+
|
16
|
+
### 0.0.3
|
17
|
+
|
18
|
+
- Proof of concept phase; too much going on too fast.
|
data/README.md
CHANGED
@@ -44,8 +44,8 @@ returned, which is `Enumerable`.
|
|
44
44
|
|
45
45
|
```
|
46
46
|
data = [
|
47
|
-
{
|
48
|
-
{
|
47
|
+
{ first_name: "David" , last_name: "Biehl" },
|
48
|
+
{ first_name: "Indiana", last_name: "Jones" }
|
49
49
|
]
|
50
50
|
|
51
51
|
result = my_path.transform(data)
|
@@ -70,15 +70,25 @@ common use is to use `replace` as the first step to convert incoming objects
|
|
70
70
|
into hashes.
|
71
71
|
|
72
72
|
```
|
73
|
-
users =
|
73
|
+
users = [
|
74
|
+
["David", "Biehl", 33],
|
75
|
+
["Indiana", "Jones", 50]
|
76
|
+
]
|
74
77
|
|
75
78
|
my_path = Retl::Path.new do
|
76
|
-
replace do |
|
77
|
-
|
79
|
+
replace do |row|
|
80
|
+
{ first_name: row[0], last_name: row[1], age: row[2] }
|
78
81
|
end
|
79
82
|
|
80
83
|
# perform other steps with the hash
|
81
84
|
end
|
85
|
+
|
86
|
+
result = my_path.transform(users)
|
87
|
+
result.to_a
|
88
|
+
#=> [
|
89
|
+
#=> {:first_name=>"David", :last_name=>"Biehl", :age=>33},
|
90
|
+
#=> {:first_name=>"Indiana", :last_name=>"Jones", :age=>50}
|
91
|
+
#=> ]
|
82
92
|
```
|
83
93
|
|
84
94
|
#### Filter & Reject
|
@@ -89,9 +99,9 @@ truthy. `select` is an alias for `filter`.
|
|
89
99
|
|
90
100
|
```
|
91
101
|
data = [
|
92
|
-
{name: "David" , age: 33}
|
93
|
-
{name: "Indiana", age: 50}
|
94
|
-
{name: "Sully" , age: 7}
|
102
|
+
{name: "David" , age: 33},
|
103
|
+
{name: "Indiana", age: 50},
|
104
|
+
{name: "Sully" , age: 7},
|
95
105
|
{name: "Boo" , age: 3}
|
96
106
|
]
|
97
107
|
|
@@ -111,7 +121,8 @@ end
|
|
111
121
|
|
112
122
|
result = my_path.transform(data)
|
113
123
|
result.to_a
|
114
|
-
#=> [
|
124
|
+
#=> [{:name=>"David", :age=>33, :adult_or_child=>"adult"}]
|
125
|
+
|
115
126
|
```
|
116
127
|
|
117
128
|
#### Calculate
|
@@ -162,9 +173,9 @@ are unaffected by any steps that take place after the fork is defined.
|
|
162
173
|
|
163
174
|
```
|
164
175
|
data = [
|
165
|
-
{ name: "David" , age: 33 }
|
166
|
-
{ name: "Indiana", age: 50 }
|
167
|
-
{ name: "Sully" , age: 7 }
|
176
|
+
{ name: "David" , age: 33 },
|
177
|
+
{ name: "Indiana", age: 50 },
|
178
|
+
{ name: "Sully" , age: 7 },
|
168
179
|
{ name: "Boo" , age: 3 }
|
169
180
|
]
|
170
181
|
|
@@ -208,7 +219,7 @@ The `explode` step adds additional data to the Path. The return value of the
|
|
208
219
|
block should respond to `#each`, like an Array.
|
209
220
|
|
210
221
|
```
|
211
|
-
my_path =
|
222
|
+
my_path = Retl::Path.new do
|
212
223
|
explode do |number|
|
213
224
|
number.times.map { |x| x + x + x }
|
214
225
|
end
|
@@ -218,32 +229,39 @@ my_path = Reth::Path.new do
|
|
218
229
|
end
|
219
230
|
end
|
220
231
|
|
221
|
-
my_path.transform(6)
|
232
|
+
result = my_path.transform([6])
|
233
|
+
result.to_a
|
222
234
|
#=> [3, 9, 15]
|
235
|
+
|
236
|
+
expect(result.to_a).to eq([3, 9, 15])
|
223
237
|
```
|
224
238
|
|
225
239
|
#### Path Reuse
|
226
240
|
|
227
|
-
In rETL,
|
228
|
-
to ensure that calculations yield consistent results throughout the
|
229
|
-
project. Consistent data and meanings will make the data warehouse
|
230
|
-
understand for data consumers.
|
241
|
+
In rETL, paths can be re-used with the `path` step. Common Paths can be
|
242
|
+
defined to ensure that calculations yield consistent results throughout the
|
243
|
+
entire ETL project. Consistent data and meanings will make the data warehouse
|
244
|
+
easier to understand for data consumers.
|
231
245
|
|
232
246
|
```
|
233
247
|
AdultOrChild = Retl::Path.new do
|
234
|
-
calculate(:adult_or_child) do
|
248
|
+
calculate(:adult_or_child) do |row|
|
235
249
|
row[:age] >= 18 ? "adult" : "child"
|
236
250
|
end
|
237
251
|
end
|
238
252
|
|
239
253
|
my_path = Retl::Path.new do
|
240
|
-
|
254
|
+
path AdultOrChild
|
241
255
|
|
242
|
-
### perform other steps
|
256
|
+
### perform other steps, if necessary
|
243
257
|
end
|
258
|
+
|
259
|
+
result = my_path.transform([{age: 3}])
|
260
|
+
result.to_a
|
261
|
+
#=> [ { age: 3, adult_or_child: "child" } ]
|
244
262
|
```
|
245
263
|
|
246
|
-
|
264
|
+
#### Dependencies
|
247
265
|
|
248
266
|
Dependencies can be defined with `depends_on(name)`. The value of the
|
249
267
|
dependency is accessible inside of each step by its name. In this example, we'll
|
@@ -251,21 +269,26 @@ define an `age_lookup` dependency.
|
|
251
269
|
|
252
270
|
```
|
253
271
|
my_path = Retl::Path.new do
|
254
|
-
depends_on(:age_lookup) do
|
272
|
+
depends_on(:age_lookup) do # hint: the block returns the default value
|
255
273
|
{
|
256
274
|
"adult" => "Adults are 18 or older",
|
257
275
|
"child" => "Children are younger than 18"
|
258
276
|
}
|
259
277
|
end
|
260
278
|
|
261
|
-
|
279
|
+
path AdultOrChild # see previous example
|
262
280
|
|
263
281
|
calculate(:age_description) do |row|
|
264
282
|
age_lookup[row[:adult_or_child]]
|
265
283
|
end
|
266
284
|
end
|
285
|
+
|
286
|
+
result = my_path.transform([{age: 19}])
|
287
|
+
result.to_a
|
267
288
|
```
|
268
289
|
|
290
|
+
##### Dependency Injection
|
291
|
+
|
269
292
|
Dependencies can also be injected when the transformation takes place. This is
|
270
293
|
useful for testing by passing in mocks or stubs. Also, concrete results from
|
271
294
|
other paths can be merged into a single path, making data integration
|
@@ -273,11 +296,9 @@ possible.
|
|
273
296
|
|
274
297
|
```
|
275
298
|
my_path = Retl::Path.new do
|
276
|
-
depends_on(:age_lookup)
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
step AdultOrChild
|
299
|
+
depends_on(:age_lookup) # hint: without a block, the dependency must
|
300
|
+
# be provided when #transform is called
|
301
|
+
path AdultOrChild
|
281
302
|
|
282
303
|
calculate(:age_description) do |row|
|
283
304
|
age_lookup[row[:adult_or_child]]
|
@@ -289,8 +310,51 @@ age_lookup_hash = {
|
|
289
310
|
"child" => "Children are younger than 18"
|
290
311
|
}
|
291
312
|
|
292
|
-
my_path.transform(
|
313
|
+
result = my_path.transform([{age: 4}], age_lookup: age_lookup_hash)
|
314
|
+
result.to_a
|
315
|
+
#=> [ { age: 4, adult_or_child: "child", age_description: "Children are younger than 18" } ]
|
316
|
+
```
|
317
|
+
|
318
|
+
#### Path Reuse with Dependencies
|
319
|
+
|
320
|
+
The `AdultOrChild` path above isn't very flexible. It depends on an `:age` key
|
321
|
+
to be present in the hash. What if our data uses a different key, like
|
322
|
+
`:years_since_birth`? rETL can make this more flexible by adding dependencies.
|
323
|
+
|
293
324
|
```
|
325
|
+
FlexibleAdultOrChild = Retl::Path.new do
|
326
|
+
depends_on(:from)
|
327
|
+
|
328
|
+
depends_on(:to) do |options|
|
329
|
+
options[:to] || :adult_or_child # use a default value
|
330
|
+
end
|
331
|
+
|
332
|
+
transform do |row|
|
333
|
+
row[to] = row[from] >= 18 ? "adult" : "child"
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
|
338
|
+
path_with_age = Retl::Path.new do
|
339
|
+
path FlexibleAdultOrChild, from: :age
|
340
|
+
end
|
341
|
+
|
342
|
+
path_with_age_result = path_with_age.transform([{age: 33}])
|
343
|
+
path_with_age_result.to_a
|
344
|
+
#=> [{age: 33, adult_or_child: "adult"}]
|
345
|
+
|
346
|
+
|
347
|
+
path_with_years_since_birth = Retl::Path.new do
|
348
|
+
path FlexibleAdultOrChild do
|
349
|
+
{ from: :years_since_birth, to: :age_classification } # blocks work too
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
result = path_with_years_since_birth.transform([{years_since_birth: 7}])
|
354
|
+
result.to_a
|
355
|
+
#=> [{years_since_birth: 7, age_classification: "child"}]
|
356
|
+
```
|
357
|
+
|
294
358
|
|
295
359
|
## Roadmap
|
296
360
|
|
@@ -306,6 +370,13 @@ path.transform(Enumerable)
|
|
306
370
|
Enumerables in, Enumerables out. This makes the application of the gem pretty much
|
307
371
|
universal for any type of data transformation requirement in Ruby.
|
308
372
|
|
373
|
+
### Next Steps
|
374
|
+
|
375
|
+
- Error handling
|
376
|
+
- Tracing and logging
|
377
|
+
- Extract patterns
|
378
|
+
- Load patterns
|
379
|
+
|
309
380
|
## Installation
|
310
381
|
|
311
382
|
Add this line to your application's Gemfile:
|
data/lib/retl/context.rb
CHANGED
@@ -4,7 +4,13 @@ module Retl
|
|
4
4
|
class Context
|
5
5
|
def initialize(path, options={})
|
6
6
|
path.dependencies.each do |name, dependency|
|
7
|
-
|
7
|
+
if dependency.nil? && !options[name]
|
8
|
+
raise ArgumentError, "This transformation depends on `name`"
|
9
|
+
end
|
10
|
+
|
11
|
+
self.class.send(:define_method, name) do
|
12
|
+
(dependency && dependency.call(options)) || options[name]
|
13
|
+
end
|
8
14
|
end
|
9
15
|
|
10
16
|
@_events = EventRouter.new
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative "handler"
|
2
|
+
|
3
|
+
module Retl
|
4
|
+
class PathHandler < Handler
|
5
|
+
def initialize(path, dependencies={}, &block)
|
6
|
+
super()
|
7
|
+
@path = path
|
8
|
+
dependencies.merge!(block.call) if block
|
9
|
+
@context = Context.new(@path, dependencies)
|
10
|
+
end
|
11
|
+
|
12
|
+
def push_in(data, context)
|
13
|
+
@context.execute_step(@path, data).each do |result|
|
14
|
+
push_out(result)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/retl/path_builder.rb
CHANGED
@@ -3,6 +3,7 @@ require "retl/handlers/transform_handler"
|
|
3
3
|
require "retl/handlers/filter_handler"
|
4
4
|
require "retl/handlers/inspect_handler"
|
5
5
|
require "retl/handlers/explode_handler"
|
6
|
+
require "retl/handlers/path_handler"
|
6
7
|
|
7
8
|
|
8
9
|
module Retl
|
@@ -58,5 +59,9 @@ module Retl
|
|
58
59
|
action ||= block
|
59
60
|
step(action, handler: ExplodeHandler)
|
60
61
|
end
|
62
|
+
|
63
|
+
def path(path, dependencies={}, &block)
|
64
|
+
@path.add_handler PathHandler.new(path, dependencies, &block)
|
65
|
+
end
|
61
66
|
end
|
62
67
|
end
|
data/lib/retl/transformation.rb
CHANGED
@@ -9,6 +9,7 @@ module Retl
|
|
9
9
|
@enumerable, @path, @options = enumerable, path, options
|
10
10
|
@context = Context.new(@path, @options)
|
11
11
|
@fork_data = ForkDataCollector.new(@context)
|
12
|
+
@forks = {}
|
12
13
|
end
|
13
14
|
|
14
15
|
def each(&block)
|
@@ -29,8 +30,12 @@ module Retl
|
|
29
30
|
end
|
30
31
|
|
31
32
|
def forks(name)
|
32
|
-
|
33
|
-
|
33
|
+
unless @forks[name]
|
34
|
+
build_each_result
|
35
|
+
@forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
|
36
|
+
end
|
37
|
+
|
38
|
+
@forks[name]
|
34
39
|
end
|
35
40
|
|
36
41
|
def load_into(*destinations)
|
@@ -55,6 +60,7 @@ module Retl
|
|
55
60
|
yield data if block_given?
|
56
61
|
result << data
|
57
62
|
end
|
63
|
+
result
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
data/lib/retl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: retl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Biehl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -62,6 +62,7 @@ files:
|
|
62
62
|
- ".gitignore"
|
63
63
|
- ".rspec"
|
64
64
|
- ".travis.yml"
|
65
|
+
- CHANGELOG.md
|
65
66
|
- CODE_OF_CONDUCT.md
|
66
67
|
- Gemfile
|
67
68
|
- LICENSE.txt
|
@@ -78,6 +79,7 @@ files:
|
|
78
79
|
- lib/retl/handlers/fork_handler.rb
|
79
80
|
- lib/retl/handlers/handler.rb
|
80
81
|
- lib/retl/handlers/inspect_handler.rb
|
82
|
+
- lib/retl/handlers/path_handler.rb
|
81
83
|
- lib/retl/handlers/step_handler.rb
|
82
84
|
- lib/retl/handlers/transform_handler.rb
|
83
85
|
- lib/retl/path.rb
|