retl 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +101 -30
- data/lib/retl/context.rb +7 -1
- data/lib/retl/handlers/path_handler.rb +18 -0
- data/lib/retl/path_builder.rb +5 -0
- data/lib/retl/transformation.rb +8 -2
- data/lib/retl/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1262b0170c7b1fdbd343d510a6e8bab6edb3bc01
|
4
|
+
data.tar.gz: 4eeafb6b53d22901171185786c297b3b18fbfcb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff134b73c4e49380cb63b6a2251d6d2a1823c8ff4c8c87fbae64860d0e96bfb4b740b3670a2858db00e1aac84ffdc7cb8251aeb93750e4544a563c5b4c2d0233
|
7
|
+
data.tar.gz: f02e88e10931ac4ff35e33223f539bb408e55faf2e9d5d7212fbeffd871a410f10fdc3f8df131b1b7be71f1370fea807839420f09e2cb4703b3b26e84bc429f1
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Change Log
|
2
|
+
|
3
|
+
### 0.0.4
|
4
|
+
|
5
|
+
- Bug Fix: fixes memoized fork results
|
6
|
+
- Bug Fix: fixes memoized transformation results
|
7
|
+
- New Feature: added the `path` step to include other paths
|
8
|
+
- New Feature: paths that include other paths can now resolve their dependencies
|
9
|
+
with a hash or a block
|
10
|
+
- Improvement: Dependencies that don't define a block will raise an
|
11
|
+
ArgumentError if the dependency is not resolved during translation. The block
|
12
|
+
then becomes the default value of the dependency when it is not passed in.
|
13
|
+
- Specs: Added specs for all of the README examples.
|
14
|
+
|
15
|
+
|
16
|
+
### 0.0.3
|
17
|
+
|
18
|
+
- Proof of concept phase; too much going on too fast.
|
data/README.md
CHANGED
@@ -44,8 +44,8 @@ returned, which is `Enumerable`.
|
|
44
44
|
|
45
45
|
```
|
46
46
|
data = [
|
47
|
-
{
|
48
|
-
{
|
47
|
+
{ first_name: "David" , last_name: "Biehl" },
|
48
|
+
{ first_name: "Indiana", last_name: "Jones" }
|
49
49
|
]
|
50
50
|
|
51
51
|
result = my_path.transform(data)
|
@@ -70,15 +70,25 @@ common use is to use `replace` as the first step to convert incoming objects
|
|
70
70
|
into hashes.
|
71
71
|
|
72
72
|
```
|
73
|
-
users =
|
73
|
+
users = [
|
74
|
+
["David", "Biehl", 33],
|
75
|
+
["Indiana", "Jones", 50]
|
76
|
+
]
|
74
77
|
|
75
78
|
my_path = Retl::Path.new do
|
76
|
-
replace do |
|
77
|
-
|
79
|
+
replace do |row|
|
80
|
+
{ first_name: row[0], last_name: row[1], age: row[2] }
|
78
81
|
end
|
79
82
|
|
80
83
|
# perform other steps with the hash
|
81
84
|
end
|
85
|
+
|
86
|
+
result = my_path.transform(users)
|
87
|
+
result.to_a
|
88
|
+
#=> [
|
89
|
+
#=> {:first_name=>"David", :last_name=>"Biehl", :age=>33},
|
90
|
+
#=> {:first_name=>"Indiana", :last_name=>"Jones", :age=>50}
|
91
|
+
#=> ]
|
82
92
|
```
|
83
93
|
|
84
94
|
#### Filter & Reject
|
@@ -89,9 +99,9 @@ truthy. `select` is an alias for `filter`.
|
|
89
99
|
|
90
100
|
```
|
91
101
|
data = [
|
92
|
-
{name: "David" , age: 33}
|
93
|
-
{name: "Indiana", age: 50}
|
94
|
-
{name: "Sully" , age: 7}
|
102
|
+
{name: "David" , age: 33},
|
103
|
+
{name: "Indiana", age: 50},
|
104
|
+
{name: "Sully" , age: 7},
|
95
105
|
{name: "Boo" , age: 3}
|
96
106
|
]
|
97
107
|
|
@@ -111,7 +121,8 @@ end
|
|
111
121
|
|
112
122
|
result = my_path.transform(data)
|
113
123
|
result.to_a
|
114
|
-
#=> [
|
124
|
+
#=> [{:name=>"David", :age=>33, :adult_or_child=>"adult"}]
|
125
|
+
|
115
126
|
```
|
116
127
|
|
117
128
|
#### Calculate
|
@@ -162,9 +173,9 @@ are unaffected by any steps that take place after the fork is defined.
|
|
162
173
|
|
163
174
|
```
|
164
175
|
data = [
|
165
|
-
{ name: "David" , age: 33 }
|
166
|
-
{ name: "Indiana", age: 50 }
|
167
|
-
{ name: "Sully" , age: 7 }
|
176
|
+
{ name: "David" , age: 33 },
|
177
|
+
{ name: "Indiana", age: 50 },
|
178
|
+
{ name: "Sully" , age: 7 },
|
168
179
|
{ name: "Boo" , age: 3 }
|
169
180
|
]
|
170
181
|
|
@@ -208,7 +219,7 @@ The `explode` step adds additional data to the Path. The return value of the
|
|
208
219
|
block should respond to `#each`, like an Array.
|
209
220
|
|
210
221
|
```
|
211
|
-
my_path =
|
222
|
+
my_path = Retl::Path.new do
|
212
223
|
explode do |number|
|
213
224
|
number.times.map { |x| x + x + x }
|
214
225
|
end
|
@@ -218,32 +229,39 @@ my_path = Reth::Path.new do
|
|
218
229
|
end
|
219
230
|
end
|
220
231
|
|
221
|
-
my_path.transform(6)
|
232
|
+
result = my_path.transform([6])
|
233
|
+
result.to_a
|
222
234
|
#=> [3, 9, 15]
|
235
|
+
|
236
|
+
expect(result.to_a).to eq([3, 9, 15])
|
223
237
|
```
|
224
238
|
|
225
239
|
#### Path Reuse
|
226
240
|
|
227
|
-
In rETL,
|
228
|
-
to ensure that calculations yield consistent results throughout the
|
229
|
-
project. Consistent data and meanings will make the data warehouse
|
230
|
-
understand for data consumers.
|
241
|
+
In rETL, paths can be re-used with the `path` step. Common Paths can be
|
242
|
+
defined to ensure that calculations yield consistent results throughout the
|
243
|
+
entire ETL project. Consistent data and meanings will make the data warehouse
|
244
|
+
easier to understand for data consumers.
|
231
245
|
|
232
246
|
```
|
233
247
|
AdultOrChild = Retl::Path.new do
|
234
|
-
calculate(:adult_or_child) do
|
248
|
+
calculate(:adult_or_child) do |row|
|
235
249
|
row[:age] >= 18 ? "adult" : "child"
|
236
250
|
end
|
237
251
|
end
|
238
252
|
|
239
253
|
my_path = Retl::Path.new do
|
240
|
-
|
254
|
+
path AdultOrChild
|
241
255
|
|
242
|
-
### perform other steps
|
256
|
+
### perform other steps, if necessary
|
243
257
|
end
|
258
|
+
|
259
|
+
result = my_path.transform([{age: 3}])
|
260
|
+
result.to_a
|
261
|
+
#=> [ { age: 3, adult_or_child: "child" } ]
|
244
262
|
```
|
245
263
|
|
246
|
-
|
264
|
+
#### Dependencies
|
247
265
|
|
248
266
|
Dependencies can be defined with `depends_on(name)`. The value of the
|
249
267
|
dependency is accessible inside of each step by its name. In this example, we'll
|
@@ -251,21 +269,26 @@ define an `age_lookup` dependency.
|
|
251
269
|
|
252
270
|
```
|
253
271
|
my_path = Retl::Path.new do
|
254
|
-
depends_on(:age_lookup) do
|
272
|
+
depends_on(:age_lookup) do # hint: the block returns the default value
|
255
273
|
{
|
256
274
|
"adult" => "Adults are 18 or older",
|
257
275
|
"child" => "Children are younger than 18"
|
258
276
|
}
|
259
277
|
end
|
260
278
|
|
261
|
-
|
279
|
+
path AdultOrChild # see previous example
|
262
280
|
|
263
281
|
calculate(:age_description) do |row|
|
264
282
|
age_lookup[row[:adult_or_child]]
|
265
283
|
end
|
266
284
|
end
|
285
|
+
|
286
|
+
result = my_path.transform([{age: 19}])
|
287
|
+
result.to_a
|
267
288
|
```
|
268
289
|
|
290
|
+
##### Dependency Injection
|
291
|
+
|
269
292
|
Dependencies can also be injected when the transformation takes place. This is
|
270
293
|
useful for testing by passing in mocks or stubs. Also, concrete results from
|
271
294
|
other paths can be merged into a single path making data integration
|
@@ -273,11 +296,9 @@ possible.
|
|
273
296
|
|
274
297
|
```
|
275
298
|
my_path = Retl::Path.new do
|
276
|
-
depends_on(:age_lookup)
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
step AdultOrChild
|
299
|
+
depends_on(:age_lookup) # hint: without a block, the dependency must
|
300
|
+
# be provided when #transform is called
|
301
|
+
path AdultOrChild
|
281
302
|
|
282
303
|
calculate(:age_description) do |row|
|
283
304
|
age_lookup[row[:adult_or_child]]
|
@@ -289,8 +310,51 @@ age_lookup_hash = {
|
|
289
310
|
"child" => "Children are younger than 18"
|
290
311
|
}
|
291
312
|
|
292
|
-
my_path.transform(
|
313
|
+
result = my_path.transform([{age: 4}], age_lookup: age_lookup_hash)
|
314
|
+
result.to_a
|
315
|
+
#=> [ { age: 4, adult_or_child: "child", age_description: "Children are younger than 18" } ]
|
316
|
+
```
|
317
|
+
|
318
|
+
#### Path Reuse with Dependencies
|
319
|
+
|
320
|
+
The `AdultOrChild` path above isn't very flexible. It depends on an `:age` key
|
321
|
+
to be present in the hash. What if our data uses a different key, like
|
322
|
+
`:years_since_birth`? rETL can make this more flexible by adding dependencies.
|
323
|
+
|
293
324
|
```
|
325
|
+
FlexibleAdultOrChild = Retl::Path.new do
|
326
|
+
depends_on(:from)
|
327
|
+
|
328
|
+
depends_on(:to) do |options|
|
329
|
+
options[:to] || :adult_or_child # use a default value
|
330
|
+
end
|
331
|
+
|
332
|
+
transform do |row|
|
333
|
+
row[to] = row[from] >= 18 ? "adult" : "child"
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
|
338
|
+
path_with_age = Retl::Path.new do
|
339
|
+
path FlexibleAdultOrChild, from: :age
|
340
|
+
end
|
341
|
+
|
342
|
+
path_with_age_result = path_with_age.transform([{age: 33}])
|
343
|
+
path_with_age_result.to_a
|
344
|
+
#=> [{age: 33, adult_or_child: "adult"}]
|
345
|
+
|
346
|
+
|
347
|
+
path_with_years_since_birth = Retl::Path.new do
|
348
|
+
path FlexibleAdultOrChild do
|
349
|
+
{ from: :years_since_birth, to: :age_classification } # blocks work too
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
result = path_with_years_since_birth.transform([{years_since_birth: 7}])
|
354
|
+
result.to_a
|
355
|
+
#=> [{years_since_birth: 7, age_classification: "child"}]
|
356
|
+
```
|
357
|
+
|
294
358
|
|
295
359
|
## Roadmap
|
296
360
|
|
@@ -306,6 +370,13 @@ path.transform(Enumerable)
|
|
306
370
|
Enumerables in, Enumerables out. This makes the application of the gem pretty much
|
307
371
|
universal for any type of data transformation requirement in Ruby.
|
308
372
|
|
373
|
+
### Next Steps
|
374
|
+
|
375
|
+
- Error handling
|
376
|
+
- Tracing and logging
|
377
|
+
- Extract patterns
|
378
|
+
- Load patterns
|
379
|
+
|
309
380
|
## Installation
|
310
381
|
|
311
382
|
Add this line to your application's Gemfile:
|
data/lib/retl/context.rb
CHANGED
@@ -4,7 +4,13 @@ module Retl
|
|
4
4
|
class Context
|
5
5
|
def initialize(path, options={})
|
6
6
|
path.dependencies.each do |name, dependency|
|
7
|
-
|
7
|
+
if dependency.nil? && !options[name]
|
8
|
+
raise ArgumentError, "This transformation depends on `name`"
|
9
|
+
end
|
10
|
+
|
11
|
+
self.class.send(:define_method, name) do
|
12
|
+
(dependency && dependency.call(options)) || options[name]
|
13
|
+
end
|
8
14
|
end
|
9
15
|
|
10
16
|
@_events = EventRouter.new
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative "handler"
|
2
|
+
|
3
|
+
module Retl
|
4
|
+
class PathHandler < Handler
|
5
|
+
def initialize(path, dependencies={}, &block)
|
6
|
+
super()
|
7
|
+
@path = path
|
8
|
+
dependencies.merge!(block.call) if block
|
9
|
+
@context = Context.new(@path, dependencies)
|
10
|
+
end
|
11
|
+
|
12
|
+
def push_in(data, context)
|
13
|
+
@context.execute_step(@path, data).each do |result|
|
14
|
+
push_out(result)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/retl/path_builder.rb
CHANGED
@@ -3,6 +3,7 @@ require "retl/handlers/transform_handler"
|
|
3
3
|
require "retl/handlers/filter_handler"
|
4
4
|
require "retl/handlers/inspect_handler"
|
5
5
|
require "retl/handlers/explode_handler"
|
6
|
+
require "retl/handlers/path_handler"
|
6
7
|
|
7
8
|
|
8
9
|
module Retl
|
@@ -58,5 +59,9 @@ module Retl
|
|
58
59
|
action ||= block
|
59
60
|
step(action, handler: ExplodeHandler)
|
60
61
|
end
|
62
|
+
|
63
|
+
def path(path, dependencies={}, &block)
|
64
|
+
@path.add_handler PathHandler.new(path, dependencies, &block)
|
65
|
+
end
|
61
66
|
end
|
62
67
|
end
|
data/lib/retl/transformation.rb
CHANGED
@@ -9,6 +9,7 @@ module Retl
|
|
9
9
|
@enumerable, @path, @options = enumerable, path, options
|
10
10
|
@context = Context.new(@path, @options)
|
11
11
|
@fork_data = ForkDataCollector.new(@context)
|
12
|
+
@forks = {}
|
12
13
|
end
|
13
14
|
|
14
15
|
def each(&block)
|
@@ -29,8 +30,12 @@ module Retl
|
|
29
30
|
end
|
30
31
|
|
31
32
|
def forks(name)
|
32
|
-
|
33
|
-
|
33
|
+
unless @forks[name]
|
34
|
+
build_each_result
|
35
|
+
@forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
|
36
|
+
end
|
37
|
+
|
38
|
+
@forks[name]
|
34
39
|
end
|
35
40
|
|
36
41
|
def load_into(*destinations)
|
@@ -55,6 +60,7 @@ module Retl
|
|
55
60
|
yield data if block_given?
|
56
61
|
result << data
|
57
62
|
end
|
63
|
+
result
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
data/lib/retl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: retl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Biehl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -62,6 +62,7 @@ files:
|
|
62
62
|
- ".gitignore"
|
63
63
|
- ".rspec"
|
64
64
|
- ".travis.yml"
|
65
|
+
- CHANGELOG.md
|
65
66
|
- CODE_OF_CONDUCT.md
|
66
67
|
- Gemfile
|
67
68
|
- LICENSE.txt
|
@@ -78,6 +79,7 @@ files:
|
|
78
79
|
- lib/retl/handlers/fork_handler.rb
|
79
80
|
- lib/retl/handlers/handler.rb
|
80
81
|
- lib/retl/handlers/inspect_handler.rb
|
82
|
+
- lib/retl/handlers/path_handler.rb
|
81
83
|
- lib/retl/handlers/step_handler.rb
|
82
84
|
- lib/retl/handlers/transform_handler.rb
|
83
85
|
- lib/retl/path.rb
|