retl 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65892cccb09b0ee2bde26342b072f3c405b4b735
4
- data.tar.gz: b56d3d24285ce3c40aa5e8e98924fe2cf0a09341
3
+ metadata.gz: 1262b0170c7b1fdbd343d510a6e8bab6edb3bc01
4
+ data.tar.gz: 4eeafb6b53d22901171185786c297b3b18fbfcb6
5
5
  SHA512:
6
- metadata.gz: 50953eae75e94eb9383320c56268c0a18e3d1289d234509e13a90eb4c8d43ae2686c6adf6f45f45aed880680b0dd389f2a532a22555ed3bb140aa79a9f8f27cc
7
- data.tar.gz: 7cd0b87b33d5810820e978722f3c2e3b63f160a01b65ba4105e4f800135b43a2ec68d65eacf034d78542a8da9de6d3aca263a19df9dced3d600ef304952f9940
6
+ metadata.gz: ff134b73c4e49380cb63b6a2251d6d2a1823c8ff4c8c87fbae64860d0e96bfb4b740b3670a2858db00e1aac84ffdc7cb8251aeb93750e4544a563c5b4c2d0233
7
+ data.tar.gz: f02e88e10931ac4ff35e33223f539bb408e55faf2e9d5d7212fbeffd871a410f10fdc3f8df131b1b7be71f1370fea807839420f09e2cb4703b3b26e84bc429f1
@@ -0,0 +1,18 @@
1
+ # Change Log
2
+
3
+ ### 0.0.4
4
+
5
+ - Bug Fix: fixes memoized fork results
6
+ - Bug Fix: fixes memoized transformation results
7
+ - New Feature: added the `path` step to include other paths
8
+ - New Feature: paths that include other paths can now resolve their dependencies
9
+ with a hash or a block
10
+ - Improvement: Dependencies that don't define a block will raise an
11
+ ArgumentError if the dependency is not resolved during transformation. The block
12
+ then becomes the default value of the dependency when it is not passed in.
13
+ - Specs: Added specs for all of the README examples.
14
+
15
+
16
+ ### 0.0.3
17
+
18
+ - Proof of concept phase; too much going on too fast.
data/README.md CHANGED
@@ -44,8 +44,8 @@ returned, which is `Enumerable`.
44
44
 
45
45
  ```
46
46
  data = [
47
- { full_name: "David" , last_name: "Biehl" },
48
- { full_name: "Indiana", last_name: "Jones" }
47
+ { first_name: "David" , last_name: "Biehl" },
48
+ { first_name: "Indiana", last_name: "Jones" }
49
49
  ]
50
50
 
51
51
  result = my_path.transform(data)
@@ -70,15 +70,25 @@ common use is to use `replace` as the first step to convert incoming objects
70
70
  into hashes.
71
71
 
72
72
  ```
73
- users = Users.where(active: true) # a bunch of ActiveRecord objects
73
+ users = [
74
+ ["David", "Biehl", 33],
75
+ ["Indiana", "Jones", 50]
76
+ ]
74
77
 
75
78
  my_path = Retl::Path.new do
76
- replace do |user|
77
- user.to_hash
79
+ replace do |row|
80
+ { first_name: row[0], last_name: row[1], age: row[2] }
78
81
  end
79
82
 
80
83
  # perform other steps with the hash
81
84
  end
85
+
86
+ result = my_path.transform(users)
87
+ result.to_a
88
+ #=> [
89
+ #=> {:first_name=>"David", :last_name=>"Biehl", :age=>33},
90
+ #=> {:first_name=>"Indiana", :last_name=>"Jones", :age=>50}
91
+ #=> ]
82
92
  ```
83
93
 
84
94
  #### Filter & Reject
@@ -89,9 +99,9 @@ truthy. `select` is an alias for `filter`.
89
99
 
90
100
  ```
91
101
  data = [
92
- {name: "David" , age: 33}
93
- {name: "Indiana", age: 50}
94
- {name: "Sully" , age: 7}
102
+ {name: "David" , age: 33},
103
+ {name: "Indiana", age: 50},
104
+ {name: "Sully" , age: 7},
95
105
  {name: "Boo" , age: 3}
96
106
  ]
97
107
 
@@ -111,7 +121,8 @@ end
111
121
 
112
122
  result = my_path.transform(data)
113
123
  result.to_a
114
- #=> [ { name: "David", ... } ]
124
+ #=> [{:name=>"David", :age=>33, :adult_or_child=>"adult"}]
125
+
115
126
  ```
116
127
 
117
128
  #### Calculate
@@ -162,9 +173,9 @@ are unaffected by any steps that take place after the fork is defined.
162
173
 
163
174
  ```
164
175
  data = [
165
- { name: "David" , age: 33 }
166
- { name: "Indiana", age: 50 }
167
- { name: "Sully" , age: 7 }
176
+ { name: "David" , age: 33 },
177
+ { name: "Indiana", age: 50 },
178
+ { name: "Sully" , age: 7 },
168
179
  { name: "Boo" , age: 3 }
169
180
  ]
170
181
 
@@ -208,7 +219,7 @@ The `explode` step adds additional data to the Path. The return value of the
208
219
  block should respond to `#each`, like an Array.
209
220
 
210
221
  ```
211
- my_path = Reth::Path.new do
222
+ my_path = Retl::Path.new do
212
223
  explode do |number|
213
224
  number.times.map { |x| x + x + x }
214
225
  end
@@ -218,32 +229,39 @@ my_path = Reth::Path.new do
218
229
  end
219
230
  end
220
231
 
221
- my_path.transform(6).to_a
232
+ result = my_path.transform([6])
233
+ result.to_a
222
234
  #=> [3, 9, 15]
235
+
236
+ expect(result.to_a).to eq([3, 9, 15])
223
237
  ```
224
238
 
225
239
  #### Path Reuse
226
240
 
227
- In rETL, Paths can be re-used with the `step` step. Common Paths can be defined
228
- to ensure that calculations yield consistent results throughout the entire ETL
229
- project. Consistent data and meanings will make the data warehouse easier to
230
- understand for data consumers.
241
+ In rETL, paths can be re-used with the `path` step. Common Paths can be
242
+ defined to ensure that calculations yield consistent results throughout the
243
+ entire ETL project. Consistent data and meanings will make the data warehouse
244
+ easier to understand for data consumers.
231
245
 
232
246
  ```
233
247
  AdultOrChild = Retl::Path.new do
234
- calculate(:adult_or_child) do
248
+ calculate(:adult_or_child) do |row|
235
249
  row[:age] >= 18 ? "adult" : "child"
236
250
  end
237
251
  end
238
252
 
239
253
  my_path = Retl::Path.new do
240
- step AdultOrChild
254
+ path AdultOrChild
241
255
 
242
- ### perform other steps
256
+ ### perform other steps, if necessary
243
257
  end
258
+
259
+ result = my_path.transform([{age: 3}])
260
+ result.to_a
261
+ #=> [ { age: 3, adult_or_child: "child" } ]
244
262
  ```
245
263
 
246
- ### Dependencies
264
+ #### Dependencies
247
265
 
248
266
  Dependencies can be defined with `depends_on(name)`. The value of the
249
267
  dependency is accessible inside of each step by its name. In this example, we'll
@@ -251,21 +269,26 @@ define an `age_lookup` dependency.
251
269
 
252
270
  ```
253
271
  my_path = Retl::Path.new do
254
- depends_on(:age_lookup) do
272
+ depends_on(:age_lookup) do # hint: the block returns the default value
255
273
  {
256
274
  "adult" => "Adults are 18 or older",
257
275
  "child" => "Children are younger than 18"
258
276
  }
259
277
  end
260
278
 
261
- step AdultOrChild # see previous example
279
+ path AdultOrChild # see previous example
262
280
 
263
281
  calculate(:age_description) do |row|
264
282
  age_lookup[row[:adult_or_child]]
265
283
  end
266
284
  end
285
+
286
+ result = my_path.transform([{age: 19}])
287
+ result.to_a
267
288
  ```
268
289
 
290
+ ##### Dependency Injection
291
+
269
292
  Dependencies can also be injected when the transformation takes place. This is
270
293
  useful for testing by passing in mocks or stubs. Also, concrete results from
271
294
  other paths can be merged into a single path making data integration
@@ -273,11 +296,9 @@ possible.
273
296
 
274
297
  ```
275
298
  my_path = Retl::Path.new do
276
- depends_on(:age_lookup) do |options| # transformation options are passed into `depends_on`
277
- options[:age_lookup] || (raise ArgumentError, "This Path depends on an age lookup hash")
278
- end
279
-
280
- step AdultOrChild
299
+ depends_on(:age_lookup) # hint: without a block, the dependency must
300
+ # be provided when #transform is called
301
+ path AdultOrChild
281
302
 
282
303
  calculate(:age_description) do |row|
283
304
  age_lookup[row[:adult_or_child]]
@@ -289,8 +310,51 @@ age_lookup_hash = {
289
310
  "child" => "Children are younger than 18"
290
311
  }
291
312
 
292
- my_path.transform(data, age_lookup: age_lookup_hash)
313
+ result = my_path.transform([{age: 4}], age_lookup: age_lookup_hash)
314
+ result.to_a
315
+ #=> [ { age: 4, adult_or_child: "child", age_description: "Children are younger than 18" } ]
316
+ ```
317
+
318
+ #### Path Reuse with Dependencies
319
+
320
+ The `AdultOrChild` path above isn't very flexible. It depends on an `:age` key
321
+ to be present in the hash. What if our data uses a different key, like
322
+ `:years_since_birth`? rETL can make this more flexible by adding dependencies.
323
+
293
324
  ```
325
+ FlexibleAdultOrChild = Retl::Path.new do
326
+ depends_on(:from)
327
+
328
+ depends_on(:to) do |options|
329
+ options[:to] || :adult_or_child # use a default value
330
+ end
331
+
332
+ transform do |row|
333
+ row[to] = row[from] >= 18 ? "adult" : "child"
334
+ end
335
+ end
336
+
337
+
338
+ path_with_age = Retl::Path.new do
339
+ path FlexibleAdultOrChild, from: :age
340
+ end
341
+
342
+ path_with_age_result = path_with_age.transform([{age: 33}])
343
+ path_with_age_result.to_a
344
+ #=> [{age: 33, adult_or_child: "adult"}]
345
+
346
+
347
+ path_with_years_since_birth = Retl::Path.new do
348
+ path FlexibleAdultOrChild do
349
+ { from: :years_since_birth, to: :age_classification } # blocks work too
350
+ end
351
+ end
352
+
353
+ result = path_with_years_since_birth.transform([{years_since_birth: 7}])
354
+ result.to_a
355
+ #=> [{years_since_birth: 7, age_classification: "child"}]
356
+ ```
357
+
294
358
 
295
359
  ## Roadmap
296
360
 
@@ -306,6 +370,13 @@ path.transform(Enumerable)
306
370
  Enumerables in, Enumerables out. This makes the application of the gem pretty much
307
371
  universal for any type of data transformation requirement in Ruby.
308
372
 
373
+ ### Next Steps
374
+
375
+ - Error handling
376
+ - Tracing and logging
377
+ - Extract patterns
378
+ - Load patterns
379
+
309
380
  ## Installation
310
381
 
311
382
  Add this line to your application's Gemfile:
@@ -4,7 +4,13 @@ module Retl
4
4
  class Context
5
5
  def initialize(path, options={})
6
6
  path.dependencies.each do |name, dependency|
7
- self.class.send(:define_method, name) { dependency.call(options) }
7
+ if dependency.nil? && !options[name]
8
+ raise ArgumentError, "This transformation depends on `name`"
9
+ end
10
+
11
+ self.class.send(:define_method, name) do
12
+ (dependency && dependency.call(options)) || options[name]
13
+ end
8
14
  end
9
15
 
10
16
  @_events = EventRouter.new
@@ -0,0 +1,18 @@
1
+ require_relative "handler"
2
+
3
+ module Retl
4
+ class PathHandler < Handler
5
+ def initialize(path, dependencies={}, &block)
6
+ super()
7
+ @path = path
8
+ dependencies.merge!(block.call) if block
9
+ @context = Context.new(@path, dependencies)
10
+ end
11
+
12
+ def push_in(data, context)
13
+ @context.execute_step(@path, data).each do |result|
14
+ push_out(result)
15
+ end
16
+ end
17
+ end
18
+ end
@@ -3,6 +3,7 @@ require "retl/handlers/transform_handler"
3
3
  require "retl/handlers/filter_handler"
4
4
  require "retl/handlers/inspect_handler"
5
5
  require "retl/handlers/explode_handler"
6
+ require "retl/handlers/path_handler"
6
7
 
7
8
 
8
9
  module Retl
@@ -58,5 +59,9 @@ module Retl
58
59
  action ||= block
59
60
  step(action, handler: ExplodeHandler)
60
61
  end
62
+
63
+ def path(path, dependencies={}, &block)
64
+ @path.add_handler PathHandler.new(path, dependencies, &block)
65
+ end
61
66
  end
62
67
  end
@@ -9,6 +9,7 @@ module Retl
9
9
  @enumerable, @path, @options = enumerable, path, options
10
10
  @context = Context.new(@path, @options)
11
11
  @fork_data = ForkDataCollector.new(@context)
12
+ @forks = {}
12
13
  end
13
14
 
14
15
  def each(&block)
@@ -29,8 +30,12 @@ module Retl
29
30
  end
30
31
 
31
32
  def forks(name)
32
- build_each_result
33
- @path.forks(name).transform(@fork_data.take(name), @options)
33
+ unless @forks[name]
34
+ build_each_result
35
+ @forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
36
+ end
37
+
38
+ @forks[name]
34
39
  end
35
40
 
36
41
  def load_into(*destinations)
@@ -55,6 +60,7 @@ module Retl
55
60
  yield data if block_given?
56
61
  result << data
57
62
  end
63
+ result
58
64
  end
59
65
  end
60
66
 
@@ -1,3 +1,3 @@
1
1
  module Retl
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: retl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Biehl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-20 00:00:00.000000000 Z
11
+ date: 2015-11-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -62,6 +62,7 @@ files:
62
62
  - ".gitignore"
63
63
  - ".rspec"
64
64
  - ".travis.yml"
65
+ - CHANGELOG.md
65
66
  - CODE_OF_CONDUCT.md
66
67
  - Gemfile
67
68
  - LICENSE.txt
@@ -78,6 +79,7 @@ files:
78
79
  - lib/retl/handlers/fork_handler.rb
79
80
  - lib/retl/handlers/handler.rb
80
81
  - lib/retl/handlers/inspect_handler.rb
82
+ - lib/retl/handlers/path_handler.rb
81
83
  - lib/retl/handlers/step_handler.rb
82
84
  - lib/retl/handlers/transform_handler.rb
83
85
  - lib/retl/path.rb