retl 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +11 -0
- data/lib/retl/configuration.rb +4 -0
- data/lib/retl/context.rb +1 -1
- data/lib/retl/default_execution.rb +45 -0
- data/lib/retl/path.rb +9 -0
- data/lib/retl/threaded_execution.rb +43 -0
- data/lib/retl/transformation.rb +17 -50
- data/lib/retl/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b72439339d6a06a2bef173718fbc40d953ee6cae
|
4
|
+
data.tar.gz: 6a35ec42b81631654b2e0a4ced399628e279185d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b6b0431f65fa58a2d9a0f85779d3b284d81f51a9528dee74cf90ca334380f4a76ad92d88ec35e9fa3047552d8f6eb33dc4844f7608e745c03d2f43208d5c0ba
|
7
|
+
data.tar.gz: baef9bc311e17363a1aafeff0ad913184e2f6121b4241c11e0b82af13a762fa6e227a5e2681039b5187d587dad15d7456b61a12cc43c020625dd5c9f10e09089
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
+
### 0.0.6
|
4
|
+
|
5
|
+
- Improvement: No longer attempts to memoize transformation results. If you
|
6
|
+
want to re-iterate over the results, store them with `#to_a`
|
7
|
+
- New Feature: adds support for horizontal threading between the source
|
8
|
+
and the transformation, and between the transformation and the load.
|
9
|
+
- Bug Fix: Defines single methods for dependencies on a Context instead
|
10
|
+
of instance methods for the whole class
|
11
|
+
|
3
12
|
### 0.0.5
|
4
13
|
|
5
14
|
- New Feature: Step descriptions allow steps to be described while being
|
data/README.md
CHANGED
@@ -424,6 +424,17 @@ result.errors.to_a
|
|
424
424
|
# above for use.
|
425
425
|
```
|
426
426
|
|
427
|
+
### Multi-Thread Support
|
428
|
+
|
429
|
+
Since extracting and loading can be I/O-intensive operations, it makes sense
|
430
|
+
to run them in their own threads. This can be done with the `Path#transform!`
|
431
|
+
method. It works just like `Path#transform`, except that it separates extraction,
|
432
|
+
transformation and loading into separate threads.
|
433
|
+
|
434
|
+
```
|
435
|
+
path.transform!(data)
|
436
|
+
```
|
437
|
+
|
427
438
|
## Roadmap
|
428
439
|
|
429
440
|
Currently the rETL gem's strengths are transforming data and code reuse. However
|
data/lib/retl/configuration.rb
CHANGED
data/lib/retl/context.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
module Retl
|
2
|
+
class DefaultExecution
|
3
|
+
def initialize(enumerable, path, context, errors)
|
4
|
+
@enumerable, @path, @context, @errors = enumerable, path, context, errors
|
5
|
+
@executed = false
|
6
|
+
end
|
7
|
+
|
8
|
+
def each(&block)
|
9
|
+
@executed = true
|
10
|
+
@enumerable.each do |data|
|
11
|
+
execute(data, &block)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def execute(input)
|
16
|
+
@path.call(input, @context).each do |data|
|
17
|
+
yield data if block_given?
|
18
|
+
end
|
19
|
+
rescue StepExecutionError => e
|
20
|
+
if Retl.configuration.raise_errors
|
21
|
+
raise e
|
22
|
+
else
|
23
|
+
@errors << e
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_into(*destinations)
|
28
|
+
destinations = Array(destinations)
|
29
|
+
|
30
|
+
each do |data|
|
31
|
+
destinations.each do |destination|
|
32
|
+
destination << data
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
destinations.each do |destination|
|
37
|
+
destination.close if destination.respond_to?(:close)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def executed?
|
42
|
+
@executed
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/retl/path.rb
CHANGED
@@ -139,6 +139,9 @@ module Retl
|
|
139
139
|
# @return [void]
|
140
140
|
def add_fork(name, &block)
|
141
141
|
fork = Path.new(&block)
|
142
|
+
@dependencies.each do |name, dependency|
|
143
|
+
fork.add_dependency name, dependency
|
144
|
+
end
|
142
145
|
add_handler ForkHandler.new(name)
|
143
146
|
@forks[name] = fork
|
144
147
|
end
|
@@ -183,5 +186,11 @@ module Retl
|
|
183
186
|
def transform(enumerable, options={})
|
184
187
|
Transformation.new(enumerable, self, options)
|
185
188
|
end
|
189
|
+
|
190
|
+
def transform!(enumerable, options={})
|
191
|
+
transform(enumerable, options).tap do |transformation|
|
192
|
+
transformation.execution_strategy = ThreadedExecution
|
193
|
+
end
|
194
|
+
end
|
186
195
|
end
|
187
196
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Retl
|
2
|
+
class ThreadedExecution < DefaultExecution
|
3
|
+
def each(&block)
|
4
|
+
@executed = true
|
5
|
+
queue = Queue.new
|
6
|
+
|
7
|
+
producer = Thread.new do
|
8
|
+
@enumerable.each { |item| queue.push item }
|
9
|
+
queue.push :eoq
|
10
|
+
end
|
11
|
+
|
12
|
+
while((data = queue.pop) != :eoq)
|
13
|
+
execute(data, &block)
|
14
|
+
end
|
15
|
+
|
16
|
+
producer.join
|
17
|
+
end
|
18
|
+
|
19
|
+
def load_into(*destinations)
|
20
|
+
destinations = Array(destinations)
|
21
|
+
queue = Queue.new
|
22
|
+
|
23
|
+
producer = Thread.new do
|
24
|
+
each do |data|
|
25
|
+
queue.push data
|
26
|
+
end
|
27
|
+
queue.push :eoq
|
28
|
+
end
|
29
|
+
|
30
|
+
while((data = queue.pop) != :eoq)
|
31
|
+
destinations.each do |destination|
|
32
|
+
destination << data
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
producer.join
|
37
|
+
|
38
|
+
destinations.each do |destination|
|
39
|
+
destination.close if destination.respond_to?(:close)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/retl/transformation.rb
CHANGED
@@ -1,89 +1,56 @@
|
|
1
1
|
require "retl/context"
|
2
2
|
require "retl/fork_data_collector"
|
3
3
|
require "retl/errors/step_execution_error"
|
4
|
+
require_relative "default_execution"
|
5
|
+
require_relative "threaded_execution"
|
4
6
|
|
5
7
|
module Retl
|
6
8
|
class Transformation
|
7
9
|
include Enumerable
|
8
10
|
|
11
|
+
attr_writer :execution_strategy
|
12
|
+
|
9
13
|
def initialize(enumerable, path, options={})
|
10
14
|
@enumerable, @path, @options = enumerable, path, options
|
11
15
|
@context = Context.new(@path, @options)
|
12
16
|
@fork_data = ForkDataCollector.new(@context)
|
13
17
|
@forks = {}
|
14
18
|
@errors = []
|
19
|
+
self.execution_strategy = DefaultExecution
|
20
|
+
end
|
21
|
+
|
22
|
+
def execution_strategy=(strategy)
|
23
|
+
@execution_strategy = strategy.new(@enumerable, @path, @context, @errors)
|
15
24
|
end
|
16
25
|
|
17
26
|
def each(&block)
|
18
|
-
|
19
|
-
@each.each(&block)
|
20
|
-
else
|
21
|
-
build_each_result(&block)
|
22
|
-
end
|
27
|
+
@execution_strategy.each(&block)
|
23
28
|
end
|
24
29
|
|
25
30
|
def each_slice(size, &block)
|
26
|
-
@each_slice
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
build_each_slice_result(size, &block)
|
31
|
+
@enumerable.each_slice(size).map do |slice|
|
32
|
+
Transformation.new(slice, @path, @options).tap do |transformed_slice|
|
33
|
+
yield transformed_slice if block_given?
|
34
|
+
end
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
38
|
def forks(name)
|
35
39
|
unless @forks[name]
|
36
|
-
|
40
|
+
each unless @execution_strategy.executed?
|
37
41
|
@forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
|
42
|
+
@forks[name].execution_strategy = @execution_strategy.class
|
38
43
|
end
|
39
44
|
|
40
45
|
@forks[name]
|
41
46
|
end
|
42
47
|
|
43
48
|
def load_into(*destinations)
|
44
|
-
|
45
|
-
|
46
|
-
each do |data|
|
47
|
-
destinations.each do |destination|
|
48
|
-
destination << data
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
destinations.each do |destination|
|
53
|
-
destination.close if destination.respond_to?(:close)
|
54
|
-
end
|
49
|
+
@execution_strategy.load_into(*destinations)
|
55
50
|
end
|
56
51
|
|
57
52
|
def errors
|
58
53
|
@errors.each
|
59
54
|
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def build_each_result(&block)
|
64
|
-
@each ||= @enumerable.reduce([]) do |result, data|
|
65
|
-
begin
|
66
|
-
@path.call(data, @context).each do |data|
|
67
|
-
yield data if block_given?
|
68
|
-
result << data
|
69
|
-
end
|
70
|
-
rescue StepExecutionError => e
|
71
|
-
if Retl.configuration.raise_errors
|
72
|
-
raise e
|
73
|
-
else
|
74
|
-
@errors << e
|
75
|
-
end
|
76
|
-
end
|
77
|
-
result
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def build_each_slice_result(size, &block)
|
82
|
-
@each_slice[size] ||= @enumerable.each_slice(size).reduce([]) do |result, slice|
|
83
|
-
transformed_slice = Transformation.new(slice, @path, @options)
|
84
|
-
yield transformed_slice if block_given?
|
85
|
-
result << transformed_slice
|
86
|
-
end
|
87
|
-
end
|
88
55
|
end
|
89
56
|
end
|
data/lib/retl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: retl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Biehl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -73,6 +73,7 @@ files:
|
|
73
73
|
- lib/retl.rb
|
74
74
|
- lib/retl/configuration.rb
|
75
75
|
- lib/retl/context.rb
|
76
|
+
- lib/retl/default_execution.rb
|
76
77
|
- lib/retl/errors/step_execution_error.rb
|
77
78
|
- lib/retl/event_router.rb
|
78
79
|
- lib/retl/fork_data_collector.rb
|
@@ -87,6 +88,7 @@ files:
|
|
87
88
|
- lib/retl/next_description.rb
|
88
89
|
- lib/retl/path.rb
|
89
90
|
- lib/retl/path_builder.rb
|
91
|
+
- lib/retl/threaded_execution.rb
|
90
92
|
- lib/retl/transformation.rb
|
91
93
|
- lib/retl/version.rb
|
92
94
|
- retl.gemspec
|