retl 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +11 -0
- data/lib/retl/configuration.rb +4 -0
- data/lib/retl/context.rb +1 -1
- data/lib/retl/default_execution.rb +45 -0
- data/lib/retl/path.rb +9 -0
- data/lib/retl/threaded_execution.rb +43 -0
- data/lib/retl/transformation.rb +17 -50
- data/lib/retl/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b72439339d6a06a2bef173718fbc40d953ee6cae
|
4
|
+
data.tar.gz: 6a35ec42b81631654b2e0a4ced399628e279185d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b6b0431f65fa58a2d9a0f85779d3b284d81f51a9528dee74cf90ca334380f4a76ad92d88ec35e9fa3047552d8f6eb33dc4844f7608e745c03d2f43208d5c0ba
|
7
|
+
data.tar.gz: baef9bc311e17363a1aafeff0ad913184e2f6121b4241c11e0b82af13a762fa6e227a5e2681039b5187d587dad15d7456b61a12cc43c020625dd5c9f10e09089
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
+
### 0.0.6
|
4
|
+
|
5
|
+
- Improvement: No longer attempts to memoize transformation results. If you
|
6
|
+
want to re-iterate over the results, store them with `#to_a`
|
7
|
+
- New Feature: adds support for horizontal threading between the source
|
8
|
+
and the transformation, and between the transformation and the load.
|
9
|
+
- Bug Fix: Defines single methods for dependencies on a Context instead
|
10
|
+
of instance methods for the whole class
|
11
|
+
|
3
12
|
### 0.0.5
|
4
13
|
|
5
14
|
- New Feature: Step descriptions allow steps to be described while being
|
data/README.md
CHANGED
@@ -424,6 +424,17 @@ result.errors.to_a
|
|
424
424
|
# above for use.
|
425
425
|
```
|
426
426
|
|
427
|
+
### Multi-Thread Support
|
428
|
+
|
429
|
+
Since extracting and loading can be I/O-intensive operations, it makes sense
|
430
|
+
to run them in their own threads. This can be done with the `Path#transform!`
|
431
|
+
method. It works just like `Path#transform`, except that it separates extraction,
|
432
|
+
transformation and loading into separate threads.
|
433
|
+
|
434
|
+
```
|
435
|
+
path.transform!(data)
|
436
|
+
```
|
437
|
+
|
427
438
|
## Roadmap
|
428
439
|
|
429
440
|
Currently the rETL gem's strengths are transforming data and code reuse. However
|
data/lib/retl/configuration.rb
CHANGED
data/lib/retl/context.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
module Retl
|
2
|
+
class DefaultExecution
|
3
|
+
def initialize(enumerable, path, context, errors)
|
4
|
+
@enumerable, @path, @context, @errors = enumerable, path, context, errors
|
5
|
+
@executed = false
|
6
|
+
end
|
7
|
+
|
8
|
+
def each(&block)
|
9
|
+
@executed = true
|
10
|
+
@enumerable.each do |data|
|
11
|
+
execute(data, &block)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def execute(input)
|
16
|
+
@path.call(input, @context).each do |data|
|
17
|
+
yield data if block_given?
|
18
|
+
end
|
19
|
+
rescue StepExecutionError => e
|
20
|
+
if Retl.configuration.raise_errors
|
21
|
+
raise e
|
22
|
+
else
|
23
|
+
@errors << e
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def load_into(*destinations)
|
28
|
+
destinations = Array(destinations)
|
29
|
+
|
30
|
+
each do |data|
|
31
|
+
destinations.each do |destination|
|
32
|
+
destination << data
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
destinations.each do |destination|
|
37
|
+
destination.close if destination.respond_to?(:close)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def executed?
|
42
|
+
@executed
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/retl/path.rb
CHANGED
@@ -139,6 +139,9 @@ module Retl
|
|
139
139
|
# @return [void]
|
140
140
|
def add_fork(name, &block)
|
141
141
|
fork = Path.new(&block)
|
142
|
+
@dependencies.each do |name, dependency|
|
143
|
+
fork.add_dependency name, dependency
|
144
|
+
end
|
142
145
|
add_handler ForkHandler.new(name)
|
143
146
|
@forks[name] = fork
|
144
147
|
end
|
@@ -183,5 +186,11 @@ module Retl
|
|
183
186
|
def transform(enumerable, options={})
|
184
187
|
Transformation.new(enumerable, self, options)
|
185
188
|
end
|
189
|
+
|
190
|
+
def transform!(enumerable, options={})
|
191
|
+
transform(enumerable, options).tap do |transformation|
|
192
|
+
transformation.execution_strategy = ThreadedExecution
|
193
|
+
end
|
194
|
+
end
|
186
195
|
end
|
187
196
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Retl
|
2
|
+
class ThreadedExecution < DefaultExecution
|
3
|
+
def each(&block)
|
4
|
+
@executed = true
|
5
|
+
queue = Queue.new
|
6
|
+
|
7
|
+
producer = Thread.new do
|
8
|
+
@enumerable.each { |item| queue.push item }
|
9
|
+
queue.push :eoq
|
10
|
+
end
|
11
|
+
|
12
|
+
while((data = queue.pop) != :eoq)
|
13
|
+
execute(data, &block)
|
14
|
+
end
|
15
|
+
|
16
|
+
producer.join
|
17
|
+
end
|
18
|
+
|
19
|
+
def load_into(*destinations)
|
20
|
+
destinations = Array(destinations)
|
21
|
+
queue = Queue.new
|
22
|
+
|
23
|
+
producer = Thread.new do
|
24
|
+
each do |data|
|
25
|
+
queue.push data
|
26
|
+
end
|
27
|
+
queue.push :eoq
|
28
|
+
end
|
29
|
+
|
30
|
+
while((data = queue.pop) != :eoq)
|
31
|
+
destinations.each do |destination|
|
32
|
+
destination << data
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
producer.join
|
37
|
+
|
38
|
+
destinations.each do |destination|
|
39
|
+
destination.close if destination.respond_to?(:close)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/retl/transformation.rb
CHANGED
@@ -1,89 +1,56 @@
|
|
1
1
|
require "retl/context"
|
2
2
|
require "retl/fork_data_collector"
|
3
3
|
require "retl/errors/step_execution_error"
|
4
|
+
require_relative "default_execution"
|
5
|
+
require_relative "threaded_execution"
|
4
6
|
|
5
7
|
module Retl
|
6
8
|
class Transformation
|
7
9
|
include Enumerable
|
8
10
|
|
11
|
+
attr_writer :execution_strategy
|
12
|
+
|
9
13
|
def initialize(enumerable, path, options={})
|
10
14
|
@enumerable, @path, @options = enumerable, path, options
|
11
15
|
@context = Context.new(@path, @options)
|
12
16
|
@fork_data = ForkDataCollector.new(@context)
|
13
17
|
@forks = {}
|
14
18
|
@errors = []
|
19
|
+
self.execution_strategy = DefaultExecution
|
20
|
+
end
|
21
|
+
|
22
|
+
def execution_strategy=(strategy)
|
23
|
+
@execution_strategy = strategy.new(@enumerable, @path, @context, @errors)
|
15
24
|
end
|
16
25
|
|
17
26
|
def each(&block)
|
18
|
-
|
19
|
-
@each.each(&block)
|
20
|
-
else
|
21
|
-
build_each_result(&block)
|
22
|
-
end
|
27
|
+
@execution_strategy.each(&block)
|
23
28
|
end
|
24
29
|
|
25
30
|
def each_slice(size, &block)
|
26
|
-
@each_slice
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
build_each_slice_result(size, &block)
|
31
|
+
@enumerable.each_slice(size).map do |slice|
|
32
|
+
Transformation.new(slice, @path, @options).tap do |transformed_slice|
|
33
|
+
yield transformed_slice if block_given?
|
34
|
+
end
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
38
|
def forks(name)
|
35
39
|
unless @forks[name]
|
36
|
-
|
40
|
+
each unless @execution_strategy.executed?
|
37
41
|
@forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
|
42
|
+
@forks[name].execution_strategy = @execution_strategy.class
|
38
43
|
end
|
39
44
|
|
40
45
|
@forks[name]
|
41
46
|
end
|
42
47
|
|
43
48
|
def load_into(*destinations)
|
44
|
-
|
45
|
-
|
46
|
-
each do |data|
|
47
|
-
destinations.each do |destination|
|
48
|
-
destination << data
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
destinations.each do |destination|
|
53
|
-
destination.close if destination.respond_to?(:close)
|
54
|
-
end
|
49
|
+
@execution_strategy.load_into(*destinations)
|
55
50
|
end
|
56
51
|
|
57
52
|
def errors
|
58
53
|
@errors.each
|
59
54
|
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def build_each_result(&block)
|
64
|
-
@each ||= @enumerable.reduce([]) do |result, data|
|
65
|
-
begin
|
66
|
-
@path.call(data, @context).each do |data|
|
67
|
-
yield data if block_given?
|
68
|
-
result << data
|
69
|
-
end
|
70
|
-
rescue StepExecutionError => e
|
71
|
-
if Retl.configuration.raise_errors
|
72
|
-
raise e
|
73
|
-
else
|
74
|
-
@errors << e
|
75
|
-
end
|
76
|
-
end
|
77
|
-
result
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def build_each_slice_result(size, &block)
|
82
|
-
@each_slice[size] ||= @enumerable.each_slice(size).reduce([]) do |result, slice|
|
83
|
-
transformed_slice = Transformation.new(slice, @path, @options)
|
84
|
-
yield transformed_slice if block_given?
|
85
|
-
result << transformed_slice
|
86
|
-
end
|
87
|
-
end
|
88
55
|
end
|
89
56
|
end
|
data/lib/retl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: retl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Biehl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -73,6 +73,7 @@ files:
|
|
73
73
|
- lib/retl.rb
|
74
74
|
- lib/retl/configuration.rb
|
75
75
|
- lib/retl/context.rb
|
76
|
+
- lib/retl/default_execution.rb
|
76
77
|
- lib/retl/errors/step_execution_error.rb
|
77
78
|
- lib/retl/event_router.rb
|
78
79
|
- lib/retl/fork_data_collector.rb
|
@@ -87,6 +88,7 @@ files:
|
|
87
88
|
- lib/retl/next_description.rb
|
88
89
|
- lib/retl/path.rb
|
89
90
|
- lib/retl/path_builder.rb
|
91
|
+
- lib/retl/threaded_execution.rb
|
90
92
|
- lib/retl/transformation.rb
|
91
93
|
- lib/retl/version.rb
|
92
94
|
- retl.gemspec
|