retl 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5fb3e97322814aed123d196693eacd2eb5560719
4
- data.tar.gz: 778407acae9f684ddaa8b746d0c8dd751ed2e9be
3
+ metadata.gz: b72439339d6a06a2bef173718fbc40d953ee6cae
4
+ data.tar.gz: 6a35ec42b81631654b2e0a4ced399628e279185d
5
5
  SHA512:
6
- metadata.gz: 8b0182706dbf3a4350d53cb967b576075a9fbf85dd1dd1efb778bc0bd4d648b15bddea9904ece0a973bf15896d60ed38ae4833f09b1c5ffc986f48650415fa8d
7
- data.tar.gz: 29a71295ac8f7025fde450c1f4f079e100fe082c2891d20fe34950f28f588c12affe2a0e1e8bcc50a3bd43ca20e80ad6ea97f9839fccf8535514b6c43a93fe84
6
+ metadata.gz: 7b6b0431f65fa58a2d9a0f85779d3b284d81f51a9528dee74cf90ca334380f4a76ad92d88ec35e9fa3047552d8f6eb33dc4844f7608e745c03d2f43208d5c0ba
7
+ data.tar.gz: baef9bc311e17363a1aafeff0ad913184e2f6121b4241c11e0b82af13a762fa6e227a5e2681039b5187d587dad15d7456b61a12cc43c020625dd5c9f10e09089
@@ -1,5 +1,14 @@
1
1
  # Change Log
2
2
 
3
+ ### 0.0.6
4
+
5
+ - Improvement: No longer attempts to memoize transformation results. If you
6
+ want to re-iterate over the results, store them with `#to_a`
7
+ - New Feature: adds support for horizontal threading between the source
8
+ and the transformation, and between the transformation and the load.
9
+ - Bug Fix: Defines single methods for dependencies on a Context instead
10
+ of instance methods for the whole class
11
+
3
12
  ### 0.0.5
4
13
 
5
14
  - New Feature: Step descriptions allow steps to be described while being
data/README.md CHANGED
@@ -424,6 +424,17 @@ result.errors.to_a
424
424
  # above for use.
425
425
  ```
426
426
 
427
+ ### Multi-Thread Support
428
+
429
+ Since extracting and loading can be I/O-intensive operations, it makes sense
430
+ to run them in their own threads. This can be done with the `Path#transform!`
431
+ method. It works just like `Path#transform` only it will separate extraction,
432
+ transformation and loading into separate threads.
433
+
434
+ ```
435
+ path.transform!(data)
436
+ ```
437
+
427
438
  ## Roadmap
428
439
 
429
440
  Currently the rETL gem's strengths are transforming data and code reuse. However
@@ -2,6 +2,10 @@ class Configuration
2
2
  attr_accessor :raise_errors
3
3
 
4
4
  def initialize
5
+ reset!
6
+ end
7
+
8
+ def reset!
5
9
  self.raise_errors = true
6
10
  end
7
11
  end
@@ -8,7 +8,7 @@ module Retl
8
8
  raise ArgumentError, "This transformation depends on `name`"
9
9
  end
10
10
 
11
- self.class.send(:define_method, name) do
11
+ define_singleton_method(name) do
12
12
  (dependency && dependency.call(options)) || options[name]
13
13
  end
14
14
  end
@@ -0,0 +1,45 @@
1
+ module Retl
2
+ class DefaultExecution
3
+ def initialize(enumerable, path, context, errors)
4
+ @enumerable, @path, @context, @errors = enumerable, path, context, errors
5
+ @executed = false
6
+ end
7
+
8
+ def each(&block)
9
+ @executed = true
10
+ @enumerable.each do |data|
11
+ execute(data, &block)
12
+ end
13
+ end
14
+
15
+ def execute(input)
16
+ @path.call(input, @context).each do |data|
17
+ yield data if block_given?
18
+ end
19
+ rescue StepExecutionError => e
20
+ if Retl.configuration.raise_errors
21
+ raise e
22
+ else
23
+ @errors << e
24
+ end
25
+ end
26
+
27
+ def load_into(*destinations)
28
+ destinations = Array(destinations)
29
+
30
+ each do |data|
31
+ destinations.each do |destination|
32
+ destination << data
33
+ end
34
+ end
35
+
36
+ destinations.each do |destination|
37
+ destination.close if destination.respond_to?(:close)
38
+ end
39
+ end
40
+
41
+ def executed?
42
+ @executed
43
+ end
44
+ end
45
+ end
@@ -139,6 +139,9 @@ module Retl
139
139
  # @return [void]
140
140
  def add_fork(name, &block)
141
141
  fork = Path.new(&block)
142
+ @dependencies.each do |name, dependency|
143
+ fork.add_dependency name, dependency
144
+ end
142
145
  add_handler ForkHandler.new(name)
143
146
  @forks[name] = fork
144
147
  end
@@ -183,5 +186,11 @@ module Retl
183
186
  def transform(enumerable, options={})
184
187
  Transformation.new(enumerable, self, options)
185
188
  end
189
+
190
+ def transform!(enumerable, options={})
191
+ transform(enumerable, options).tap do |transformation|
192
+ transformation.execution_strategy = ThreadedExecution
193
+ end
194
+ end
186
195
  end
187
196
  end
@@ -0,0 +1,43 @@
1
+ module Retl
2
+ class ThreadedExecution < DefaultExecution
3
+ def each(&block)
4
+ @executed = true
5
+ queue = Queue.new
6
+
7
+ producer = Thread.new do
8
+ @enumerable.each { |item| queue.push item }
9
+ queue.push :eoq
10
+ end
11
+
12
+ while((data = queue.pop) != :eoq)
13
+ execute(data, &block)
14
+ end
15
+
16
+ producer.join
17
+ end
18
+
19
+ def load_into(*destinations)
20
+ destinations = Array(destinations)
21
+ queue = Queue.new
22
+
23
+ producer = Thread.new do
24
+ each do |data|
25
+ queue.push data
26
+ end
27
+ queue.push :eoq
28
+ end
29
+
30
+ while((data = queue.pop) != :eoq)
31
+ destinations.each do |destination|
32
+ destination << data
33
+ end
34
+ end
35
+
36
+ producer.join
37
+
38
+ destinations.each do |destination|
39
+ destination.close if destination.respond_to?(:close)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -1,89 +1,56 @@
1
1
  require "retl/context"
2
2
  require "retl/fork_data_collector"
3
3
  require "retl/errors/step_execution_error"
4
+ require_relative "default_execution"
5
+ require_relative "threaded_execution"
4
6
 
5
7
  module Retl
6
8
  class Transformation
7
9
  include Enumerable
8
10
 
11
+ attr_writer :execution_strategy
12
+
9
13
  def initialize(enumerable, path, options={})
10
14
  @enumerable, @path, @options = enumerable, path, options
11
15
  @context = Context.new(@path, @options)
12
16
  @fork_data = ForkDataCollector.new(@context)
13
17
  @forks = {}
14
18
  @errors = []
19
+ self.execution_strategy = DefaultExecution
20
+ end
21
+
22
+ def execution_strategy=(strategy)
23
+ @execution_strategy = strategy.new(@enumerable, @path, @context, @errors)
15
24
  end
16
25
 
17
26
  def each(&block)
18
- if @each
19
- @each.each(&block)
20
- else
21
- build_each_result(&block)
22
- end
27
+ @execution_strategy.each(&block)
23
28
  end
24
29
 
25
30
  def each_slice(size, &block)
26
- @each_slice ||= {}
27
- if @each_slice[size]
28
- @each_slice[size].each(&block)
29
- else
30
- build_each_slice_result(size, &block)
31
+ @enumerable.each_slice(size).map do |slice|
32
+ Transformation.new(slice, @path, @options).tap do |transformed_slice|
33
+ yield transformed_slice if block_given?
34
+ end
31
35
  end
32
36
  end
33
37
 
34
38
  def forks(name)
35
39
  unless @forks[name]
36
- build_each_result
40
+ each unless @execution_strategy.executed?
37
41
  @forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
42
+ @forks[name].execution_strategy = @execution_strategy.class
38
43
  end
39
44
 
40
45
  @forks[name]
41
46
  end
42
47
 
43
48
  def load_into(*destinations)
44
- destinations = Array(destinations)
45
-
46
- each do |data|
47
- destinations.each do |destination|
48
- destination << data
49
- end
50
- end
51
-
52
- destinations.each do |destination|
53
- destination.close if destination.respond_to?(:close)
54
- end
49
+ @execution_strategy.load_into(*destinations)
55
50
  end
56
51
 
57
52
  def errors
58
53
  @errors.each
59
54
  end
60
-
61
- private
62
-
63
- def build_each_result(&block)
64
- @each ||= @enumerable.reduce([]) do |result, data|
65
- begin
66
- @path.call(data, @context).each do |data|
67
- yield data if block_given?
68
- result << data
69
- end
70
- rescue StepExecutionError => e
71
- if Retl.configuration.raise_errors
72
- raise e
73
- else
74
- @errors << e
75
- end
76
- end
77
- result
78
- end
79
- end
80
-
81
- def build_each_slice_result(size, &block)
82
- @each_slice[size] ||= @enumerable.each_slice(size).reduce([]) do |result, slice|
83
- transformed_slice = Transformation.new(slice, @path, @options)
84
- yield transformed_slice if block_given?
85
- result << transformed_slice
86
- end
87
- end
88
55
  end
89
56
  end
@@ -1,3 +1,3 @@
1
1
  module Retl
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: retl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Biehl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-23 00:00:00.000000000 Z
11
+ date: 2015-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -73,6 +73,7 @@ files:
73
73
  - lib/retl.rb
74
74
  - lib/retl/configuration.rb
75
75
  - lib/retl/context.rb
76
+ - lib/retl/default_execution.rb
76
77
  - lib/retl/errors/step_execution_error.rb
77
78
  - lib/retl/event_router.rb
78
79
  - lib/retl/fork_data_collector.rb
@@ -87,6 +88,7 @@ files:
87
88
  - lib/retl/next_description.rb
88
89
  - lib/retl/path.rb
89
90
  - lib/retl/path_builder.rb
91
+ - lib/retl/threaded_execution.rb
90
92
  - lib/retl/transformation.rb
91
93
  - lib/retl/version.rb
92
94
  - retl.gemspec