retl 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5fb3e97322814aed123d196693eacd2eb5560719
4
- data.tar.gz: 778407acae9f684ddaa8b746d0c8dd751ed2e9be
3
+ metadata.gz: b72439339d6a06a2bef173718fbc40d953ee6cae
4
+ data.tar.gz: 6a35ec42b81631654b2e0a4ced399628e279185d
5
5
  SHA512:
6
- metadata.gz: 8b0182706dbf3a4350d53cb967b576075a9fbf85dd1dd1efb778bc0bd4d648b15bddea9904ece0a973bf15896d60ed38ae4833f09b1c5ffc986f48650415fa8d
7
- data.tar.gz: 29a71295ac8f7025fde450c1f4f079e100fe082c2891d20fe34950f28f588c12affe2a0e1e8bcc50a3bd43ca20e80ad6ea97f9839fccf8535514b6c43a93fe84
6
+ metadata.gz: 7b6b0431f65fa58a2d9a0f85779d3b284d81f51a9528dee74cf90ca334380f4a76ad92d88ec35e9fa3047552d8f6eb33dc4844f7608e745c03d2f43208d5c0ba
7
+ data.tar.gz: baef9bc311e17363a1aafeff0ad913184e2f6121b4241c11e0b82af13a762fa6e227a5e2681039b5187d587dad15d7456b61a12cc43c020625dd5c9f10e09089
@@ -1,5 +1,14 @@
1
1
  # Change Log
2
2
 
3
+ ### 0.0.6
4
+
5
+ - Improvement: No longer attempts to memoize transformation results. If you
6
+ want to re-iterate over the results, store them with `#to_a`.
7
+ - New Feature: adds support for horizontal threading between the source
8
+ and the transformation, and between the transformation and the load.
9
+ - Bug Fix: Defines singleton methods for dependencies on a Context instead
10
+ of instance methods for the whole class
11
+
3
12
  ### 0.0.5
4
13
 
5
14
  - New Feature: Step descriptions allow steps to be described while being
data/README.md CHANGED
@@ -424,6 +424,17 @@ result.errors.to_a
424
424
  # above for use.
425
425
  ```
426
426
 
427
+ ### Multi-Thread Support
428
+
429
+ Since extracting and loading can be I/O-intensive operations, it makes sense
430
+ to run them in their own threads. This can be done with the `Path#transform!`
431
+ method. It works just like `Path#transform`, except that it will separate extraction,
432
+ transformation and loading into separate threads.
433
+
434
+ ```
435
+ path.transform!(data)
436
+ ```
437
+
427
438
  ## Roadmap
428
439
 
429
440
  Currently the rETL gem's strengths are transforming data and code reuse. However
@@ -2,6 +2,10 @@ class Configuration
2
2
  attr_accessor :raise_errors
3
3
 
4
4
  def initialize
5
+ reset!
6
+ end
7
+
8
+ def reset!
5
9
  self.raise_errors = true
6
10
  end
7
11
  end
@@ -8,7 +8,7 @@ module Retl
8
8
  raise ArgumentError, "This transformation depends on `name`"
9
9
  end
10
10
 
11
- self.class.send(:define_method, name) do
11
+ define_singleton_method(name) do
12
12
  (dependency && dependency.call(options)) || options[name]
13
13
  end
14
14
  end
@@ -0,0 +1,45 @@
1
+ module Retl
2
+ class DefaultExecution
3
+ def initialize(enumerable, path, context, errors)
4
+ @enumerable, @path, @context, @errors = enumerable, path, context, errors
5
+ @executed = false
6
+ end
7
+
8
+ def each(&block)
9
+ @executed = true
10
+ @enumerable.each do |data|
11
+ execute(data, &block)
12
+ end
13
+ end
14
+
15
+ def execute(input)
16
+ @path.call(input, @context).each do |data|
17
+ yield data if block_given?
18
+ end
19
+ rescue StepExecutionError => e
20
+ if Retl.configuration.raise_errors
21
+ raise e
22
+ else
23
+ @errors << e
24
+ end
25
+ end
26
+
27
+ def load_into(*destinations)
28
+ destinations = Array(destinations)
29
+
30
+ each do |data|
31
+ destinations.each do |destination|
32
+ destination << data
33
+ end
34
+ end
35
+
36
+ destinations.each do |destination|
37
+ destination.close if destination.respond_to?(:close)
38
+ end
39
+ end
40
+
41
+ def executed?
42
+ @executed
43
+ end
44
+ end
45
+ end
@@ -139,6 +139,9 @@ module Retl
139
139
  # @return [void]
140
140
  def add_fork(name, &block)
141
141
  fork = Path.new(&block)
142
+ @dependencies.each do |name, dependency|
143
+ fork.add_dependency name, dependency
144
+ end
142
145
  add_handler ForkHandler.new(name)
143
146
  @forks[name] = fork
144
147
  end
@@ -183,5 +186,11 @@ module Retl
183
186
  def transform(enumerable, options={})
184
187
  Transformation.new(enumerable, self, options)
185
188
  end
189
+
190
+ def transform!(enumerable, options={})
191
+ transform(enumerable, options).tap do |transformation|
192
+ transformation.execution_strategy = ThreadedExecution
193
+ end
194
+ end
186
195
  end
187
196
  end
@@ -0,0 +1,43 @@
1
+ module Retl
2
+ class ThreadedExecution < DefaultExecution
3
+ def each(&block)
4
+ @executed = true
5
+ queue = Queue.new
6
+
7
+ producer = Thread.new do
8
+ @enumerable.each { |item| queue.push item }
9
+ queue.push :eoq
10
+ end
11
+
12
+ while((data = queue.pop) != :eoq)
13
+ execute(data, &block)
14
+ end
15
+
16
+ producer.join
17
+ end
18
+
19
+ def load_into(*destinations)
20
+ destinations = Array(destinations)
21
+ queue = Queue.new
22
+
23
+ producer = Thread.new do
24
+ each do |data|
25
+ queue.push data
26
+ end
27
+ queue.push :eoq
28
+ end
29
+
30
+ while((data = queue.pop) != :eoq)
31
+ destinations.each do |destination|
32
+ destination << data
33
+ end
34
+ end
35
+
36
+ producer.join
37
+
38
+ destinations.each do |destination|
39
+ destination.close if destination.respond_to?(:close)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -1,89 +1,56 @@
1
1
  require "retl/context"
2
2
  require "retl/fork_data_collector"
3
3
  require "retl/errors/step_execution_error"
4
+ require_relative "default_execution"
5
+ require_relative "threaded_execution"
4
6
 
5
7
  module Retl
6
8
  class Transformation
7
9
  include Enumerable
8
10
 
11
+ attr_writer :execution_strategy
12
+
9
13
  def initialize(enumerable, path, options={})
10
14
  @enumerable, @path, @options = enumerable, path, options
11
15
  @context = Context.new(@path, @options)
12
16
  @fork_data = ForkDataCollector.new(@context)
13
17
  @forks = {}
14
18
  @errors = []
19
+ self.execution_strategy = DefaultExecution
20
+ end
21
+
22
+ def execution_strategy=(strategy)
23
+ @execution_strategy = strategy.new(@enumerable, @path, @context, @errors)
15
24
  end
16
25
 
17
26
  def each(&block)
18
- if @each
19
- @each.each(&block)
20
- else
21
- build_each_result(&block)
22
- end
27
+ @execution_strategy.each(&block)
23
28
  end
24
29
 
25
30
  def each_slice(size, &block)
26
- @each_slice ||= {}
27
- if @each_slice[size]
28
- @each_slice[size].each(&block)
29
- else
30
- build_each_slice_result(size, &block)
31
+ @enumerable.each_slice(size).map do |slice|
32
+ Transformation.new(slice, @path, @options).tap do |transformed_slice|
33
+ yield transformed_slice if block_given?
34
+ end
31
35
  end
32
36
  end
33
37
 
34
38
  def forks(name)
35
39
  unless @forks[name]
36
- build_each_result
40
+ each unless @execution_strategy.executed?
37
41
  @forks[name] = @path.forks(name).transform(@fork_data.take(name), @options)
42
+ @forks[name].execution_strategy = @execution_strategy.class
38
43
  end
39
44
 
40
45
  @forks[name]
41
46
  end
42
47
 
43
48
  def load_into(*destinations)
44
- destinations = Array(destinations)
45
-
46
- each do |data|
47
- destinations.each do |destination|
48
- destination << data
49
- end
50
- end
51
-
52
- destinations.each do |destination|
53
- destination.close if destination.respond_to?(:close)
54
- end
49
+ @execution_strategy.load_into(*destinations)
55
50
  end
56
51
 
57
52
  def errors
58
53
  @errors.each
59
54
  end
60
-
61
- private
62
-
63
- def build_each_result(&block)
64
- @each ||= @enumerable.reduce([]) do |result, data|
65
- begin
66
- @path.call(data, @context).each do |data|
67
- yield data if block_given?
68
- result << data
69
- end
70
- rescue StepExecutionError => e
71
- if Retl.configuration.raise_errors
72
- raise e
73
- else
74
- @errors << e
75
- end
76
- end
77
- result
78
- end
79
- end
80
-
81
- def build_each_slice_result(size, &block)
82
- @each_slice[size] ||= @enumerable.each_slice(size).reduce([]) do |result, slice|
83
- transformed_slice = Transformation.new(slice, @path, @options)
84
- yield transformed_slice if block_given?
85
- result << transformed_slice
86
- end
87
- end
88
55
  end
89
56
  end
@@ -1,3 +1,3 @@
1
1
  module Retl
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: retl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Biehl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-23 00:00:00.000000000 Z
11
+ date: 2015-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -73,6 +73,7 @@ files:
73
73
  - lib/retl.rb
74
74
  - lib/retl/configuration.rb
75
75
  - lib/retl/context.rb
76
+ - lib/retl/default_execution.rb
76
77
  - lib/retl/errors/step_execution_error.rb
77
78
  - lib/retl/event_router.rb
78
79
  - lib/retl/fork_data_collector.rb
@@ -87,6 +88,7 @@ files:
87
88
  - lib/retl/next_description.rb
88
89
  - lib/retl/path.rb
89
90
  - lib/retl/path_builder.rb
91
+ - lib/retl/threaded_execution.rb
90
92
  - lib/retl/transformation.rb
91
93
  - lib/retl/version.rb
92
94
  - retl.gemspec