rodimus 0.1.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ require 'rodimus'
2
+ require_relative 'log_input'
3
+ require_relative 'parse_connection'
4
+ require_relative 'file_output'
5
+
6
# Build a three-step transformation over the sample log that sits next to
# this script, run it, and report completion on stdout.
log_path = File.expand_path('../rails_example.log', __FILE__)
transformation = Rodimus::Transformation.new

# Steps are appended in pipeline order: input, parsing, output.
[
  LogInput.new(log_path),
  ParseConnection.new,
  FileOutput.new
].each { |step| transformation.steps << step }

transformation.run

puts "Transformation complete!"
@@ -1,4 +1,8 @@
1
1
  require 'rodimus/configuration'
2
+ require 'rodimus/observable'
3
+ require 'rodimus/observing'
4
+ require 'rodimus/benchmark'
5
+ require 'rodimus/runtime_logging'
2
6
  require 'rodimus/step'
3
7
  require 'rodimus/transformation'
4
8
  require 'rodimus/version'
@@ -0,0 +1,48 @@
1
module Rodimus

  # Observer that gathers per-row timing statistics for the subject it is
  # attached to and logs them when the subject finishes running.
  #
  # It reacts to four events delivered through #on_notify:
  # :before_run (reset stats), :before_row (start a row timer),
  # :after_row (record the row's duration) and :after_run (compute the
  # average and log the result via Rodimus.logger).
  class Benchmark
    include Observing

    # Hash with the keys :count, :total, :min, :max and :average (seconds,
    # rounded to 4 decimals). :min is nil until the first row has been timed
    # and is normalised to 0 at :after_run when no row was timed.
    attr_reader :stats

    # Dispatches an event to the matching private handler. Unknown events
    # are ignored.
    #
    # subject    - the object that emitted the event (only used for logging).
    # event_type - Symbol naming the event.
    def on_notify(subject, event_type)
      case event_type
      when :before_run then initialize_stats
      when :after_run  then finalize_stats(subject)
      when :before_row then before_row
      when :after_row  then after_row
      end
    end

    private

    # Folds the duration of the row that just finished into the stats.
    def after_row
      # An :after_row without a preceding :before_row has nothing to measure.
      return unless @start_time

      row_run_time = (monotonic_now - @start_time).round(4)
      @start_time = nil
      stats[:total] = (stats[:total] + row_run_time).round(4)
      # :min starts out nil so that the first measured row always becomes the
      # minimum, even when every row takes longer than one second.
      stats[:min] = row_run_time if stats[:min].nil? || stats[:min] > row_run_time
      stats[:max] = row_run_time if stats[:max] < row_run_time
    end

    # Counts the row and starts its timer.
    def before_row
      initialize_stats unless stats
      stats[:count] += 1
      @start_time = monotonic_now
    end

    # Computes the average row time and logs the collected stats.
    def finalize_stats(subject)
      initialize_stats unless stats
      # No row was timed: report 0 rather than nil.
      stats[:min] ||= 0
      if stats[:count] > 0
        stats[:average] = (stats[:total] / stats[:count]).round(4)
      end

      Rodimus.logger.info "#{subject} benchmarks: #{stats}"
    end

    # Resets all counters for a fresh run.
    def initialize_stats
      @start_time = nil
      @stats = {count: 0, total: 0, min: nil, max: 0, average: 0}
    end

    # Monotonic timestamp in seconds; unlike Time.now it cannot jump when the
    # system clock is adjusted, so durations are never negative.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end

end
@@ -3,10 +3,11 @@ require 'logger'
3
3
  module Rodimus
4
4
 
5
5
  class Configuration
6
- attr_accessor :logger
6
+ attr_accessor :logger, :benchmarking
7
7
 
8
8
  def initialize
9
9
  @logger = Logger.new(STDOUT)
10
+ @benchmarking = false
10
11
  end
11
12
  end
12
13
 
@@ -0,0 +1,17 @@
1
+ require 'set'
2
+
3
module Rodimus

  # Mixin for objects that broadcast events to a set of registered observers.
  module Observable
    # Delivers an event to every registered observer by calling
    # observer.on_notify(subject, event).
    #
    # subject - the object the event is about (usually self).
    # event   - the event identifier handed to each observer.
    #
    # Returns the observer set.
    def notify(subject, event)
      # Iterate over a snapshot: an observer may register further observers
      # from inside its callback, and adding to a Set while it is being
      # iterated raises. Observers added during delivery receive the next
      # event, not the current one.
      observers.to_a.each do |observer|
        observer.on_notify(subject, event)
      end
      observers
    end

    # The lazily created Set of registered observers. Register one with
    # `observers << observer`; duplicates are ignored by the Set.
    def observers
      @observers ||= Set.new
    end
  end

end
@@ -0,0 +1,17 @@
1
module Rodimus

  # Mixin that turns event notifications into calls to "hook" methods.
  #
  # A hook is any public or protected method whose name starts with the name
  # of the event, e.g. `before_run_record_time` is a hook for :before_run.
  module Observing
    # Invokes every hook that matches the event. Hooks are called without
    # arguments.
    #
    # subject    - the object that emitted the event (not used here).
    # event_type - the event name; compared via its string form.
    #
    # Returns the Array of hook names that were invoked.
    def on_notify(subject, event_type)
      discovered_hooks(event_type).each do |hook|
        send(hook)
      end
    end

    private

    # Names of all methods that begin with the given event name.
    #
    # The name is compared literally: it is never interpreted as a regular
    # expression, so characters such as "." cannot match unrelated methods.
    # A nil or empty name matches nothing; treating it as a prefix would
    # select, and then invoke, every method on the object.
    def discovered_hooks(matcher)
      prefix = matcher.to_s
      return [] if prefix.empty?

      methods.select { |name| name.to_s.start_with?(prefix) }
    end
  end

end
@@ -0,0 +1,22 @@
1
module Rodimus

  # Hook methods that log when the including object starts and finishes
  # running, together with the elapsed wall-clock time. The method names
  # start with "before_run" / "after_run", which is the prefix Observing
  # uses to discover hooks for those events.
  module RuntimeLogging
    # Epoch seconds (Integer) captured by #before_run_record_time, or nil if
    # that hook has not run yet.
    attr_reader :start_time

    # Remembers the current time and logs that the run has started.
    def before_run_record_time
      @start_time = Time.now.to_i
      Rodimus.logger.info "Running #{self}"
    end

    # Logs that the run has finished, including the elapsed time split into
    # hours, minutes and seconds.
    def after_run_record_time
      # Without a recorded start there is no duration to report; log the bare
      # completion message instead of failing on `Integer - nil`.
      return Rodimus.logger.info("Finished #{self}") unless start_time

      run_time = Time.now.to_i - start_time
      elapsed_hours, remaining_seconds = run_time.divmod(3600)
      elapsed_minutes, elapsed_seconds = remaining_seconds.divmod(60)

      Rodimus.logger.info "Finished #{self} after #{elapsed_hours} hours, #{elapsed_minutes} minutes, #{elapsed_seconds} seconds."
    end
  end

end
@@ -1,6 +1,10 @@
1
1
  module Rodimus
2
2
 
3
- module Step
3
+ class Step
4
+ include Observable
5
+ include Observing # Steps observe themselves for run hooks
6
+ include RuntimeLogging
7
+
4
8
  # The incoming data stream. Can be anything that quacks like an IO
5
9
  attr_accessor :incoming
6
10
 
@@ -11,15 +15,17 @@ module Rodimus
11
15
  # This is initialized by the Transformation when the step begins to run.
12
16
  attr_accessor :shared_data
13
17
 
18
+ def initialize
19
+ observers << self
20
+ observers << Benchmark.new if Rodimus.configuration.benchmarking
21
+ end
22
+
14
23
  def close_descriptors
15
24
  [incoming, outgoing].reject(&:nil?).each do |descriptor|
16
25
  descriptor.close if descriptor.respond_to?(:close)
17
26
  end
18
27
  end
19
28
 
20
- # Override this for custom cleanup functionality.
21
- def finalize; end
22
-
23
29
  # Override this for custom output handling functionality per-row.
24
30
  def handle_output(transformed_row)
25
31
  outgoing.puts(transformed_row)
@@ -31,16 +37,17 @@ module Rodimus
31
37
  end
32
38
 
33
39
  def run
34
- Rodimus.logger.info "Running #{self}"
40
+ notify(self, :before_run)
35
41
  @row_count = 1
36
42
  incoming.each do |row|
43
+ notify(self, :before_row)
37
44
  transformed_row = process_row(row)
38
45
  handle_output(transformed_row)
39
46
  Rodimus.logger.info(self) { "#{@row_count} rows processed" } if @row_count % 50000 == 0
40
47
  @row_count += 1
48
+ notify(self, :after_row)
41
49
  end
42
- finalize
43
- Rodimus.logger.info "Finished #{self}"
50
+ notify(self, :after_run)
44
51
  ensure
45
52
  close_descriptors
46
53
  end
@@ -3,6 +3,10 @@ require 'drb'
3
3
  module Rodimus
4
4
 
5
5
  class Transformation
6
+ include Observable
7
+ include Observing # Transformations observe themselves for run hooks
8
+ include RuntimeLogging
9
+
6
10
  attr_reader :drb_server, :pids, :steps
7
11
 
8
12
  # User-data accessible across all running steps.
@@ -12,9 +16,11 @@ module Rodimus
12
16
  @steps = []
13
17
  @pids = []
14
18
  @shared_data = {} # TODO: This needs to be thread safe
19
+ observers << self
15
20
  end
16
21
 
17
22
  def run
23
+ notify(self, :before_run)
18
24
  @drb_server = DRb.start_service(nil, shared_data)
19
25
  pids.clear
20
26
  prepare
@@ -30,6 +36,7 @@ module Rodimus
30
36
  ensure
31
37
  Process.waitall
32
38
  drb_server.stop_service
39
+ notify(self, :after_run)
33
40
  end
34
41
 
35
42
  def to_s
@@ -39,7 +46,6 @@ module Rodimus
39
46
  private
40
47
 
41
48
  def prepare
42
- Rodimus.logger.info "Preparing #{self}..."
43
49
  # [1, 2, 3, 4] => [1, 2], [2, 3], [3, 4]
44
50
  steps.inject do |first, second|
45
51
  read, write = IO.pipe
@@ -1,3 +1,3 @@
1
1
  module Rodimus
2
- VERSION = "0.1.2"
2
+ VERSION = "1.0.0"
3
3
  end
@@ -0,0 +1,26 @@
1
+ require 'minitest/autorun'
2
+ require 'rodimus'
3
+
4
# Verifies that Rodimus::Observing finds and calls hook methods by name.
class TestObserving < MiniTest::Unit::TestCase
  # Creates an anonymous observer whose single hook, before_run_test, flips
  # the `called` flag when invoked.
  def setup
    observer_class = Class.new do
      include Rodimus::Observing

      attr_reader :called

      def initialize
        @called = false
      end

      def before_run_test
        @called = true
      end
    end

    @observer = observer_class.new
  end

  # A :before_run notification must trigger the before_run_* hook.
  def test_hook_discovery
    refute @observer.called
    @observer.on_notify(self, :before_run)
    assert @observer.called
  end
end
@@ -0,0 +1,44 @@
1
+ require 'minitest/autorun'
2
+ require 'rodimus'
3
+
4
# Point the library logger at a nil device for the duration of the tests.
Rodimus.configure { |config| config.logger = Logger.new(nil) }
7
+
8
# IO stand-in that records every string written to it. A StringIO cannot be
# read once it has been closed, so this class turns #close into a no-op and
# keeps the written rows available in #history.
class TestIO < IO
  # Array of everything passed to #puts, in call order.
  attr_reader :history

  def initialize
    @history = []
  end

  # Deliberately does nothing so the history survives being "closed".
  def close
    nil
  end

  # Records the string instead of writing it anywhere.
  def puts(string)
    @history.push(string)
  end
end
21
+
22
# Exercises Rodimus::Step#run with in-memory input and a recording output.
class TestStep < MiniTest::Unit::TestCase
  # Wires a fresh step between a two-row StringIO and a TestIO recorder.
  def setup
    @test_string = "row 1\nrow 2"
    @incoming = StringIO.new(@test_string)
    @outgoing = TestIO.new
    @step = Rodimus::Step.new.tap do |step|
      step.incoming = @incoming
      step.outgoing = @outgoing
    end
  end

  # With the default process_row, rows reach the output unchanged.
  def test_streaming_rows
    @step.run
    assert_equal @test_string, @outgoing.history.join
  end

  # A custom process_row is applied to every row before it is written.
  def test_process_row
    @step.define_singleton_method(:process_row) { |row| row.upcase }
    @step.run
    assert_equal @test_string.upcase, @outgoing.history.join
  end
end
@@ -11,16 +11,13 @@ module Rodimus
11
11
  def test_forking_processes
12
12
  incoming = StringIO.new
13
13
  transformation = Transformation.new
14
- steps = []
15
14
  number_of_steps = 2 + rand(5)
16
- number_of_steps.times { steps << Object.new }
17
- steps.each do |step|
18
- step.extend(Rodimus::Step)
19
- transformation.steps << step
15
+ number_of_steps.times do
16
+ transformation.steps << Rodimus::Step.new
20
17
  end
21
- steps.first.incoming = incoming
18
+ transformation.steps.first.incoming = incoming
22
19
  transformation.run
23
- assert_equal(steps.count, transformation.pids.count)
20
+ assert_equal(transformation.steps.count, transformation.pids.count)
24
21
  end
25
22
  end
26
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rodimus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandon Rice
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-22 00:00:00.000000000 Z
11
+ date: 2014-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -55,15 +55,25 @@ files:
55
55
  - README.md
56
56
  - Rakefile
57
57
  - examples/csv_input_stdout.rb
58
+ - examples/rails_log/file_output.rb
59
+ - examples/rails_log/log_input.rb
60
+ - examples/rails_log/parse_connection.rb
61
+ - examples/rails_log/rails_example.log
62
+ - examples/rails_log/run.rb
58
63
  - examples/worldbank-sample.csv
59
64
  - lib/rodimus.rb
65
+ - lib/rodimus/benchmark.rb
60
66
  - lib/rodimus/configuration.rb
67
+ - lib/rodimus/observable.rb
68
+ - lib/rodimus/observing.rb
69
+ - lib/rodimus/runtime_logging.rb
61
70
  - lib/rodimus/step.rb
62
71
  - lib/rodimus/transformation.rb
63
72
  - lib/rodimus/version.rb
64
73
  - rodimus.gemspec
65
- - test/step_test.rb
66
- - test/transformation_test.rb
74
+ - test/rodimus/observing_test.rb
75
+ - test/rodimus/step_test.rb
76
+ - test/rodimus/transformation_test.rb
67
77
  homepage: https://github.com/nevern02/rodimus
68
78
  licenses:
69
79
  - MIT
@@ -90,5 +100,6 @@ specification_version: 4
90
100
  summary: An ETL (Extract-Transform-Load) library that uses a forking process model
91
101
  for concurrency.
92
102
  test_files:
93
- - test/step_test.rb
94
- - test/transformation_test.rb
103
+ - test/rodimus/observing_test.rb
104
+ - test/rodimus/step_test.rb
105
+ - test/rodimus/transformation_test.rb
@@ -1,48 +0,0 @@
1
- require 'minitest/autorun'
2
- require 'rodimus'
3
-
4
- module Rodimus
5
- Rodimus.configure do |config|
6
- config.logger = Logger.new(nil)
7
- end
8
-
9
- class TestIO < IO
10
- attr_reader :history
11
-
12
- def initialize
13
- @history = []
14
- end
15
-
16
- def close; nil; end
17
-
18
- def puts(string)
19
- history << string
20
- end
21
- end
22
-
23
- class TestStep < MiniTest::Unit::TestCase
24
- def setup
25
- @test_string = "row 1\nrow 2"
26
- @incoming = StringIO.new(@test_string)
27
- @outgoing = TestIO.new
28
- @step = Object.new
29
- @step.extend(Rodimus::Step)
30
- @step.incoming = @incoming
31
- @step.outgoing = @outgoing
32
- end
33
-
34
- def test_streaming_rows
35
- @step.run
36
- assert_equal @test_string, @outgoing.history.join
37
- end
38
-
39
- def test_process_row
40
- @step.define_singleton_method(:process_row) do |row|
41
- row.upcase
42
- end
43
- @step.run
44
- assert_equal @test_string.upcase, @outgoing.history.join
45
- end
46
- end
47
-
48
- end