rodimus 0.1.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +3 -0
- data/Rakefile +1 -1
- data/examples/csv_input_stdout.rb +8 -8
- data/examples/rails_log/file_output.rb +16 -0
- data/examples/rails_log/log_input.rb +8 -0
- data/examples/rails_log/parse_connection.rb +33 -0
- data/examples/rails_log/rails_example.log +6043 -0
- data/examples/rails_log/run.rb +16 -0
- data/lib/rodimus.rb +4 -0
- data/lib/rodimus/benchmark.rb +48 -0
- data/lib/rodimus/configuration.rb +2 -1
- data/lib/rodimus/observable.rb +17 -0
- data/lib/rodimus/observing.rb +17 -0
- data/lib/rodimus/runtime_logging.rb +22 -0
- data/lib/rodimus/step.rb +14 -7
- data/lib/rodimus/transformation.rb +7 -1
- data/lib/rodimus/version.rb +1 -1
- data/test/rodimus/observing_test.rb +26 -0
- data/test/rodimus/step_test.rb +44 -0
- data/test/{transformation_test.rb → rodimus/transformation_test.rb} +4 -7
- metadata +17 -6
- data/test/step_test.rb +0 -48
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rodimus'
|
2
|
+
require_relative 'log_input'
|
3
|
+
require_relative 'parse_connection'
|
4
|
+
require_relative 'file_output'
|
5
|
+
|
6
|
+
# Wire up a three-step transformation over the sample log that sits next to
# this script, then run it. Steps are appended in the order they should run.
# NOTE(review): what each step does lives in the required files
# (log_input, parse_connection, file_output); this script only sequences them.
log_path = File.expand_path('../rails_example.log', __FILE__)
transformation = Rodimus::Transformation.new

[LogInput.new(log_path), ParseConnection.new, FileOutput.new].each do |step|
  transformation.steps << step
end

transformation.run

puts "Transformation complete!"
|
data/lib/rodimus.rb
CHANGED
@@ -0,0 +1,48 @@
|
|
1
|
+
module Rodimus

  # Observer that times the rows handled by the subject it is attached to and
  # logs a summary once the subject has finished running.
  #
  # It reacts to four events delivered through #on_notify:
  # :before_run, :before_row, :after_row and :after_run.
  class Benchmark
    include Observing

    # Hash of collected figures. Times are in seconds, rounded to 4 places.
    #   :count   - number of rows started
    #   :total   - summed per-row run time
    #   :min     - shortest per-row run time (nil until the first row
    #              completes; 0 after the run if no row was timed)
    #   :max     - longest per-row run time
    #   :average - :total / :count, filled in after the run
    attr_reader :stats

    # Dispatches on the event name explicitly instead of using the
    # prefix-based hook discovery provided by Observing.
    #
    # subject    - the object that emitted the event; only used in the
    #              summary log line.
    # event_type - Symbol naming the event. Unknown events are ignored.
    def on_notify(subject, event_type)
      case event_type
      when :before_run
        initialize_stats
      when :after_run
        finalize_stats(subject)
      when :before_row
        before_row
      when :after_row
        after_row
      end
    end

    private

    # Folds the run time of the row that just finished into the stats.
    def after_row
      row_run_time = (Time.now.to_f - @start_time).round(4)
      stats[:total] = (stats[:total] + row_run_time).round(4)
      # :min has no meaningful starting value, so the first row seeds it.
      # (A fixed seed such as 1 would be reported as the minimum whenever
      # every row takes longer than that.)
      stats[:min] = row_run_time if stats[:min].nil? || stats[:min] > row_run_time
      stats[:max] = row_run_time if stats[:max] < row_run_time
    end

    # Counts the row and starts its timer.
    def before_row
      stats[:count] += 1
      @start_time = Time.now.to_f
    end

    # Computes the average (when at least one row ran) and logs the stats.
    def finalize_stats(subject)
      if stats[:count] > 0
        stats[:average] = (stats[:total] / stats[:count]).round(4)
      end
      # No row was timed: report 0, consistent with :max and :average.
      stats[:min] ||= 0

      Rodimus.logger.info "#{subject} benchmarks: #{stats}"
    end

    # Resets all figures at the start of a run.
    def initialize_stats
      @stats = {count: 0, total: 0, min: nil, max: 0, average: 0}
    end
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Rodimus

  # Mixin that turns notifications into method calls by naming convention:
  # an event named :before_run invokes every method on the including object
  # whose name starts with "before_run" (e.g. before_run_record_time).
  module Observing
    # Calls each discovered hook, without arguments, for the given event.
    #
    # subject    - the object that emitted the event (unused here; classes
    #              that override this method may use it).
    # event_type - Symbol or String event name, matched literally as a
    #              method-name prefix.
    def on_notify(subject, event_type)
      discovered_hooks(event_type).each do |hook|
        send(hook)
      end
    end

    private

    # Returns the names of this object's methods that begin with +matcher+.
    # Only names listed by Object#methods (public and protected methods) are
    # considered, so private methods are never treated as hooks.
    def discovered_hooks(matcher)
      # Compare as plain text rather than building a regexp from the event
      # name, so characters such as "." or "?" cannot act as wildcards.
      prefix = matcher.to_s
      methods.select { |name| name.to_s.start_with?(prefix) }
    end
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Rodimus

  # Run hooks that log when the including object starts running and how long
  # it took once it has finished. The method names follow the
  # before_run*/after_run* hook naming convention.
  module RuntimeLogging
    # Whole seconds since the epoch at which the run started; nil until
    # before_run_record_time has been called.
    attr_reader :start_time

    # Remembers the start time and logs that the run has begun.
    def before_run_record_time
      @start_time = Time.now.to_i
      Rodimus.logger.info "Running #{self}"
    end

    # Logs the elapsed time since before_run_record_time, broken down into
    # hours, minutes and seconds. Expects start_time to have been set.
    def after_run_record_time
      run_time = Time.now.to_i - start_time
      elapsed_hours, remaining_seconds = run_time.divmod(3600)
      elapsed_minutes, elapsed_seconds = remaining_seconds.divmod(60)

      Rodimus.logger.info "Finished #{self} after #{elapsed_hours} hours, #{elapsed_minutes} minutes, #{elapsed_seconds} seconds."
    end
  end

end
|
data/lib/rodimus/step.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
module Rodimus
|
2
2
|
|
3
|
-
|
3
|
+
class Step
|
4
|
+
include Observable
|
5
|
+
include Observing # Steps observe themselves for run hooks
|
6
|
+
include RuntimeLogging
|
7
|
+
|
4
8
|
# The incoming data stream. Can be anything that quacks like an IO
|
5
9
|
attr_accessor :incoming
|
6
10
|
|
@@ -11,15 +15,17 @@ module Rodimus
|
|
11
15
|
# This is initialized by the Transformation when the step begins to run.
|
12
16
|
attr_accessor :shared_data
|
13
17
|
|
18
|
+
def initialize
|
19
|
+
observers << self
|
20
|
+
observers << Benchmark.new if Rodimus.configuration.benchmarking
|
21
|
+
end
|
22
|
+
|
14
23
|
def close_descriptors
|
15
24
|
[incoming, outgoing].reject(&:nil?).each do |descriptor|
|
16
25
|
descriptor.close if descriptor.respond_to?(:close)
|
17
26
|
end
|
18
27
|
end
|
19
28
|
|
20
|
-
# Override this for custom cleanup functionality.
|
21
|
-
def finalize; end
|
22
|
-
|
23
29
|
# Override this for custom output handling functionality per-row.
|
24
30
|
def handle_output(transformed_row)
|
25
31
|
outgoing.puts(transformed_row)
|
@@ -31,16 +37,17 @@ module Rodimus
|
|
31
37
|
end
|
32
38
|
|
33
39
|
def run
|
34
|
-
|
40
|
+
notify(self, :before_run)
|
35
41
|
@row_count = 1
|
36
42
|
incoming.each do |row|
|
43
|
+
notify(self, :before_row)
|
37
44
|
transformed_row = process_row(row)
|
38
45
|
handle_output(transformed_row)
|
39
46
|
Rodimus.logger.info(self) { "#{@row_count} rows processed" } if @row_count % 50000 == 0
|
40
47
|
@row_count += 1
|
48
|
+
notify(self, :after_row)
|
41
49
|
end
|
42
|
-
|
43
|
-
Rodimus.logger.info "Finished #{self}"
|
50
|
+
notify(self, :after_run)
|
44
51
|
ensure
|
45
52
|
close_descriptors
|
46
53
|
end
|
@@ -3,6 +3,10 @@ require 'drb'
|
|
3
3
|
module Rodimus
|
4
4
|
|
5
5
|
class Transformation
|
6
|
+
include Observable
|
7
|
+
include Observing # Transformations observe themselves for run hooks
|
8
|
+
include RuntimeLogging
|
9
|
+
|
6
10
|
attr_reader :drb_server, :pids, :steps
|
7
11
|
|
8
12
|
# User-data accessible across all running steps.
|
@@ -12,9 +16,11 @@ module Rodimus
|
|
12
16
|
@steps = []
|
13
17
|
@pids = []
|
14
18
|
@shared_data = {} # TODO: This needs to be thread safe
|
19
|
+
observers << self
|
15
20
|
end
|
16
21
|
|
17
22
|
def run
|
23
|
+
notify(self, :before_run)
|
18
24
|
@drb_server = DRb.start_service(nil, shared_data)
|
19
25
|
pids.clear
|
20
26
|
prepare
|
@@ -30,6 +36,7 @@ module Rodimus
|
|
30
36
|
ensure
|
31
37
|
Process.waitall
|
32
38
|
drb_server.stop_service
|
39
|
+
notify(self, :after_run)
|
33
40
|
end
|
34
41
|
|
35
42
|
def to_s
|
@@ -39,7 +46,6 @@ module Rodimus
|
|
39
46
|
private
|
40
47
|
|
41
48
|
def prepare
|
42
|
-
Rodimus.logger.info "Preparing #{self}..."
|
43
49
|
# [1, 2, 3, 4] => [1, 2], [2, 3], [3, 4]
|
44
50
|
steps.inject do |first, second|
|
45
51
|
read, write = IO.pipe
|
data/lib/rodimus/version.rb
CHANGED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'rodimus'
|
3
|
+
|
4
|
+
# Checks that Rodimus::Observing finds and calls hook methods by event name.
class TestObserving < MiniTest::Unit::TestCase
  # Builds an anonymous observer whose single hook records that it ran.
  def setup
    observer_class = Class.new do
      include Rodimus::Observing

      attr_reader :called

      def initialize
        @called = false
      end

      def before_run_test
        @called = true
      end
    end

    @observer = observer_class.new
  end

  # A :before_run notification should trigger before_run_test.
  def test_hook_discovery
    refute @observer.called
    @observer.on_notify(self, :before_run)
    assert @observer.called
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'rodimus'
|
3
|
+
|
4
|
+
# Give the library a logger with no log device so test runs print nothing.
Rodimus.configure { |config| config.logger = Logger.new(nil) }
|
7
|
+
|
8
|
+
# IO stand-in that keeps everything written to it in memory, in order.
class TestIO < IO # Because we can't read closed StringIOs
  # Array of every value passed to #puts so far.
  attr_reader :history

  # Deliberately does not call super: only the history array is set up.
  def initialize
    @history = []
  end

  # Closing is a no-op so the recorded history stays readable afterwards.
  def close
    nil
  end

  # Records the value instead of writing it anywhere.
  def puts(string)
    @history.push(string)
  end
end
|
21
|
+
|
22
|
+
# Exercises Rodimus::Step with an in-memory input and a recording output.
class TestStep < MiniTest::Unit::TestCase
  # Two rows of input are fed to a fresh step whose output is captured.
  def setup
    @test_string = "row 1\nrow 2"
    @incoming = StringIO.new(@test_string)
    @outgoing = TestIO.new
    @step = Rodimus::Step.new.tap do |step|
      step.incoming = @incoming
      step.outgoing = @outgoing
    end
  end

  # With no custom processing, the rows come out exactly as they went in.
  def test_streaming_rows
    @step.run
    assert_equal @test_string, @outgoing.history.join
  end

  # A step-specific process_row is applied to every row.
  def test_process_row
    @step.define_singleton_method(:process_row) { |row| row.upcase }
    @step.run
    expected = @test_string.upcase
    assert_equal expected, @outgoing.history.join
  end
end
|
@@ -11,16 +11,13 @@ module Rodimus
|
|
11
11
|
def test_forking_processes
|
12
12
|
incoming = StringIO.new
|
13
13
|
transformation = Transformation.new
|
14
|
-
steps = []
|
15
14
|
number_of_steps = 2 + rand(5)
|
16
|
-
number_of_steps.times
|
17
|
-
|
18
|
-
step.extend(Rodimus::Step)
|
19
|
-
transformation.steps << step
|
15
|
+
number_of_steps.times do
|
16
|
+
transformation.steps << Rodimus::Step.new
|
20
17
|
end
|
21
|
-
steps.first.incoming = incoming
|
18
|
+
transformation.steps.first.incoming = incoming
|
22
19
|
transformation.run
|
23
|
-
assert_equal(steps.count, transformation.pids.count)
|
20
|
+
assert_equal(transformation.steps.count, transformation.pids.count)
|
24
21
|
end
|
25
22
|
end
|
26
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rodimus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandon Rice
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -55,15 +55,25 @@ files:
|
|
55
55
|
- README.md
|
56
56
|
- Rakefile
|
57
57
|
- examples/csv_input_stdout.rb
|
58
|
+
- examples/rails_log/file_output.rb
|
59
|
+
- examples/rails_log/log_input.rb
|
60
|
+
- examples/rails_log/parse_connection.rb
|
61
|
+
- examples/rails_log/rails_example.log
|
62
|
+
- examples/rails_log/run.rb
|
58
63
|
- examples/worldbank-sample.csv
|
59
64
|
- lib/rodimus.rb
|
65
|
+
- lib/rodimus/benchmark.rb
|
60
66
|
- lib/rodimus/configuration.rb
|
67
|
+
- lib/rodimus/observable.rb
|
68
|
+
- lib/rodimus/observing.rb
|
69
|
+
- lib/rodimus/runtime_logging.rb
|
61
70
|
- lib/rodimus/step.rb
|
62
71
|
- lib/rodimus/transformation.rb
|
63
72
|
- lib/rodimus/version.rb
|
64
73
|
- rodimus.gemspec
|
65
|
-
- test/
|
66
|
-
- test/
|
74
|
+
- test/rodimus/observing_test.rb
|
75
|
+
- test/rodimus/step_test.rb
|
76
|
+
- test/rodimus/transformation_test.rb
|
67
77
|
homepage: https://github.com/nevern02/rodimus
|
68
78
|
licenses:
|
69
79
|
- MIT
|
@@ -90,5 +100,6 @@ specification_version: 4
|
|
90
100
|
summary: An ETL (Extract-Transform-Load) library that uses a forking process model
|
91
101
|
for concurrency.
|
92
102
|
test_files:
|
93
|
-
- test/
|
94
|
-
- test/
|
103
|
+
- test/rodimus/observing_test.rb
|
104
|
+
- test/rodimus/step_test.rb
|
105
|
+
- test/rodimus/transformation_test.rb
|
data/test/step_test.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'minitest/autorun'
|
2
|
-
require 'rodimus'
|
3
|
-
|
4
|
-
module Rodimus
|
5
|
-
Rodimus.configure do |config|
|
6
|
-
config.logger = Logger.new(nil)
|
7
|
-
end
|
8
|
-
|
9
|
-
class TestIO < IO
|
10
|
-
attr_reader :history
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@history = []
|
14
|
-
end
|
15
|
-
|
16
|
-
def close; nil; end
|
17
|
-
|
18
|
-
def puts(string)
|
19
|
-
history << string
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
class TestStep < MiniTest::Unit::TestCase
|
24
|
-
def setup
|
25
|
-
@test_string = "row 1\nrow 2"
|
26
|
-
@incoming = StringIO.new(@test_string)
|
27
|
-
@outgoing = TestIO.new
|
28
|
-
@step = Object.new
|
29
|
-
@step.extend(Rodimus::Step)
|
30
|
-
@step.incoming = @incoming
|
31
|
-
@step.outgoing = @outgoing
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_streaming_rows
|
35
|
-
@step.run
|
36
|
-
assert_equal @test_string, @outgoing.history.join
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_process_row
|
40
|
-
@step.define_singleton_method(:process_row) do |row|
|
41
|
-
row.upcase
|
42
|
-
end
|
43
|
-
@step.run
|
44
|
-
assert_equal @test_string.upcase, @outgoing.history.join
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
end
|