rodimus 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +4 -2
- data/README.md +1 -1
- data/Rakefile +7 -0
- data/lib/rodimus.rb +1 -0
- data/lib/rodimus/buffered_step.rb +34 -0
- data/lib/rodimus/simulation.rb +26 -0
- data/lib/rodimus/simulation/row_generator.rb +19 -0
- data/lib/rodimus/simulation/step1.rb +18 -0
- data/lib/rodimus/simulation/step2.rb +19 -0
- data/lib/rodimus/version.rb +1 -1
- data/rodimus.gemspec +5 -3
- data/test/rodimus/buffered_step_test.rb +33 -0
- data/test/rodimus/observable_test.rb +2 -3
- data/test/rodimus/observing_test.rb +2 -3
- data/test/rodimus/step_test.rb +2 -21
- data/test/rodimus/transformation_test.rb +2 -3
- data/test/test_helper.rb +20 -0
- metadata +47 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90a48da418f9255e96fa21a4c77c4f08ad4b706e
|
4
|
+
data.tar.gz: 72e832c9f3dbba8f5cb969883c071153a339666f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95bda7a98743242537fe2a55a7f2ec154c1e285b7069f0cc3618e80a5aac3a4df8dc356f4b968b91c7d69c5d79697c7469920895f8046b0885c65d8bc134316b
|
7
|
+
data.tar.gz: 0617781ea52018694d4c54c2aacdee034eb07b8cc6df98f232f620bb92f84a597633b8ac7baa0ed5f494ecd95aeb8579f4c6fe2bd99c5166d0b44a0e2691b4fb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.2.0
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -38,7 +38,7 @@ destination.
|
|
38
38
|
In Rodimus, you create a transformation object, and then you add
|
39
39
|
one or more steps to its array of steps. You typically create steps by writing
|
40
40
|
your own classes that inherit from Rodimus::Step. When the transformation is
|
41
|
-
subsequently run, a new process is forked for each step. On
|
41
|
+
subsequently run, a new process is forked for each step. On platforms that support
|
42
42
|
native threads (JRuby, Rubinius), threads are used instead of forking processes.
|
43
43
|
All processes are connected together using pipes except for the first and last
|
44
44
|
steps (those being the source and destination steps). Each step then consumes
|
data/Rakefile
CHANGED
@@ -8,7 +8,14 @@ task :default => :test
|
|
8
8
|
|
9
9
|
Rake::TestTask.new do |t|
|
10
10
|
t.libs.push 'lib'
|
11
|
+
t.libs.push 'test'
|
11
12
|
t.test_files = FileList['test/**/*_test.rb']
|
12
13
|
t.verbose = true
|
13
14
|
end
|
14
15
|
|
16
|
+
desc 'Run a simulation on a large data set'
|
17
|
+
task :simulate, :rows do |t, args|
|
18
|
+
require 'rodimus/simulation'
|
19
|
+
rows = (args[:rows] || 50_000).to_i
|
20
|
+
Rodimus::Simulation.run(rows)
|
21
|
+
end
|
data/lib/rodimus.rb
CHANGED
data/lib/rodimus/buffered_step.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Rodimus
|
2
|
+
|
3
|
+
class BufferedStep < Step
|
4
|
+
# The maximum size of the buffer
|
5
|
+
attr_accessor :buffer_size
|
6
|
+
attr_reader :buffer
|
7
|
+
|
8
|
+
def initialize(buffer_size = 100)
|
9
|
+
super()
|
10
|
+
@buffer_size = buffer_size
|
11
|
+
@buffer = []
|
12
|
+
end
|
13
|
+
|
14
|
+
def close_descriptors # override
|
15
|
+
flush if buffer.any?
|
16
|
+
super
|
17
|
+
end
|
18
|
+
|
19
|
+
def handle_output(transformed_row) # override
|
20
|
+
buffer << transformed_row
|
21
|
+
|
22
|
+
if buffer.length >= buffer_size
|
23
|
+
flush
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Flush the contents of the buffer to the outgoing data stream
|
28
|
+
def flush
|
29
|
+
outgoing.puts(buffer.join("\n"))
|
30
|
+
@buffer = []
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/lib/rodimus/simulation.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rodimus/simulation/row_generator'
|
2
|
+
require 'rodimus/simulation/step1'
|
3
|
+
require 'rodimus/simulation/step2'
|
4
|
+
|
5
|
+
|
6
|
+
module Rodimus
|
7
|
+
module Simulation
|
8
|
+
def self.run(rows = 50_000)
|
9
|
+
Rodimus.configure do |config|
|
10
|
+
config.benchmarking = true
|
11
|
+
end
|
12
|
+
|
13
|
+
transformation = Transformation.new
|
14
|
+
Rodimus.logger.info "Generating data."
|
15
|
+
generator = RowGenerator.new(rows)
|
16
|
+
step1 = Step1.new(generator)
|
17
|
+
step2 = Step2.new
|
18
|
+
|
19
|
+
transformation.steps << step1
|
20
|
+
transformation.steps << step2
|
21
|
+
|
22
|
+
Rodimus.logger.info "Running transformation."
|
23
|
+
transformation.run
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/rodimus/simulation/row_generator.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'faker'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Rodimus
|
5
|
+
module Simulation
|
6
|
+
|
7
|
+
class RowGenerator
|
8
|
+
extend Forwardable
|
9
|
+
|
10
|
+
def_delegator :@rows, :each
|
11
|
+
|
12
|
+
def initialize(count = 50_000)
|
13
|
+
@count = count
|
14
|
+
@rows = count.times.map { |i| Faker::Lorem.sentence }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
data/lib/rodimus/simulation/step2.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Rodimus
|
4
|
+
module Simulation
|
5
|
+
|
6
|
+
class Step2 < Rodimus::Step
|
7
|
+
def before_run_set_output
|
8
|
+
fd = IO.sysopen('/dev/null', 'w')
|
9
|
+
@outgoing = IO.new(fd, 'w')
|
10
|
+
end
|
11
|
+
|
12
|
+
def process_row(row)
|
13
|
+
row = JSON.parse(row)
|
14
|
+
row.map { |i| i.split('').sort.join('') }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
data/lib/rodimus/version.rb
CHANGED
data/rodimus.gemspec
CHANGED
@@ -12,13 +12,15 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = "An ETL (Extract-Transform-Load) library that uses a forking process model for concurrency."
|
13
13
|
spec.homepage = "https://github.com/nevern02/rodimus"
|
14
14
|
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">= 1.9.
|
15
|
+
spec.required_ruby_version = ">= 1.9.3"
|
16
16
|
|
17
17
|
spec.files = `git ls-files -z`.split("\x0")
|
18
18
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler", "~> 1.
|
23
|
-
spec.add_development_dependency "rake"
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.8"
|
23
|
+
spec.add_development_dependency "rake", "~> 10.4"
|
24
|
+
spec.add_development_dependency "faker", "~> 1.4"
|
25
|
+
spec.add_development_dependency "minitest", "~> 5.4"
|
24
26
|
end
|
data/test/rodimus/buffered_step_test.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class TestBufferedStep < MiniTest::Test
|
4
|
+
attr_reader :incoming, :outgoing, :step
|
5
|
+
|
6
|
+
def setup
|
7
|
+
rows = "row 1\nrow 2\nrow 3"
|
8
|
+
@incoming = StringIO.new(rows)
|
9
|
+
@outgoing = TestIO.new
|
10
|
+
@step = Rodimus::BufferedStep.new
|
11
|
+
|
12
|
+
step.incoming = incoming
|
13
|
+
step.outgoing = outgoing
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_buffering_rows
|
17
|
+
step.buffer_size = 3
|
18
|
+
expected_write_count = 1 # writing 3 rows
|
19
|
+
|
20
|
+
step.run
|
21
|
+
|
22
|
+
assert_equal expected_write_count, outgoing.history.length
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_flushing_rows
|
26
|
+
step.buffer_size = 2
|
27
|
+
expected_write_count = 2
|
28
|
+
|
29
|
+
step.run
|
30
|
+
|
31
|
+
assert_equal expected_write_count, outgoing.history.length
|
32
|
+
end
|
33
|
+
end
|
data/test/rodimus/step_test.rb
CHANGED
@@ -1,25 +1,6 @@
|
|
1
|
-
require '
|
2
|
-
require 'rodimus'
|
1
|
+
require 'test_helper'
|
3
2
|
|
4
|
-
|
5
|
-
config.logger = Logger.new(nil)
|
6
|
-
end
|
7
|
-
|
8
|
-
class TestIO < IO # Because we can't read closed StringIOs
|
9
|
-
attr_reader :history
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@history = []
|
13
|
-
end
|
14
|
-
|
15
|
-
def close; nil; end
|
16
|
-
|
17
|
-
def puts(string)
|
18
|
-
history << string
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
class TestStep < MiniTest::Unit::TestCase
|
3
|
+
class TestStep < MiniTest::Test
|
23
4
|
def setup
|
24
5
|
@test_string = "row 1\nrow 2"
|
25
6
|
@incoming = StringIO.new(@test_string)
|
data/test/rodimus/transformation_test.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
|
-
require '
|
2
|
-
require 'rodimus'
|
1
|
+
require 'test_helper'
|
3
2
|
|
4
3
|
module Rodimus
|
5
4
|
|
6
|
-
class TestTransformation < MiniTest::
|
5
|
+
class TestTransformation < MiniTest::Test
|
7
6
|
Rodimus.configure do |config|
|
8
7
|
config.logger = Logger.new(nil)
|
9
8
|
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'rodimus'
|
3
|
+
|
4
|
+
Rodimus.configure do |config|
|
5
|
+
config.logger = Logger.new(nil)
|
6
|
+
end
|
7
|
+
|
8
|
+
class TestIO < IO # Because we can't read closed StringIOs
|
9
|
+
attr_reader :history
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@history = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def close; nil; end
|
16
|
+
|
17
|
+
def puts(string)
|
18
|
+
history << string
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rodimus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandon Rice
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.8'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.8'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.4'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faker
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
47
|
+
version: '1.4'
|
34
48
|
type: :development
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
|
-
- - "
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.4'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.4'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '5.4'
|
41
69
|
description: An ETL (Extract-Transform-Load) library that uses a forking process model
|
42
70
|
for concurrency.
|
43
71
|
email:
|
@@ -63,18 +91,25 @@ files:
|
|
63
91
|
- examples/worldbank-sample.csv
|
64
92
|
- lib/rodimus.rb
|
65
93
|
- lib/rodimus/benchmark.rb
|
94
|
+
- lib/rodimus/buffered_step.rb
|
66
95
|
- lib/rodimus/configuration.rb
|
67
96
|
- lib/rodimus/observable.rb
|
68
97
|
- lib/rodimus/observing.rb
|
69
98
|
- lib/rodimus/runtime_logging.rb
|
99
|
+
- lib/rodimus/simulation.rb
|
100
|
+
- lib/rodimus/simulation/row_generator.rb
|
101
|
+
- lib/rodimus/simulation/step1.rb
|
102
|
+
- lib/rodimus/simulation/step2.rb
|
70
103
|
- lib/rodimus/step.rb
|
71
104
|
- lib/rodimus/transformation.rb
|
72
105
|
- lib/rodimus/version.rb
|
73
106
|
- rodimus.gemspec
|
107
|
+
- test/rodimus/buffered_step_test.rb
|
74
108
|
- test/rodimus/observable_test.rb
|
75
109
|
- test/rodimus/observing_test.rb
|
76
110
|
- test/rodimus/step_test.rb
|
77
111
|
- test/rodimus/transformation_test.rb
|
112
|
+
- test/test_helper.rb
|
78
113
|
homepage: https://github.com/nevern02/rodimus
|
79
114
|
licenses:
|
80
115
|
- MIT
|
@@ -87,7 +122,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
122
|
requirements:
|
88
123
|
- - ">="
|
89
124
|
- !ruby/object:Gem::Version
|
90
|
-
version: 1.9.
|
125
|
+
version: 1.9.3
|
91
126
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
127
|
requirements:
|
93
128
|
- - ">="
|
@@ -95,13 +130,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
95
130
|
version: '0'
|
96
131
|
requirements: []
|
97
132
|
rubyforge_project:
|
98
|
-
rubygems_version: 2.
|
133
|
+
rubygems_version: 2.4.5
|
99
134
|
signing_key:
|
100
135
|
specification_version: 4
|
101
136
|
summary: An ETL (Extract-Transform-Load) library that uses a forking process model
|
102
137
|
for concurrency.
|
103
138
|
test_files:
|
139
|
+
- test/rodimus/buffered_step_test.rb
|
104
140
|
- test/rodimus/observable_test.rb
|
105
141
|
- test/rodimus/observing_test.rb
|
106
142
|
- test/rodimus/step_test.rb
|
107
143
|
- test/rodimus/transformation_test.rb
|
144
|
+
- test/test_helper.rb
|