rodimus 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +4 -2
- data/README.md +1 -1
- data/Rakefile +7 -0
- data/lib/rodimus.rb +1 -0
- data/lib/rodimus/buffered_step.rb +34 -0
- data/lib/rodimus/simulation.rb +26 -0
- data/lib/rodimus/simulation/row_generator.rb +19 -0
- data/lib/rodimus/simulation/step1.rb +18 -0
- data/lib/rodimus/simulation/step2.rb +19 -0
- data/lib/rodimus/version.rb +1 -1
- data/rodimus.gemspec +5 -3
- data/test/rodimus/buffered_step_test.rb +33 -0
- data/test/rodimus/observable_test.rb +2 -3
- data/test/rodimus/observing_test.rb +2 -3
- data/test/rodimus/step_test.rb +2 -21
- data/test/rodimus/transformation_test.rb +2 -3
- data/test/test_helper.rb +20 -0
- metadata +47 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90a48da418f9255e96fa21a4c77c4f08ad4b706e
|
4
|
+
data.tar.gz: 72e832c9f3dbba8f5cb969883c071153a339666f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95bda7a98743242537fe2a55a7f2ec154c1e285b7069f0cc3618e80a5aac3a4df8dc356f4b968b91c7d69c5d79697c7469920895f8046b0885c65d8bc134316b
|
7
|
+
data.tar.gz: 0617781ea52018694d4c54c2aacdee034eb07b8cc6df98f232f620bb92f84a597633b8ac7baa0ed5f494ecd95aeb8579f4c6fe2bd99c5166d0b44a0e2691b4fb
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.2.0
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -38,7 +38,7 @@ destination.
|
|
38
38
|
In Rodimus, you create a transformation object, and then you add
|
39
39
|
one or more steps to its array of steps. You typically create steps by writing
|
40
40
|
your own classes that inherit from Rodimus::Step. When the transformation is
|
41
|
-
subsequently run, a new process is forked for each step. On
|
41
|
+
subsequently run, a new process is forked for each step. On platforms that support
|
42
42
|
native threads (JRuby, Rubinius), threads are used instead of forking processes.
|
43
43
|
All processes are connected together using pipes except for the first and last
|
44
44
|
steps (those being the source and destination steps). Each step then consumes
|
data/Rakefile
CHANGED
@@ -8,7 +8,14 @@ task :default => :test
|
|
8
8
|
|
9
9
|
Rake::TestTask.new do |t|
|
10
10
|
t.libs.push 'lib'
|
11
|
+
t.libs.push 'test'
|
11
12
|
t.test_files = FileList['test/**/*_test.rb']
|
12
13
|
t.verbose = true
|
13
14
|
end
|
14
15
|
|
16
|
+
desc 'Run a simulation on a large data set'
|
17
|
+
task :simulate, :rows do |t, args|
|
18
|
+
require 'rodimus/simulation'
|
19
|
+
rows = (args[:rows] || 50_000).to_i
|
20
|
+
Rodimus::Simulation.run(rows)
|
21
|
+
end
|
data/lib/rodimus.rb
CHANGED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Rodimus
|
2
|
+
|
3
|
+
class BufferedStep < Step
|
4
|
+
# The maximum size of the buffer
|
5
|
+
attr_accessor :buffer_size
|
6
|
+
attr_reader :buffer
|
7
|
+
|
8
|
+
def initialize(buffer_size = 100)
|
9
|
+
super()
|
10
|
+
@buffer_size = buffer_size
|
11
|
+
@buffer = []
|
12
|
+
end
|
13
|
+
|
14
|
+
def close_descriptors # override
|
15
|
+
flush if buffer.any?
|
16
|
+
super
|
17
|
+
end
|
18
|
+
|
19
|
+
def handle_output(transformed_row) # override
|
20
|
+
buffer << transformed_row
|
21
|
+
|
22
|
+
if buffer.length >= buffer_size
|
23
|
+
flush
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Flush the contents of the buffer to the outgoing data stream
|
28
|
+
def flush
|
29
|
+
outgoing.puts(buffer.join("\n"))
|
30
|
+
@buffer = []
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rodimus/simulation/row_generator'
|
2
|
+
require 'rodimus/simulation/step1'
|
3
|
+
require 'rodimus/simulation/step2'
|
4
|
+
|
5
|
+
|
6
|
+
module Rodimus
|
7
|
+
module Simulation
|
8
|
+
def self.run(rows = 50_000)
|
9
|
+
Rodimus.configure do |config|
|
10
|
+
config.benchmarking = true
|
11
|
+
end
|
12
|
+
|
13
|
+
transformation = Transformation.new
|
14
|
+
Rodimus.logger.info "Generating data."
|
15
|
+
generator = RowGenerator.new(rows)
|
16
|
+
step1 = Step1.new(generator)
|
17
|
+
step2 = Step2.new
|
18
|
+
|
19
|
+
transformation.steps << step1
|
20
|
+
transformation.steps << step2
|
21
|
+
|
22
|
+
Rodimus.logger.info "Running transformation."
|
23
|
+
transformation.run
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'faker'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Rodimus
|
5
|
+
module Simulation
|
6
|
+
|
7
|
+
class RowGenerator
|
8
|
+
extend Forwardable
|
9
|
+
|
10
|
+
def_delegator :@rows, :each
|
11
|
+
|
12
|
+
def initialize(count = 50_000)
|
13
|
+
@count = count
|
14
|
+
@rows = count.times.map { |i| Faker::Lorem.sentence }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Rodimus
|
4
|
+
module Simulation
|
5
|
+
|
6
|
+
class Step2 < Rodimus::Step
|
7
|
+
def before_run_set_output
|
8
|
+
fd = IO.sysopen('/dev/null', 'w')
|
9
|
+
@outgoing = IO.new(fd, 'w')
|
10
|
+
end
|
11
|
+
|
12
|
+
def process_row(row)
|
13
|
+
row = JSON.parse(row)
|
14
|
+
row.map { |i| i.split('').sort.join('') }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
data/lib/rodimus/version.rb
CHANGED
data/rodimus.gemspec
CHANGED
@@ -12,13 +12,15 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = "An ETL (Extract-Transform-Load) library that uses a forking process model for concurrency."
|
13
13
|
spec.homepage = "https://github.com/nevern02/rodimus"
|
14
14
|
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">= 1.9.
|
15
|
+
spec.required_ruby_version = ">= 1.9.3"
|
16
16
|
|
17
17
|
spec.files = `git ls-files -z`.split("\x0")
|
18
18
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
19
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
20
|
spec.require_paths = ["lib"]
|
21
21
|
|
22
|
-
spec.add_development_dependency "bundler", "~> 1.
|
23
|
-
spec.add_development_dependency "rake"
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.8"
|
23
|
+
spec.add_development_dependency "rake", "~> 10.4"
|
24
|
+
spec.add_development_dependency "faker", "~> 1.4"
|
25
|
+
spec.add_development_dependency "minitest", "~> 5.4"
|
24
26
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class TestBufferedStep < MiniTest::Test
|
4
|
+
attr_reader :incoming, :outgoing, :step
|
5
|
+
|
6
|
+
def setup
|
7
|
+
rows = "row 1\nrow 2\nrow 3"
|
8
|
+
@incoming = StringIO.new(rows)
|
9
|
+
@outgoing = TestIO.new
|
10
|
+
@step = Rodimus::BufferedStep.new
|
11
|
+
|
12
|
+
step.incoming = incoming
|
13
|
+
step.outgoing = outgoing
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_buffering_rows
|
17
|
+
step.buffer_size = 3
|
18
|
+
expected_write_count = 1 # writing 3 rows
|
19
|
+
|
20
|
+
step.run
|
21
|
+
|
22
|
+
assert_equal expected_write_count, outgoing.history.length
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_flushing_rows
|
26
|
+
step.buffer_size = 2
|
27
|
+
expected_write_count = 2
|
28
|
+
|
29
|
+
step.run
|
30
|
+
|
31
|
+
assert_equal expected_write_count, outgoing.history.length
|
32
|
+
end
|
33
|
+
end
|
data/test/rodimus/step_test.rb
CHANGED
@@ -1,25 +1,6 @@
|
|
1
|
-
require '
|
2
|
-
require 'rodimus'
|
1
|
+
require 'test_helper'
|
3
2
|
|
4
|
-
|
5
|
-
config.logger = Logger.new(nil)
|
6
|
-
end
|
7
|
-
|
8
|
-
class TestIO < IO # Because we can't read closed StringIOs
|
9
|
-
attr_reader :history
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@history = []
|
13
|
-
end
|
14
|
-
|
15
|
-
def close; nil; end
|
16
|
-
|
17
|
-
def puts(string)
|
18
|
-
history << string
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
class TestStep < MiniTest::Unit::TestCase
|
3
|
+
class TestStep < MiniTest::Test
|
23
4
|
def setup
|
24
5
|
@test_string = "row 1\nrow 2"
|
25
6
|
@incoming = StringIO.new(@test_string)
|
@@ -1,9 +1,8 @@
|
|
1
|
-
require '
|
2
|
-
require 'rodimus'
|
1
|
+
require 'test_helper'
|
3
2
|
|
4
3
|
module Rodimus
|
5
4
|
|
6
|
-
class TestTransformation < MiniTest::
|
5
|
+
class TestTransformation < MiniTest::Test
|
7
6
|
Rodimus.configure do |config|
|
8
7
|
config.logger = Logger.new(nil)
|
9
8
|
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'rodimus'
|
3
|
+
|
4
|
+
Rodimus.configure do |config|
|
5
|
+
config.logger = Logger.new(nil)
|
6
|
+
end
|
7
|
+
|
8
|
+
class TestIO < IO # Because we can't read closed StringIOs
|
9
|
+
attr_reader :history
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@history = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def close; nil; end
|
16
|
+
|
17
|
+
def puts(string)
|
18
|
+
history << string
|
19
|
+
end
|
20
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rodimus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandon Rice
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.8'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.8'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.4'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faker
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
47
|
+
version: '1.4'
|
34
48
|
type: :development
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
|
-
- - "
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.4'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5.4'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '5.4'
|
41
69
|
description: An ETL (Extract-Transform-Load) library that uses a forking process model
|
42
70
|
for concurrency.
|
43
71
|
email:
|
@@ -63,18 +91,25 @@ files:
|
|
63
91
|
- examples/worldbank-sample.csv
|
64
92
|
- lib/rodimus.rb
|
65
93
|
- lib/rodimus/benchmark.rb
|
94
|
+
- lib/rodimus/buffered_step.rb
|
66
95
|
- lib/rodimus/configuration.rb
|
67
96
|
- lib/rodimus/observable.rb
|
68
97
|
- lib/rodimus/observing.rb
|
69
98
|
- lib/rodimus/runtime_logging.rb
|
99
|
+
- lib/rodimus/simulation.rb
|
100
|
+
- lib/rodimus/simulation/row_generator.rb
|
101
|
+
- lib/rodimus/simulation/step1.rb
|
102
|
+
- lib/rodimus/simulation/step2.rb
|
70
103
|
- lib/rodimus/step.rb
|
71
104
|
- lib/rodimus/transformation.rb
|
72
105
|
- lib/rodimus/version.rb
|
73
106
|
- rodimus.gemspec
|
107
|
+
- test/rodimus/buffered_step_test.rb
|
74
108
|
- test/rodimus/observable_test.rb
|
75
109
|
- test/rodimus/observing_test.rb
|
76
110
|
- test/rodimus/step_test.rb
|
77
111
|
- test/rodimus/transformation_test.rb
|
112
|
+
- test/test_helper.rb
|
78
113
|
homepage: https://github.com/nevern02/rodimus
|
79
114
|
licenses:
|
80
115
|
- MIT
|
@@ -87,7 +122,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
122
|
requirements:
|
88
123
|
- - ">="
|
89
124
|
- !ruby/object:Gem::Version
|
90
|
-
version: 1.9.
|
125
|
+
version: 1.9.3
|
91
126
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
127
|
requirements:
|
93
128
|
- - ">="
|
@@ -95,13 +130,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
95
130
|
version: '0'
|
96
131
|
requirements: []
|
97
132
|
rubyforge_project:
|
98
|
-
rubygems_version: 2.
|
133
|
+
rubygems_version: 2.4.5
|
99
134
|
signing_key:
|
100
135
|
specification_version: 4
|
101
136
|
summary: An ETL (Extract-Transform-Load) library that uses a forking process model
|
102
137
|
for concurrency.
|
103
138
|
test_files:
|
139
|
+
- test/rodimus/buffered_step_test.rb
|
104
140
|
- test/rodimus/observable_test.rb
|
105
141
|
- test/rodimus/observing_test.rb
|
106
142
|
- test/rodimus/step_test.rb
|
107
143
|
- test/rodimus/transformation_test.rb
|
144
|
+
- test/test_helper.rb
|