rodimus 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +27 -1
- data/examples/csv_input_stdout.rb +37 -0
- data/examples/worldbank-sample.csv +49 -0
- data/lib/rodimus/step.rb +4 -1
- data/lib/rodimus/transformation.rb +4 -2
- data/lib/rodimus/version.rb +1 -1
- data/rodimus.gemspec +9 -9
- data/test/transformation_test.rb +27 -0
- metadata +7 -18
- data/examples/mongo_input.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86ccd8d549d00dbd8a6c1ba56ce9eb1fe41aa496
|
4
|
+
data.tar.gz: efdf0cac67bec3a2c2cd469164c9f7bdc812ebd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbfe473e374f6aa661d3cffca3c313b5811f3f332bdfd2860919022806c679383d7b20053ef5e56c4ee2a2fb8f13814c678b14463d0ba17a27b7dc9b76714344
|
7
|
+
data.tar.gz: 1d45b630f57c78f956f7b170ca5d2e8533bda42e88ad3240df112c197579bb95a6032e08deadc1ffe653a1a393eb61553f475815d4d178a1b5c82251afd478b7
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -31,7 +31,33 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
## Usage
|
33
33
|
|
34
|
-
See the examples directory for the quickest path to success.
|
34
|
+
tl;dr: See the examples directory for the quickest path to success.
|
35
|
+
|
36
|
+
A transformation is an operation that consists of many steps. Each step may
|
37
|
+
manipulate the data in some way. Typically, the first step is reserved for
|
38
|
+
reading from your data source, and the last step is used to write to the new
|
39
|
+
destination.
|
40
|
+
|
41
|
+
In Rodimus, you create a transformation object, and then you add
|
42
|
+
one or more steps to its array of steps. You typically create steps by writing
|
43
|
+
your own classes that include the Rodimus::Step mixin. When the transformation is
|
44
|
+
subsequently run, a new process is forked for each step. All processes are
|
45
|
+
connected together using pipes except for the first and last steps (those being the
|
46
|
+
source and destination steps). Each step then consumes rows of data from its
|
47
|
+
incoming pipe and performs some operation on it before writing it to the
|
48
|
+
outgoing pipe.
|
49
|
+
|
50
|
+
There are several methods on the Rodimus::Step mixin that can be
|
51
|
+
overridden for custom processing behavior before, during, or after each
|
52
|
+
row is handled. If those aren't enough, you're also free to manipulate the
|
53
|
+
input/output objects (i.e. to redirect to standard out).
|
54
|
+
|
55
|
+
The Rodimus approach is to provide a minimal, flexible framework upon which
|
56
|
+
custom ETL solutions can be built. ETL is complex, and there tend to be many
|
57
|
+
subtle differences between projects which can make things like establishing
|
58
|
+
conventions and encouraging code reuse difficult. Rodimus is an attempt to
|
59
|
+
codify those things which are probably useful to a majority of ETL projects
|
60
|
+
with as little overhead as possible.
|
35
61
|
|
36
62
|
## Contributing
|
37
63
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rodimus'
|
2
|
+
require 'csv'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
class CsvInput
|
6
|
+
include Rodimus::Step
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@incoming = CSV.open('worldbank-sample.csv')
|
10
|
+
@incoming.readline # skip the headers
|
11
|
+
end
|
12
|
+
|
13
|
+
def process_row(row)
|
14
|
+
row.to_json
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class FormattedText
|
19
|
+
include Rodimus::Step
|
20
|
+
|
21
|
+
def initialize
|
22
|
+
@outgoing = STDOUT.dup
|
23
|
+
end
|
24
|
+
|
25
|
+
def process_row(row)
|
26
|
+
data = JSON.parse(row)
|
27
|
+
"In #{data.first} during #{data[1]}, CO2 emissions were #{data[2]} metric tons per capita."
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
t = Rodimus::Transformation.new
|
32
|
+
s1 = CsvInput.new
|
33
|
+
s2 = FormattedText.new
|
34
|
+
t.steps << s1
|
35
|
+
t.steps << s2
|
36
|
+
t.run
|
37
|
+
puts "Transformation complete!"
|
@@ -0,0 +1,49 @@
|
|
1
|
+
Country,Year,CO2 emissions (metric tons per capita),Electric power consumption (kWh per capita),Energy use (kg of oil equivalent per capita),"Fertility rate, total (births per woman)","GNI per capita, Atlas method (current US$)","Internet users (per 1,000 people)","Life expectancy at birth, total (years)",Military expenditure (% of GDP),"Population, total","Prevalence of HIV, total (% of population ages 15-49)"
|
2
|
+
Belarus,2000,5.91,2988.71,2459.67,1.29,1.38E+03,18.69,68.01,1.26,1.00E+07,
|
3
|
+
Belarus,2001,5.87,2996.81,2476.16,,1300,43.15,,1.44,9970260,
|
4
|
+
Belarus,2002,6.03,2982.77,2539.95,1.25,1370,89.8,68.21,1.4,9925000,
|
5
|
+
Belarus,2003,6.33,3039.1,2628.83,1.25,1610,162.76,,1.3,9873968,0.31
|
6
|
+
Belarus,2004,,3143.58,2725.44,1.24,2150,250.51,68.39,1.36,9824469,
|
7
|
+
Belarus,2005,,,,1.24,2760,347.23,68.48,1.24,9775591,0.34
|
8
|
+
Philippines,2000,1.03,514.02,559.83,,1.06E+03,20.33,69.53,1.08,7.58E+07,
|
9
|
+
Philippines,2001,0.99,535.18,539.4,,1.05E+03,25.89,,0.99,7.72E+07,
|
10
|
+
Philippines,2002,0.99,539.74,537.47,3.5,1.03E+03,44.47,70.19,0.98,7.87E+07,
|
11
|
+
Philippines,2003,0.96,574.47,524.9,,1.08E+03,49.9,,1.03,8.02E+07,0.1
|
12
|
+
Philippines,2004,,597.06,542.39,,1.20E+03,53.91,,0.9,8.16E+07,
|
13
|
+
Philippines,2005,,,,3.2,1.29E+03,,71.04,0.82,8.31E+07,0.1
|
14
|
+
Morocco,2000,1.2,489.04,350.32,2.62,1.19E+03,7.03,68.81,4.13,2.85E+07,
|
15
|
+
Morocco,2001,1.32,508.1,370.93,2.5,1.20E+03,13.87,,4.08,2.88E+07,
|
16
|
+
Morocco,2002,1.32,526.4,373.38,2.5,1.19E+03,23.99,69.48,4.27,2.92E+07,
|
17
|
+
Morocco,2003,1.28,562.93,370.01,2.5,1.34E+03,33.87,,4.22,2.95E+07,0.09
|
18
|
+
Morocco,2004,,594.3,383.8,2.43,1.57E+03,117.3,,4.54,2.98E+07,
|
19
|
+
Morocco,2005,,,,2.4,1.75E+03,152.61,70.38,4.28,3.01E+07,0.1
|
20
|
+
Afghanistan,2000,0.04,,,,,,,,,
|
21
|
+
Afghanistan,2001,0.03,,,,,,,,,
|
22
|
+
Afghanistan,2002,0.03,,,,,0.04,,,,
|
23
|
+
Afghanistan,2003,0.03,,,,,0.73,,,,0.1
|
24
|
+
Afghanistan,2004,,,,,,0.87,,,,
|
25
|
+
Afghanistan,2005,,,,,,1,,,,0.1
|
26
|
+
Nicaragua,2000,0.77,349.37,558.5,3.54,760,10.16,68.87,0.78,4920286,
|
27
|
+
Nicaragua,2001,0.8,349.6,559.75,,760,15.03,,0.71,4991475,
|
28
|
+
Nicaragua,2002,0.78,373.83,552.24,3.3,760,17.82,69.48,0.87,5050368,
|
29
|
+
Nicaragua,2003,0.77,373.59,613.75,,790,19.62,,0.87,5096507,0.21
|
30
|
+
Nicaragua,2004,,416.57,643.39,,870,24.4,,0.7,5122841,
|
31
|
+
Nicaragua,2005,,,,3.08,950,27.19,70.39,0.69,5149311,0.24
|
32
|
+
"Korea, Dem. Rep.",2000,3.52,747.14,903.3,2.02E+00,,,63.1,,2.19E+07,
|
33
|
+
"Korea, Dem. Rep.",2001,3.63,772.66,928.63,,,0,,,2.20E+07,
|
34
|
+
"Korea, Dem. Rep.",2002,3.43,751.85,882.16,2.00E+00,,,63.04,,2.21E+07,
|
35
|
+
"Korea, Dem. Rep.",2003,3.48,793.87,894.09,,,,,,2.23E+07,0.2
|
36
|
+
"Korea, Dem. Rep.",2004,,826.54,910.17,,,,,,2.24E+07,
|
37
|
+
"Korea, Dem. Rep.",2005,,,,1.96E+00,,,63.92,,2.25E+07,0.2
|
38
|
+
Kyrgyz Republic,2000,0.94,1687.38,497.43,2.4,280,10.5,68.56,2.85,4915300,
|
39
|
+
Kyrgyz Republic,2001,0.78,1443.05,451.08,2.4,280,30.39,68.71,2.35,4954800,
|
40
|
+
Kyrgyz Republic,2002,0.99,1365.26,507.89,2.5,290,30.44,68.11,2.73,4993200,
|
41
|
+
Kyrgyz Republic,2003,1.06,1646.69,531.5,2.5,340,39.69,68.26,2.87,5038600,0.1
|
42
|
+
Kyrgyz Republic,2004,,1421.22,545.87,2.45,400,51.64,68.15,2.85,5092802,
|
43
|
+
Kyrgyz Republic,2005,,,,2.41,450,54.44,68.34,,5143500,0.14
|
44
|
+
Middle East & North Africa,2000,3.21,1075.27,1076.66,3.27,1.66E+03,9.02,68.14,4.32,2.79E+08,
|
45
|
+
Middle East & North Africa,2001,3.21,1122.92,1104.58,,1.71E+03,11.96,,4.59,2.84E+08,
|
46
|
+
Middle East & North Africa,2002,3.5,1176.9,1145.34,3.11,1.70E+03,28.94,68.79,4.18,2.90E+08,
|
47
|
+
Middle East & North Africa,2003,3.43,1221.77,1157.82,,1.81E+03,41.89,,4.29,2.95E+08,0.12
|
48
|
+
Middle East & North Africa,2004,,1290.24,1190.15,,1.98E+03,60.28,,4.28,3.00E+08,
|
49
|
+
Middle East & North Africa,2005,,,,2.97,2.22E+03,88.9,69.67,3.73,3.06E+08,0.15
|
data/lib/rodimus/step.rb
CHANGED
@@ -32,9 +32,12 @@ module Rodimus
|
|
32
32
|
|
33
33
|
def run
|
34
34
|
Rodimus.logger.info "Running #{self}"
|
35
|
+
@row_count = 1
|
35
36
|
incoming.each do |row|
|
36
37
|
transformed_row = process_row(row)
|
37
38
|
handle_output(transformed_row)
|
39
|
+
Rodimus.logger.info(self) { "#{@row_count} rows processed" } if @row_count % 50000 == 0
|
40
|
+
@row_count += 1
|
38
41
|
end
|
39
42
|
finalize
|
40
43
|
Rodimus.logger.info "Finished #{self}"
|
@@ -43,7 +46,7 @@ module Rodimus
|
|
43
46
|
end
|
44
47
|
|
45
48
|
def to_s
|
46
|
-
"#{self.class} connected to input: #{incoming} and output: #{outgoing}"
|
49
|
+
"#{self.class} connected to input: #{incoming || 'nil'} and output: #{outgoing || 'nil'}"
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
@@ -3,22 +3,24 @@ require 'drb'
|
|
3
3
|
module Rodimus
|
4
4
|
|
5
5
|
class Transformation
|
6
|
-
attr_reader :drb_server, :steps
|
6
|
+
attr_reader :drb_server, :pids, :steps
|
7
7
|
|
8
8
|
# User-data accessible across all running steps.
|
9
9
|
attr_reader :shared_data
|
10
10
|
|
11
11
|
def initialize
|
12
12
|
@steps = []
|
13
|
+
@pids = []
|
13
14
|
@shared_data = {} # TODO: This needs to be thread safe
|
14
15
|
end
|
15
16
|
|
16
17
|
def run
|
17
18
|
@drb_server = DRb.start_service(nil, shared_data)
|
19
|
+
pids.clear
|
18
20
|
prepare
|
19
21
|
|
20
22
|
steps.each do |step|
|
21
|
-
fork do
|
23
|
+
pids << fork do
|
22
24
|
DRb.start_service # the parent DRb thread dies across the fork
|
23
25
|
step.shared_data = DRbObject.new_with_uri(drb_server.uri)
|
24
26
|
step.run
|
data/lib/rodimus/version.rb
CHANGED
data/rodimus.gemspec
CHANGED
@@ -4,14 +4,15 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'rodimus/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
11
|
-
spec.summary
|
12
|
-
spec.description
|
13
|
-
spec.homepage
|
14
|
-
spec.license
|
7
|
+
spec.name = "rodimus"
|
8
|
+
spec.version = Rodimus::VERSION
|
9
|
+
spec.authors = ["Brandon Rice"]
|
10
|
+
spec.email = ["brice84@gmail.com"]
|
11
|
+
spec.summary = "An ETL (Extract-Transform-Load) library that uses a forking process model for concurrency."
|
12
|
+
spec.description = "An ETL (Extract-Transform-Load) library that uses a forking process model for concurrency."
|
13
|
+
spec.homepage = "https://github.com/nevern02/rodimus"
|
14
|
+
spec.license = "MIT"
|
15
|
+
spec.required_ruby_version = ">= 1.9.2"
|
15
16
|
|
16
17
|
spec.files = `git ls-files -z`.split("\x0")
|
17
18
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
@@ -20,5 +21,4 @@ Gem::Specification.new do |spec|
|
|
20
21
|
|
21
22
|
spec.add_development_dependency "bundler", "~> 1.5"
|
22
23
|
spec.add_development_dependency "rake"
|
23
|
-
spec.add_development_dependency "mongo"
|
24
24
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'rodimus'
|
3
|
+
|
4
|
+
module Rodimus
|
5
|
+
|
6
|
+
class TestTransformation < MiniTest::Unit::TestCase
|
7
|
+
Rodimus.configure do |config|
|
8
|
+
config.logger = Logger.new(nil)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_forking_processes
|
12
|
+
incoming = StringIO.new
|
13
|
+
transformation = Transformation.new
|
14
|
+
steps = []
|
15
|
+
number_of_steps = 2 + rand(5)
|
16
|
+
number_of_steps.times { steps << Object.new }
|
17
|
+
steps.each do |step|
|
18
|
+
step.extend(Rodimus::Step)
|
19
|
+
transformation.steps << step
|
20
|
+
end
|
21
|
+
steps.first.incoming = incoming
|
22
|
+
transformation.run
|
23
|
+
assert_equal(steps.count, transformation.pids.count)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rodimus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandon Rice
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: mongo
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
41
|
description: An ETL (Extract-Transform-Load) library that uses a forking process model
|
56
42
|
for concurrency.
|
57
43
|
email:
|
@@ -68,7 +54,8 @@ files:
|
|
68
54
|
- LICENSE.txt
|
69
55
|
- README.md
|
70
56
|
- Rakefile
|
71
|
-
- examples/mongo_input.rb
|
57
|
+
- examples/csv_input_stdout.rb
|
58
|
+
- examples/worldbank-sample.csv
|
72
59
|
- lib/rodimus.rb
|
73
60
|
- lib/rodimus/configuration.rb
|
74
61
|
- lib/rodimus/step.rb
|
@@ -76,6 +63,7 @@ files:
|
|
76
63
|
- lib/rodimus/version.rb
|
77
64
|
- rodimus.gemspec
|
78
65
|
- test/step_test.rb
|
66
|
+
- test/transformation_test.rb
|
79
67
|
homepage: https://github.com/nevern02/rodimus
|
80
68
|
licenses:
|
81
69
|
- MIT
|
@@ -88,7 +76,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
88
76
|
requirements:
|
89
77
|
- - ">="
|
90
78
|
- !ruby/object:Gem::Version
|
91
|
-
version: '0'
|
79
|
+
version: 1.9.2
|
92
80
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
81
|
requirements:
|
94
82
|
- - ">="
|
@@ -103,3 +91,4 @@ summary: An ETL (Extract-Transform-Load) library that uses a forking process mod
|
|
103
91
|
for concurrency.
|
104
92
|
test_files:
|
105
93
|
- test/step_test.rb
|
94
|
+
- test/transformation_test.rb
|
data/examples/mongo_input.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'rodimus'
|
2
|
-
require 'mongo'
|
3
|
-
require 'json'
|
4
|
-
|
5
|
-
class MongoInput
|
6
|
-
attr_reader :client, :db, :collection
|
7
|
-
|
8
|
-
include Rodimus::Step
|
9
|
-
|
10
|
-
def initialize
|
11
|
-
@client = Mongo::MongoClient.new('localhost', 27017)
|
12
|
-
@db = client['inventory_events']
|
13
|
-
@collection = db['model_events']
|
14
|
-
@incoming = collection.find.limit(4)
|
15
|
-
end
|
16
|
-
|
17
|
-
def process_row(row)
|
18
|
-
row.to_json
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
class TempfileOut
|
23
|
-
include Rodimus::Step
|
24
|
-
|
25
|
-
def initialize
|
26
|
-
@outgoing = File.new('output.txt', 'w')
|
27
|
-
end
|
28
|
-
|
29
|
-
def process_row(row)
|
30
|
-
JSON.parse(row).keys.join(',')
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
t = Rodimus::Transformation.new
|
35
|
-
s1 = MongoInput.new
|
36
|
-
s2 = TempfileOut.new
|
37
|
-
t.steps << s1
|
38
|
-
t.steps << s2
|
39
|
-
t.run
|
40
|
-
puts "Transformation to #{s2.outgoing.path} complete!"
|