rodimus 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7fc221f69a67693a1f94810a09b297e345a9248c
4
+ data.tar.gz: b2fc619efd740b162e0968d92c30ce4f8eb9562d
5
+ SHA512:
6
+ metadata.gz: 9ce961dff9778ea69aaf5593084840f7239228bb9e720a31325e961db15ee8fde1aa1d57ab5b7e53bfc390fdeb2cfc1bd019f9d68930855306ccc1d11b37dc06
7
+ data.tar.gz: 2b4e42e96f08dbd9c42b8a486cbcebd2a20ed7fe9c61cc3beeb16d8fcb772caeacf621e605f0ff8f6d426099b05205e1d06d371816ef61dd68d4d746b22e0633
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .*.swp
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ rodimus
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rodimus.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Brandon Rice
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Rodimus
2
+
3
+ ETL stands for Extract-Transform-Load. Sometimes, you have data in Source A
4
+ that needs to be moved to Destination B. Along the way, it needs to be
5
+ manipulated in some way. This is a common scenario when working with a data
6
+ warehouse. There are lots of ETL solutions in the wild, but very few of them
7
+ are open source. None of them (that I know of) are Ruby. So, I started
8
+ hacking on one for my own use.
9
+
10
+ __Why the name?__ Rodimus Prime is one of the leaders of the Autobots, and he
11
+ has a cool name. Naming a data transformation library after a Transformer
12
+ increases the coolness factor. It's science.
13
+
14
+ __NOTE:__ This library is still in the earliest phases of development. Things
15
+ are prone to change suddenly and rapidly. Use at your own risk.
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ gem 'rodimus'
22
+
23
+ And then execute:
24
+
25
+ $ bundle
26
+
27
+ Or install it yourself as:
28
+
29
+ $ gem install rodimus
30
+
31
+ ## Usage
32
+
33
+ See the examples directory for the quickest path to success.
34
+
35
+ ## Contributing
36
+
37
+ 1. Fork it ( http://github.com/nevern02/rodimus/fork )
38
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
39
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
40
+ 4. Push to the branch (`git push origin my-new-feature`)
41
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,14 @@
# Make the gem's own lib directory loadable when rake runs from a checkout.
$LOAD_PATH << File.expand_path('../lib', __FILE__)

require 'bundler/gem_tasks'
require 'rake/testtask'
require 'rodimus'

# A bare `rake` invocation runs the test task.
task default: :test

# Test task covering every file matching test/*_test.rb.
Rake::TestTask.new do |test_task|
  test_task.libs << 'lib'
  test_task.test_files = FileList['test/*_test.rb']
  test_task.verbose = true
end
@@ -0,0 +1,40 @@
1
+ require 'rodimus'
2
+ require 'mongo'
3
+ require 'json'
4
+
# Example source step: streams documents out of a local MongoDB collection.
#
# Connects to localhost:27017, selects the `model_events` collection of the
# `inventory_events` database, and uses a cursor limited to four documents as
# the step's incoming row source.
# NOTE(review): relies on the `Mongo::MongoClient` API of the mongo gem that is
# loaded by this example -- confirm against the driver version in use.
class MongoInput
  include Rodimus::Step

  attr_reader :client, :db, :collection

  def initialize
    @client = Mongo::MongoClient.new('localhost', 27017)
    @db = @client['inventory_events']
    @collection = @db['model_events']
    @incoming = @collection.find.limit(4)
  end

  # Serializes each document to JSON so it can be written downstream as text.
  def process_row(row)
    row.to_json
  end
end
21
+
# Example sink step: writes one line per incoming row to `output.txt` in the
# current working directory (despite the class name, this is a regular file,
# not a Tempfile).
class TempfileOut
  include Rodimus::Step

  def initialize
    # The file is opened for writing immediately and becomes the step's output.
    self.outgoing = File.new('output.txt', 'w')
  end

  # Parses the incoming JSON text and reduces it to a comma-separated list of
  # its top-level keys.
  def process_row(row)
    document = JSON.parse(row)
    document.keys.join(',')
  end
end
33
+
# Build the two-step pipeline (Mongo source -> file sink) and run it.
transformation = Rodimus::Transformation.new
source = MongoInput.new
sink = TempfileOut.new

transformation.steps.push(source, sink)
transformation.run

puts "Transformation to #{sink.outgoing.path} complete!"
@@ -0,0 +1,30 @@
module Rodimus

  # Mixin that turns any object into a single pipeline step.
  #
  # A step pulls rows from +incoming+ (anything that responds to +each+),
  # converts each one with +process_row+, and hands the result to
  # +handle_output+, which by default writes it to +outgoing+ with +puts+.
  # The three private hooks below are the intended customization points.
  module Step
    attr_accessor :incoming, :outgoing

    # Processes every incoming row in order, then calls +finalize+ once.
    # Returns whatever +finalize+ returns (nil for the default hook).
    def run
      incoming.each { |row| handle_output(process_row(row)) }
      finalize
    end

    private

    # Hook: converts one incoming row. The default just stringifies it.
    def process_row(row)
      row.to_s
    end

    # Hook: delivers one transformed row. The default writes it to +outgoing+.
    def handle_output(transformed_row)
      outgoing.puts(transformed_row)
    end

    # Hook: runs after the last row has been handled. No-op by default.
    def finalize
    end
  end

end
@@ -0,0 +1,37 @@
module Rodimus

  # An ordered pipeline of steps, each executed in its own forked process.
  #
  # Adjacent steps are connected with an IO.pipe: the earlier step's
  # +outgoing+ is the pipe's write end and the later step's +incoming+ is its
  # read end. The first step's +incoming+ and the last step's +outgoing+ are
  # left exactly as the caller configured them.
  class Transformation
    # The steps to run, in pipeline order. Append to this array before #run.
    attr_reader :steps

    def initialize
      @steps = []
    end

    # Wires the steps together, forks one child process per step, and blocks
    # until every child has exited.
    #
    # Returns the result of Process.waitall (an array of [pid, status] pairs);
    # child failures are not raised here, so inspect the statuses if needed.
    def run
      prepare

      steps.each do |step|
        fork { step.run }

        # The child owns its copies of the streams now. The parent must drop
        # its own copies, otherwise a pipe's write end stays open here and the
        # downstream reader never sees end-of-file.
        close_stream(step.incoming)
        close_stream(step.outgoing)
      end

      Process.waitall
    end

    private

    # Connects every adjacent pair of steps with a fresh pipe:
    # [1, 2, 3, 4] => [1, 2], [2, 3], [3, 4]
    def prepare
      steps.each_cons(2) do |upstream, downstream|
        read, write = IO.pipe
        upstream.outgoing = write
        downstream.incoming = read
      end
    end

    # Closes +stream+ when it can be closed. A step only needs +each+ on its
    # input and +puts+ on its output, so sources such as a plain Array (or an
    # unset, nil stream) are simply left alone instead of raising.
    def close_stream(stream)
      stream.close if stream.respond_to?(:close)
    end
  end

end
@@ -0,0 +1,3 @@
module Rodimus
  # Release number of the gem.
  VERSION = '0.0.1'
end
data/lib/rodimus.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'rodimus/version'
2
+ require 'rodimus/step'
3
+ require 'rodimus/transformation'
4
+
5
+ module Rodimus
6
+ end
data/rodimus.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rodimus/version'
5
+
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = "rodimus"
  gem.version     = Rodimus::VERSION
  gem.authors     = ["Brandon Rice"]
  gem.email       = ["brice84@gmail.com"]
  gem.summary     = "An ETL (Extract-Transform-Load) library that uses a forking process model for concurrency."
  gem.description = "ETL is hard. There are lots of solutions, but few are open-source and none (that I know of) are Ruby."
  gem.homepage    = "https://github.com/nevern02/rodimus"
  gem.license     = "MIT"

  # Package every file tracked by git; executables and tests are derived from
  # that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Needed only for developing the gem and running the examples.
  gem.add_development_dependency "bundler", "~> 1.5"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "mongo"
end
data/test/step_test.rb ADDED
@@ -0,0 +1,40 @@
1
+ require 'minitest/autorun'
2
+ require 'rodimus'
3
+
module Rodimus

  # Exercises the Rodimus::Step mixin against in-memory StringIO streams.
  class TestStep < MiniTest::Unit::TestCase
    # With no hooks overridden, rows pass through the step unchanged.
    def test_streaming_rows
      test_string = "row 1\nrow 2"
      step = Object.new
      step.extend(Rodimus::Step)

      assert_equal test_string, run_step(step, test_string)
    end

    # An overridden process_row is applied to every row.
    def test_transformation_called
      test_string = "row 1\nrow 2"
      step = Class.new do
        include Rodimus::Step

        def process_row(row)
          row.upcase
        end
      end.new

      assert_equal test_string.upcase, run_step(step, test_string)
    end

    private

    # Feeds +input+ through +step+ and returns everything the step wrote,
    # minus the trailing newline added by the final puts.
    def run_step(step, input)
      step.incoming = StringIO.new(input)
      step.outgoing = StringIO.new
      step.run
      step.outgoing.string.chomp
    end
  end

end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rodimus
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brandon Rice
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mongo
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: ETL is hard. There are lots of solutions, but few are open-source and
56
+ none (that I know of) are Ruby.
57
+ email:
58
+ - brice84@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - ".ruby-gemset"
65
+ - ".ruby-version"
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - examples/mongo_input.rb
71
+ - lib/rodimus.rb
72
+ - lib/rodimus/step.rb
73
+ - lib/rodimus/transformation.rb
74
+ - lib/rodimus/version.rb
75
+ - rodimus.gemspec
76
+ - test/step_test.rb
77
+ homepage: https://github.com/nevern02/rodimus
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.2.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: An ETL (Extract-Transform-Load) library that uses a forking process model
101
+ for concurrency.
102
+ test_files:
103
+ - test/step_test.rb