rector 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode
6
+ - rbx-19mode
7
+ script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in rector.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Rector
2
+
3
+ [![Build Status](https://secure.travis-ci.org/alindeman/rector.png)](http://travis-ci.org/alindeman/rector)
4
+
5
+ **RECTOR IS CURRENTLY VAPORWARE; THIS README IS SIMPLY MY THOUGHTS ON
6
+ HOW IT MIGHT WORK**
7
+
8
+ Rector allows coordination of a number of jobs spawned with a mechanism
9
+ like Resque (though any job manager will do). If you are able to parallelize
10
+ the processing of a task, yet all these tasks are generating metrics,
11
+ statistics, or other data that need to be combined, Rector might be for you.
12
+
13
+ ## Requirements
14
+
15
+ * Ruby >= 1.9.2 (or 1.9 mode of JRuby or Rubinius)
16
+
17
+ ## Configuration
18
+
19
+ Rector currently supports Redis as a backend for job coordination and
20
+ data storage.
21
+
22
+ ### Redis Server
23
+
24
+ ```ruby
25
+ Rector.configure do |c|
26
+ c.redis = Redis.new(:host => "10.0.1.1", :port => 6380)
27
+ end
28
+ ```
29
+
30
+ ## Job Creation (Master)
31
+
32
+ Rector requires that some process be designated as the "master" process.
33
+ This is usually the process that is also responsible for spawning the
34
+ worker jobs.
35
+
36
+ ```ruby
37
+ job = Rector::Job.new
38
+
39
+ # e.g., processing files in parallel
40
+ files.each do |file|
41
+ worker = job.workers.create
42
+
43
+ # e.g., using Resque for job management; Rector doesn't really care
44
+ Resque.enqueue(WordCounterJob, worker.id, file)
45
+ end
46
+
47
+ # wait for all the workers to complete
48
+ job.join
49
+
50
+ # get aggregated data from all the jobs
51
+ job.data.each do |word, count|
52
+ puts "#{word} was seen #{count} times across all files"
53
+ end
54
+
55
+ job.cleanup
56
+ ```
57
+
58
+ ## Job Processing (Workers)
59
+
60
+ ```ruby
61
+ class WordCounterJob
62
+ def self.perform(worker_id, file)
63
+ worker = Rector::Worker.new(worker_id)
64
+
65
+ words = File.read(file).split(/\W/)
66
+ words.reject(&:empty?).each do |word|
67
+ worker.data[word] ||= 0
68
+ worker.data[word] += 1
69
+ end
70
+
71
+ worker.finish
72
+ end
73
+ end
74
+ ```
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/rector.rb ADDED
@@ -0,0 +1,27 @@
1
+ require "redis"
2
+ require "redis-namespace"
3
+
4
+ require_relative "rector/configuration"
5
+ require_relative "rector/worker"
6
+ require_relative "rector/job"
7
+ require_relative "rector/backends"
8
+
9
# Top-level namespace for Rector. Holds the shared configuration object and
# builds backend objects for jobs.
module Rector
  # Returns the shared configuration object, building it on first use.
  def self.configuration
    @configuration ||= Rector::Configuration.new
  end

  # Discards the current configuration; the next call to .configuration
  # builds a fresh one.
  def self.reset
    @configuration = nil
  end

  # Yields the shared configuration object to the caller's block.
  def self.configure
    yield configuration
  end

  # Builds a Redis backend bound to the given job ID.
  def self.backend_for(job_id)
    Rector::Backends::Redis.new(job_id)
  end
end
@@ -0,0 +1 @@
1
+ require_relative "backends/redis"
@@ -0,0 +1,74 @@
1
require "set"

module Rector
  module Backends
    # Stores job data and worker bookkeeping in Redis. Every key is accessed
    # through a Redis::Namespace built from the job ID, wrapping the
    # connection found at Rector.configuration.redis.
    class Redis
      # Set holding the names of all data keys written for the job.
      KEY_LIST_SET    = "__keys__"
      # Set holding the IDs of workers that have not finished yet.
      WORKER_LIST_SET = "__workers__"

      attr_reader :job_id

      # job_id - identifier used as the Redis namespace for this job.
      def initialize(job_id)
        @job_id = job_id
      end

      # Merges a worker's data into the job's data inside a MULTI block.
      #
      # Numeric values are added with INCRBY, Sets with SADD, and any other
      # Enumerable is appended with RPUSH. Values of other types are not
      # written (their keys are still recorded in the key list).
      #
      # An empty hash is a no-op, and empty collections are skipped: SADD and
      # RPUSH require at least one member, so sending them without members
      # would make Redis reject the command.
      def update_job_data_from_hash(hsh)
        return if hsh.empty?

        redis.multi do
          redis.sadd(KEY_LIST_SET, *hsh.keys)

          hsh.each { |key, val| write(key, val) }
        end
      end

      # Returns a Hash of every recorded key mapped to its current value.
      def read_job_data_to_hash
        Hash[keys.map { |k| [k, read(k)] }]
      end

      # Records worker_id as still working.
      def add_worker(worker_id)
        redis.sadd(WORKER_LIST_SET, worker_id)
      end

      # Removes worker_id from the set of working workers.
      def finish_worker(worker_id)
        redis.srem(WORKER_LIST_SET, worker_id)
      end

      # True while at least one worker is still recorded as working.
      def workers_working?
        redis.scard(WORKER_LIST_SET).to_i > 0
      end

      # Deletes all data keys, then the two bookkeeping sets.
      def cleanup
        data_keys = keys

        # DEL needs at least one key; skip it when no data was ever written.
        redis.del(*data_keys) unless data_keys.empty?
        redis.del(KEY_LIST_SET, WORKER_LIST_SET)
      end

      private

      # Namespaced Redis connection for this job, built lazily.
      def redis
        @redis ||=
          ::Redis::Namespace.new(@job_id, redis: Rector.configuration.redis)
      end

      # Names of all data keys recorded for this job.
      def keys
        redis.smembers(KEY_LIST_SET)
      end

      # Writes one value using the Redis command that matches its Ruby type.
      def write(key, val)
        case val
        when Numeric
          redis.incrby(key, val)
        when Set
          redis.sadd(key, *val) unless val.empty?
        when Enumerable
          items = val.to_a
          redis.rpush(key, *items) unless items.empty?
        end
      end

      # Reads one value back, choosing the Ruby type from the Redis type.
      # Returns nil for any Redis type other than string, set or list.
      def read(key)
        case redis.type(key)
        when "string"
          redis.get(key).to_i
        when "set"
          Set.new(redis.smembers(key))
        when "list"
          redis.lrange(key, 0, -1)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "ostruct"
2
+
3
module Rector
  # Configuration is a plain OpenStruct, so any setting can be assigned and
  # read back without being declared first.
  Configuration = OpenStruct
end
data/lib/rector/job.rb ADDED
@@ -0,0 +1,54 @@
1
+ require "delegate"
2
+ require "securerandom"
3
+
4
module Rector
  # Master-side handle for one job: creates workers, waits for them to
  # finish, and reads the combined data back from the backend.
  class Job
    # Array-like collection (delegates to a wrapped Array) of the workers
    # created for a job.
    class WorkerCollection < SimpleDelegator
      # job - the Job that allocates IDs for workers in this collection.
      def initialize(job)
        @job = job

        # Wraps an array
        super([])
      end

      # Builds a worker with a newly allocated ID, appends it to the
      # collection and returns it.
      def create
        Rector::Worker.new(@job.allocate_worker_id).tap { |worker| self << worker }
      end
    end

    # Seconds to sleep between checks in #join unless the caller overrides it.
    DEFAULT_POLL_INTERVAL = 5

    attr_reader :workers

    def initialize
      @workers = WorkerCollection.new(self)
      @backend = Rector.backend_for(id)
    end

    # Random hex identifier for this job, generated once and then reused.
    def id
      # TODO: Obviously there's a small chance of jobs overlapping here
      # Can do something more reliable for ID generation?
      @id ||= SecureRandom.hex(10)
    end

    # Returns a new worker ID of the form "<job id>:<random hex>".
    def allocate_worker_id
      # TODO: Obviously there's a small chance of worker IDs overlapping here
      # Can do something more reliable for ID generation?
      "#{id}:#{SecureRandom.hex(8)}"
    end

    # Blocks until the backend reports that no workers are still working.
    #
    # poll_interval - seconds to sleep between checks (default: 5).
    def join(poll_interval = DEFAULT_POLL_INTERVAL)
      sleep(poll_interval) while @backend.workers_working?
    end

    # Data read from the backend; fetched on first call and cached afterwards.
    def data
      @data ||= @backend.read_job_data_to_hash
    end

    # Asks the backend to clean up after this job.
    def cleanup
      @backend.cleanup
    end
  end
end
@@ -0,0 +1,22 @@
1
module Rector
  # Worker-side handle: collects data locally in a Hash and hands it to the
  # backend when the worker finishes.
  class Worker
    attr_reader :id, :data

    # id - worker ID; the part before the first ":" is taken as the job ID.
    #
    # Registers the worker with the backend as soon as it is constructed.
    def initialize(id)
      @id = id
      @data = {}

      @backend = Rector.backend_for(job_id)
      @backend.add_worker(id)
    end

    # Job ID portion of the worker ID (everything before the first ":").
    def job_id
      prefix, = @id.split(":")
      prefix
    end

    # Sends the collected data to the backend, then marks this worker as
    # finished.
    def finish
      @backend.update_job_data_from_hash(data)
      @backend.finish_worker(id)
    end
  end
end
data/rector.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "rector"
6
+ s.version = "0.0.1"
7
+ s.authors = ["Andy Lindeman"]
8
+ s.email = ["alindeman@gmail.com"]
9
+ s.homepage = "https://github.com/alindeman/rector"
10
+ s.summary = %q{Rector coordinates parallelized jobs that generate metrics or other data together}
11
+ s.description = <<-EOF
12
+ Rector allows coordination of a number of jobs spawned with a mechanism
13
+ like Resque (though any job manager will do). If you are able to parallelize
14
+ the processing of a task, yet all these tasks are generating metrics,
15
+ statistics, or other data that need to be combined, Rector might be for you.
16
+ EOF
17
+
18
+ s.files = `git ls-files`.split("\n")
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
+ s.require_paths = ["lib"]
22
+
23
+ s.add_dependency "redis"
24
+ s.add_dependency "redis-namespace"
25
+
26
+ s.add_development_dependency "rake"
27
+ s.add_development_dependency "rspec", ">=2.8.0"
28
+ s.add_development_dependency "mocha", ">=0.10.0"
29
+ end
@@ -0,0 +1,108 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Backends::Redis do
4
+ let(:redis) { stub_everything("redis") }
5
+
6
+ before do
7
+ def redis.multi
8
+ yield
9
+ end
10
+
11
+ Rector.configure do |c|
12
+ c.redis = redis
13
+ end
14
+ end
15
+
16
+ let(:job_id) { "abc123" }
17
+ subject { described_class.new(job_id) }
18
+
19
+ describe "writing" do
20
+ it "stores a list of keys" do
21
+ hsh = {
22
+ "foo" => 1,
23
+ "bar" => 2
24
+ }
25
+
26
+ redis.expects(:sadd).with("#{job_id}:__keys__", "foo", "bar")
27
+ subject.update_job_data_from_hash(hsh)
28
+ end
29
+
30
+ it "stores integers" do
31
+ hsh = { "foo" => 1 }
32
+
33
+ redis.expects(:incrby).with("#{job_id}:foo", 1)
34
+ subject.update_job_data_from_hash(hsh)
35
+ end
36
+
37
+ it "stores lists" do
38
+ hsh = { "foo" => ["a", "b", "c"] }
39
+
40
+ redis.expects(:rpush).with("#{job_id}:foo", "a", "b", "c")
41
+ subject.update_job_data_from_hash(hsh)
42
+ end
43
+
44
+ it "stores sets" do
45
+ hsh = { "foo" => Set.new(["a", "b", "c"]) }
46
+
47
+ redis.expects(:sadd).with("#{job_id}:foo", "a", "b", "c")
48
+ subject.update_job_data_from_hash(hsh)
49
+ end
50
+ end
51
+
52
+ describe "reading" do
53
+ it "reads integers" do
54
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
55
+
56
+ redis.stubs(:type).with("#{job_id}:foo").returns("string")
57
+ redis.stubs(:get).with("#{job_id}:foo").returns("5")
58
+
59
+ subject.read_job_data_to_hash.should == { "foo" => 5 }
60
+ end
61
+
62
+ it "reads lists" do
63
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
64
+
65
+ redis.stubs(:type).with("#{job_id}:foo").returns("list")
66
+ redis.stubs(:lrange).with("#{job_id}:foo", 0, -1).returns(["bar"])
67
+
68
+ subject.read_job_data_to_hash.should == { "foo" => ["bar"] }
69
+ end
70
+
71
+ it "reads sets" do
72
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
73
+
74
+ redis.stubs(:type).with("#{job_id}:foo").returns("set")
75
+ redis.stubs(:smembers).with("#{job_id}:foo").returns(["bar"])
76
+
77
+ subject.read_job_data_to_hash.should == { "foo" => Set.new(["bar"]) }
78
+ end
79
+ end
80
+
81
+ describe "workers" do
82
+ it "adds a worker to a set" do
83
+ redis.expects(:sadd).with("#{job_id}:__workers__", "1234:5678")
84
+ subject.add_worker("1234:5678")
85
+ end
86
+
87
+ it "removes a worker from the set when it is finished" do
88
+ redis.expects(:srem).with("#{job_id}:__workers__", "1234:5678")
89
+ subject.finish_worker("1234:5678")
90
+ end
91
+
92
+ it "knows if workers are still working" do
93
+ redis.stubs(:scard).with("#{job_id}:__workers__").returns("1")
94
+ subject.workers_working?.should be_true
95
+
96
+ redis.stubs(:scard).with("#{job_id}:__workers__").returns("0")
97
+ subject.workers_working?.should be_false
98
+ end
99
+ end
100
+
101
+ it "cleans up when requests" do
102
+ redis.stubs(:smembers).returns(["a", "b"])
103
+ redis.expects(:del).with("#{job_id}:a", "#{job_id}:b")
104
+ redis.expects(:del).with("#{job_id}:__keys__", "#{job_id}:__workers__")
105
+
106
+ subject.cleanup
107
+ end
108
+ end
@@ -0,0 +1,9 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Configuration do
4
+ it "accepts and stores arbitrary configuration items" do
5
+ subject.backend = :redis
6
+
7
+ subject.backend.should == :redis
8
+ end
9
+ end
@@ -0,0 +1,33 @@
1
+ require "spec_helper"
2
+ require "timeout"
3
+
4
+ describe Rector::Job do
5
+ let(:backend) { stub_everything("backend") }
6
+ before do
7
+ Rector.stubs(:backend_for).returns(backend)
8
+ end
9
+
10
+ it "constructs workers" do
11
+ worker = subject.workers.create
12
+ worker.should be_a(Rector::Worker)
13
+ end
14
+
15
+ it "waits for workers to complete" do
16
+ backend.expects(:workers_working?).at_least_once.returns(true).then.returns(false)
17
+ subject.stubs(:sleep)
18
+
19
+ Timeout.timeout(2) do
20
+ subject.join
21
+ end
22
+ end
23
+
24
+ it "loads data from the backend" do
25
+ backend.stubs(:read_job_data_to_hash).returns("foo" => "bar")
26
+ subject.data.should == { "foo" => "bar" }
27
+ end
28
+
29
+ it "delegates to the backend for cleanup" do
30
+ backend.expects(:cleanup)
31
+ subject.cleanup
32
+ end
33
+ end
@@ -0,0 +1,18 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector do
4
+ it "allows configuration by yielding a block to #configure" do
5
+ described_class.configure do |c|
6
+ c.foo = :bar
7
+ end
8
+
9
+ described_class.configuration.foo.should == :bar
10
+ end
11
+
12
+ it "creates backend objects for jobs" do
13
+ backend = described_class.backend_for("abc123")
14
+
15
+ backend.should be_a(Rector::Backends::Redis)
16
+ backend.job_id.should == "abc123"
17
+ end
18
+ end
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Worker do
4
+ let(:worker_id) { "zyx987:abc123" }
5
+ subject { described_class.new(worker_id) }
6
+
7
+ let(:backend) { stub_everything("backend") }
8
+ before do
9
+ Rector.stubs(:backend_for).returns(backend)
10
+ end
11
+
12
+ it "is initialized with a worker ID" do
13
+ subject.id.should == worker_id
14
+ end
15
+
16
+ it "knows its job ID" do
17
+ subject.job_id.should == "zyx987"
18
+ end
19
+
20
+ it "notifies the backend of workers being created" do
21
+ backend.expects(:add_worker).with(worker_id)
22
+ subject
23
+ end
24
+
25
+ describe "#finish" do
26
+ it "notifies the backend" do
27
+ backend.expects(:finish_worker).with(worker_id)
28
+ subject.finish
29
+ end
30
+
31
+ it "saves data" do
32
+ subject.data["foo"] = "bar"
33
+
34
+ backend.expects(:update_job_data_from_hash).with("foo" => "bar")
35
+ subject.finish
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,12 @@
1
+ require "rspec"
2
+ require "mocha"
3
+
4
+ require_relative "../lib/rector"
5
+
6
+ RSpec.configure do |c|
7
+ c.mock_with :mocha
8
+
9
+ c.before do
10
+ Rector.reset
11
+ end
12
+ end
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andy Lindeman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: redis
16
+ requirement: &70197558869260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70197558869260
25
+ - !ruby/object:Gem::Dependency
26
+ name: redis-namespace
27
+ requirement: &70197558868820 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70197558868820
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &70197558868400 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70197558868400
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &70197558867900 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.8.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70197558867900
58
+ - !ruby/object:Gem::Dependency
59
+ name: mocha
60
+ requirement: &70197558867340 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 0.10.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *70197558867340
69
+ description: ! " Rector allows coordination of a number of jobs spawned with a
70
+ mechanism\n like Resque (though any job manager will do). If you are able to
71
+ parallelize\n the processing of a task, yet all these tasks are generating metrics,\n
72
+ \ statistics, or other data that need to be combined, Rector might be for you.\n"
73
+ email:
74
+ - alindeman@gmail.com
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - .gitignore
80
+ - .travis.yml
81
+ - Gemfile
82
+ - README.md
83
+ - Rakefile
84
+ - lib/rector.rb
85
+ - lib/rector/backends.rb
86
+ - lib/rector/backends/redis.rb
87
+ - lib/rector/configuration.rb
88
+ - lib/rector/job.rb
89
+ - lib/rector/worker.rb
90
+ - rector.gemspec
91
+ - spec/lib/backends/redis_spec.rb
92
+ - spec/lib/configuration_spec.rb
93
+ - spec/lib/job_spec.rb
94
+ - spec/lib/rector_spec.rb
95
+ - spec/lib/worker_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: https://github.com/alindeman/rector
98
+ licenses: []
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ segments:
110
+ - 0
111
+ hash: -3768330157712875762
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ segments:
119
+ - 0
120
+ hash: -3768330157712875762
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 1.8.14
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: Rector coordinates parallelized jobs that generate metrics or other data
127
+ together
128
+ test_files:
129
+ - spec/lib/backends/redis_spec.rb
130
+ - spec/lib/configuration_spec.rb
131
+ - spec/lib/job_spec.rb
132
+ - spec/lib/rector_spec.rb
133
+ - spec/lib/worker_spec.rb
134
+ - spec/spec_helper.rb