rector 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode
6
+ - rbx-19mode
7
+ script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in rector.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Rector
2
+
3
+ [![Build Status](https://secure.travis-ci.org/alindeman/rector.png)](http://travis-ci.org/alindeman/rector)
4
+
5
+ **RECTOR IS CURRENTLY VAPORWARE; THIS README IS SIMPLY MY THOUGHTS ON
6
+ HOW IT MIGHT WORK**
7
+
8
+ Rector allows coordination of a number of jobs spawned with a mechanism
9
+ like Resque (though any job manager will do). If you are able to parallelize
10
+ the processing of a task, yet all these tasks are generating metrics,
11
+ statistics, or other data that need to be combined, Rector might be for you.
12
+
13
+ ## Requirements
14
+
15
+ * Ruby >= 1.9.2 (or 1.9 mode of JRuby or Rubinius)
16
+
17
+ ## Configuration
18
+
19
+ Rector currently supports Redis as a backend for job coordination and
20
+ data storage.
21
+
22
+ ### Redis Server
23
+
24
+ ```ruby
25
+ Rector.configure do |c|
26
+ c.redis = Redis.new(:host => "10.0.1.1", :port => 6380)
27
+ end
28
+ ```
29
+
30
+ ## Job Creation (Master)
31
+
32
+ Rector requires that some process be designated as the "master" process.
33
+ This is usually the process that is also responsible for spawning the
34
+ worker jobs.
35
+
36
+ ```ruby
37
+ job = Rector::Job.new
38
+
39
+ # e.g., processing files in parallel
40
+ files.each do |file|
41
+ worker = job.workers.create
42
+
43
+ # e.g., using Resque for job management; Rector doesn't really care
44
+ Resque.enqueue(WordCounterJob, worker.id, file)
45
+ end
46
+
47
+ # wait for all the workers to complete
48
+ job.join
49
+
50
+ # get aggregated data from all the jobs
51
+ job.data.each do |word, count|
52
+ puts "#{word} was seen #{count} times across all files"
53
+ end
54
+
55
+ job.cleanup
56
+ ```
57
+
58
+ ## Job Processing (Workers)
59
+
60
+ ```ruby
61
+ class WordCounterJob
62
+ def self.perform(worker_id, file)
63
+ worker = Rector::Worker.new(worker_id)
64
+
65
+ words = File.read(file).split(/\W/)
66
+     words.reject(&:empty?).each do |word|
67
+ worker.data[word] ||= 0
68
+ worker.data[word] += 1
69
+ end
70
+
71
+ worker.finish
72
+ end
73
+ end
74
+ ```
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/lib/rector.rb ADDED
@@ -0,0 +1,27 @@
1
+ require "redis"
2
+ require "redis-namespace"
3
+
4
+ require_relative "rector/configuration"
5
+ require_relative "rector/worker"
6
+ require_relative "rector/job"
7
+ require_relative "rector/backends"
8
+
9
# Top-level namespace. Holds the process-wide configuration object and
# builds the storage backend used by jobs and workers.
module Rector
  # Returns the shared configuration, building it on first access.
  def self.configuration
    @configuration ||= Rector::Configuration.new
  end

  # Drops the memoized configuration so that the next call to
  # .configuration builds a fresh one.
  def self.reset
    @configuration = nil
  end

  # Yields the shared configuration to the caller's block.
  def self.configure
    yield configuration
  end

  # Builds the backend for +job_id+; the Redis backend is the only one
  # constructed here.
  def self.backend_for(job_id)
    Rector::Backends::Redis.new(job_id)
  end
end
@@ -0,0 +1 @@
1
+ require_relative "backends/redis"
@@ -0,0 +1,74 @@
1
require "set"

module Rector
  module Backends
    # Redis-backed storage for one job's aggregated data and its roster of
    # unfinished workers.
    #
    # All commands go through a Redis::Namespace keyed by the job ID and
    # wrapping the connection found in Rector.configuration.redis.
    class Redis
      # Set that records every data key written for the job.
      KEY_LIST_SET = "__keys__"
      # Set that holds the IDs of workers that have not finished yet.
      WORKER_LIST_SET = "__workers__"

      attr_reader :job_id

      # @param job_id [String] namespace under which the job's keys live
      def initialize(job_id)
        @job_id = job_id
      end

      # Merges +hsh+ into the job's stored data inside a MULTI block.
      #
      # Every key of +hsh+ is recorded in KEY_LIST_SET. Values are stored by
      # type: Numeric values are added with INCRBY, Set members with SADD and
      # other Enumerable items with RPUSH. Values of any other type are not
      # written (their key is still recorded).
      #
      # An empty hash is a no-op and empty collections are skipped, because
      # SADD and RPUSH require at least one member/value.
      #
      # NOTE(review): numbers are written with INCRBY and read back with
      # to_i, so non-integer Numerics are presumably unsupported -- confirm.
      #
      # @param hsh [Hash] data collected by a worker
      def update_job_data_from_hash(hsh)
        return if hsh.empty?

        redis.multi do
          redis.sadd(KEY_LIST_SET, *hsh.keys)

          hsh.each do |key, val|
            case val
            when Numeric
              redis.incrby(key, val)
            when Set
              redis.sadd(key, *val) unless val.empty?
            when Enumerable
              items = val.to_a
              redis.rpush(key, *items) unless items.empty?
            end
          end
        end
      end

      # @return [Hash] every recorded key mapped to its stored value
      #   (Integer, Set or Array; nil when the key holds none of those)
      def read_job_data_to_hash
        Hash[keys.map { |k| [k, read(k)] }]
      end

      # Registers +worker_id+ as still working.
      def add_worker(worker_id)
        redis.sadd(WORKER_LIST_SET, worker_id)
      end

      # Removes +worker_id+ from the set of working workers.
      def finish_worker(worker_id)
        redis.srem(WORKER_LIST_SET, worker_id)
      end

      # @return [Boolean] true while at least one worker is registered
      def workers_working?
        redis.scard(WORKER_LIST_SET).to_i > 0
      end

      # Deletes every recorded data key, then the two bookkeeping sets.
      def cleanup
        data_keys = keys
        # DEL needs at least one key; a job that stored nothing has none.
        redis.del(*data_keys) unless data_keys.empty?
        redis.del(KEY_LIST_SET, WORKER_LIST_SET)
      end

      private

      # Memoized namespaced connection for this job.
      def redis
        @redis ||=
          ::Redis::Namespace.new(@job_id, redis: Rector.configuration.redis)
      end

      # Names of all data keys recorded for the job.
      def keys
        redis.smembers(KEY_LIST_SET)
      end

      # Reads +key+ back according to the type Redis reports for it.
      def read(key)
        case redis.type(key)
        when "string"
          redis.get(key).to_i
        when "set"
          Set.new(redis.smembers(key))
        when "list"
          redis.lrange(key, 0, -1)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "ostruct"
2
+
3
module Rector
  # Free-form settings holder: any attribute can be assigned and read back.
  #
  # Defined as a subclass rather than an alias of OpenStruct so that
  # reopening Rector::Configuration never modifies OpenStruct itself.
  class Configuration < ::OpenStruct
  end
end
data/lib/rector/job.rb ADDED
@@ -0,0 +1,54 @@
1
+ require "delegate"
2
+ require "securerandom"
3
+
4
module Rector
  # Master-side handle for a batch of parallel work: hands out worker IDs,
  # waits for the workers to finish, and exposes the data stored in the
  # backend.
  class Job
    # Array-like collection of the workers created for a job.
    class WorkerCollection < SimpleDelegator
      # @param job [Rector::Job] job that allocates IDs for new workers
      def initialize(job)
        @job = job

        # Delegate everything else to a fresh, empty array.
        super([])
      end

      # Builds a worker with a newly allocated ID, appends it to the
      # collection and returns it.
      #
      # @return [Rector::Worker]
      def create
        Rector::Worker.new(@job.allocate_worker_id).tap do |worker|
          self << worker
        end
      end
    end

    attr_reader :workers

    def initialize
      @workers = WorkerCollection.new(self)
      @backend = Rector.backend_for(id)
    end

    # @return [String] random hex ID, generated once and then memoized
    def id
      # TODO: IDs are random, so there is a small chance of two jobs
      # colliding. Can do something more reliable for ID generation?
      @id ||= SecureRandom.hex(10)
    end

    # @return [String] a new worker ID of the form "<job id>:<random hex>"
    def allocate_worker_id
      # TODO: IDs are random, so there is a small chance of two workers of
      # the same job colliding. Can do something more reliable?
      "#{id}:#{SecureRandom.hex(8)}"
    end

    # Blocks until the backend reports that no workers are still working.
    #
    # @param poll_interval [Numeric] seconds to sleep between checks
    #   (defaults to 5)
    # @return [nil]
    def join(poll_interval = 5)
      sleep(poll_interval) while @backend.workers_working?
    end

    # @return [Hash] data read from the backend; fetched on first call and
    #   memoized afterwards
    def data
      @data ||= @backend.read_job_data_to_hash
    end

    # Asks the backend to remove everything stored for this job.
    def cleanup
      @backend.cleanup
    end
  end
end
@@ -0,0 +1,22 @@
1
module Rector
  # Worker-side handle: collects data locally in #data and hands it to the
  # backend when #finish is called.
  class Worker
    attr_reader :id, :data

    # Registers the worker with the backend of the job it belongs to.
    #
    # @param id [String] worker ID; the part before the first ":" is taken
    #   as the job ID
    def initialize(id)
      @id = id
      @data = {}

      @backend = Rector.backend_for(job_id).tap do |backend|
        backend.add_worker(id)
      end
    end

    # @return [String] the job ID portion of the worker ID
    def job_id
      id.split(":")[0]
    end

    # Pushes the collected data to the backend, then marks this worker as
    # finished there.
    def finish
      @backend.update_job_data_from_hash(data)
      @backend.finish_worker(id)
    end
  end
end
data/rector.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "rector"
6
+ s.version = "0.0.1"
7
+ s.authors = ["Andy Lindeman"]
8
+ s.email = ["alindeman@gmail.com"]
9
+ s.homepage = "https://github.com/alindeman/rector"
10
+ s.summary = %q{Rector coordinates parallelized jobs that generate metrics or other data together}
11
+ s.description = <<-EOF
12
+ Rector allows coordination of a number of jobs spawned with a mechanism
13
+ like Resque (though any job manager will do). If you are able to parallelize
14
+ the processing of a task, yet all these tasks are generating metrics,
15
+ statistics, or other data that need to be combined, Rector might be for you.
16
+ EOF
17
+
18
+ s.files = `git ls-files`.split("\n")
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
+ s.require_paths = ["lib"]
22
+
23
+ s.add_dependency "redis"
24
+ s.add_dependency "redis-namespace"
25
+
26
+ s.add_development_dependency "rake"
27
+ s.add_development_dependency "rspec", ">=2.8.0"
28
+ s.add_development_dependency "mocha", ">=0.10.0"
29
+ end
@@ -0,0 +1,108 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Backends::Redis do
4
+ let(:redis) { stub_everything("redis") }
5
+
6
+ before do
7
+ def redis.multi
8
+ yield
9
+ end
10
+
11
+ Rector.configure do |c|
12
+ c.redis = redis
13
+ end
14
+ end
15
+
16
+ let(:job_id) { "abc123" }
17
+ subject { described_class.new(job_id) }
18
+
19
+ describe "writing" do
20
+ it "stores a list of keys" do
21
+ hsh = {
22
+ "foo" => 1,
23
+ "bar" => 2
24
+ }
25
+
26
+ redis.expects(:sadd).with("#{job_id}:__keys__", "foo", "bar")
27
+ subject.update_job_data_from_hash(hsh)
28
+ end
29
+
30
+ it "stores integers" do
31
+ hsh = { "foo" => 1 }
32
+
33
+ redis.expects(:incrby).with("#{job_id}:foo", 1)
34
+ subject.update_job_data_from_hash(hsh)
35
+ end
36
+
37
+ it "stores lists" do
38
+ hsh = { "foo" => ["a", "b", "c"] }
39
+
40
+ redis.expects(:rpush).with("#{job_id}:foo", "a", "b", "c")
41
+ subject.update_job_data_from_hash(hsh)
42
+ end
43
+
44
+ it "stores sets" do
45
+ hsh = { "foo" => Set.new(["a", "b", "c"]) }
46
+
47
+ redis.expects(:sadd).with("#{job_id}:foo", "a", "b", "c")
48
+ subject.update_job_data_from_hash(hsh)
49
+ end
50
+ end
51
+
52
+ describe "reading" do
53
+ it "reads integers" do
54
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
55
+
56
+ redis.stubs(:type).with("#{job_id}:foo").returns("string")
57
+ redis.stubs(:get).with("#{job_id}:foo").returns("5")
58
+
59
+ subject.read_job_data_to_hash.should == { "foo" => 5 }
60
+ end
61
+
62
+ it "reads lists" do
63
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
64
+
65
+ redis.stubs(:type).with("#{job_id}:foo").returns("list")
66
+ redis.stubs(:lrange).with("#{job_id}:foo", 0, -1).returns(["bar"])
67
+
68
+ subject.read_job_data_to_hash.should == { "foo" => ["bar"] }
69
+ end
70
+
71
+ it "reads sets" do
72
+ redis.stubs(:smembers).with("#{job_id}:__keys__").returns(["foo"])
73
+
74
+ redis.stubs(:type).with("#{job_id}:foo").returns("set")
75
+ redis.stubs(:smembers).with("#{job_id}:foo").returns(["bar"])
76
+
77
+ subject.read_job_data_to_hash.should == { "foo" => Set.new(["bar"]) }
78
+ end
79
+ end
80
+
81
+ describe "workers" do
82
+ it "adds a worker to a set" do
83
+ redis.expects(:sadd).with("#{job_id}:__workers__", "1234:5678")
84
+ subject.add_worker("1234:5678")
85
+ end
86
+
87
+ it "removes a worker from the set when it is finished" do
88
+ redis.expects(:srem).with("#{job_id}:__workers__", "1234:5678")
89
+ subject.finish_worker("1234:5678")
90
+ end
91
+
92
+ it "knows if workers are still working" do
93
+ redis.stubs(:scard).with("#{job_id}:__workers__").returns("1")
94
+ subject.workers_working?.should be_true
95
+
96
+ redis.stubs(:scard).with("#{job_id}:__workers__").returns("0")
97
+ subject.workers_working?.should be_false
98
+ end
99
+ end
100
+
101
+ it "cleans up when requests" do
102
+ redis.stubs(:smembers).returns(["a", "b"])
103
+ redis.expects(:del).with("#{job_id}:a", "#{job_id}:b")
104
+ redis.expects(:del).with("#{job_id}:__keys__", "#{job_id}:__workers__")
105
+
106
+ subject.cleanup
107
+ end
108
+ end
@@ -0,0 +1,9 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Configuration do
4
+ it "accepts and stores arbitrary configuration items" do
5
+ subject.backend = :redis
6
+
7
+ subject.backend.should == :redis
8
+ end
9
+ end
@@ -0,0 +1,33 @@
1
+ require "spec_helper"
2
+ require "timeout"
3
+
4
+ describe Rector::Job do
5
+ let(:backend) { stub_everything("backend") }
6
+ before do
7
+ Rector.stubs(:backend_for).returns(backend)
8
+ end
9
+
10
+ it "constructs workers" do
11
+ worker = subject.workers.create
12
+ worker.should be_a(Rector::Worker)
13
+ end
14
+
15
+ it "waits for workers to complete" do
16
+ backend.expects(:workers_working?).at_least_once.returns(true).then.returns(false)
17
+ subject.stubs(:sleep)
18
+
19
+ Timeout.timeout(2) do
20
+ subject.join
21
+ end
22
+ end
23
+
24
+ it "loads data from the backend" do
25
+ backend.stubs(:read_job_data_to_hash).returns("foo" => "bar")
26
+ subject.data.should == { "foo" => "bar" }
27
+ end
28
+
29
+ it "delegates to the backend for cleanup" do
30
+ backend.expects(:cleanup)
31
+ subject.cleanup
32
+ end
33
+ end
@@ -0,0 +1,18 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector do
4
+ it "allows configuration by yielding a block to #configure" do
5
+ described_class.configure do |c|
6
+ c.foo = :bar
7
+ end
8
+
9
+ described_class.configuration.foo.should == :bar
10
+ end
11
+
12
+ it "creates backend objects for jobs" do
13
+ backend = described_class.backend_for("abc123")
14
+
15
+ backend.should be_a(Rector::Backends::Redis)
16
+ backend.job_id.should == "abc123"
17
+ end
18
+ end
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ describe Rector::Worker do
4
+ let(:worker_id) { "zyx987:abc123" }
5
+ subject { described_class.new(worker_id) }
6
+
7
+ let(:backend) { stub_everything("backend") }
8
+ before do
9
+ Rector.stubs(:backend_for).returns(backend)
10
+ end
11
+
12
+ it "is initialized with a worker ID" do
13
+ subject.id.should == worker_id
14
+ end
15
+
16
+ it "knows its job ID" do
17
+ subject.job_id.should == "zyx987"
18
+ end
19
+
20
+ it "notifies the backend of workers being created" do
21
+ backend.expects(:add_worker).with(worker_id)
22
+ subject
23
+ end
24
+
25
+ describe "#finish" do
26
+ it "notifies the backend" do
27
+ backend.expects(:finish_worker).with(worker_id)
28
+ subject.finish
29
+ end
30
+
31
+ it "saves data" do
32
+ subject.data["foo"] = "bar"
33
+
34
+ backend.expects(:update_job_data_from_hash).with("foo" => "bar")
35
+ subject.finish
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,12 @@
1
+ require "rspec"
2
+ require "mocha"
3
+
4
+ require_relative "../lib/rector"
5
+
6
+ RSpec.configure do |c|
7
+ c.mock_with :mocha
8
+
9
+ c.before do
10
+ Rector.reset
11
+ end
12
+ end
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andy Lindeman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: redis
16
+ requirement: &70197558869260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70197558869260
25
+ - !ruby/object:Gem::Dependency
26
+ name: redis-namespace
27
+ requirement: &70197558868820 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70197558868820
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &70197558868400 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70197558868400
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &70197558867900 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.8.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70197558867900
58
+ - !ruby/object:Gem::Dependency
59
+ name: mocha
60
+ requirement: &70197558867340 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 0.10.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *70197558867340
69
+ description: ! " Rector allows coordination of a number of jobs spawned with a
70
+ mechanism\n like Resque (though any job manager will do). If you are able to
71
+ parallelize\n the processing of a task, yet all these tasks are generating metrics,\n
72
+ \ statistics, or other data that need to be combined, Rector might be for you.\n"
73
+ email:
74
+ - alindeman@gmail.com
75
+ executables: []
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - .gitignore
80
+ - .travis.yml
81
+ - Gemfile
82
+ - README.md
83
+ - Rakefile
84
+ - lib/rector.rb
85
+ - lib/rector/backends.rb
86
+ - lib/rector/backends/redis.rb
87
+ - lib/rector/configuration.rb
88
+ - lib/rector/job.rb
89
+ - lib/rector/worker.rb
90
+ - rector.gemspec
91
+ - spec/lib/backends/redis_spec.rb
92
+ - spec/lib/configuration_spec.rb
93
+ - spec/lib/job_spec.rb
94
+ - spec/lib/rector_spec.rb
95
+ - spec/lib/worker_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: https://github.com/alindeman/rector
98
+ licenses: []
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ segments:
110
+ - 0
111
+ hash: -3768330157712875762
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ segments:
119
+ - 0
120
+ hash: -3768330157712875762
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 1.8.14
124
+ signing_key:
125
+ specification_version: 3
126
+ summary: Rector coordinates parallelized jobs that generate metrics or other data
127
+ together
128
+ test_files:
129
+ - spec/lib/backends/redis_spec.rb
130
+ - spec/lib/configuration_spec.rb
131
+ - spec/lib/job_spec.rb
132
+ - spec/lib/rector_spec.rb
133
+ - spec/lib/worker_spec.rb
134
+ - spec/spec_helper.rb