cyclop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ docs/
6
+ *.log
7
+ *.yml
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS; the plain-HTTP endpoint allows tampering in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in cyclop.gemspec.
gemspec
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ Cyclop
2
+ ======
3
+
4
+ A MongoDB-backed job queue with an emphasis on never losing a task, even if a worker fails hard (e.g. a segfault).
5
+
6
+ Dependencies
7
+ ------------
8
+
9
+ * Ruby >= 1.9.2
10
+ * gem "mongo", "~> 1.3.1"
11
+ * gem "posix-spawn", "~> 0.3.6"
12
+
13
+ Usage
14
+ -----
15
+
16
+ * Give Cyclop access to mongo:
17
+
18
+ Cyclop.db = Mongo::Connection.new["database_name"]
19
+
20
+ or with Replica-Sets
21
+
22
+ Cyclop.db = Mongo::ReplSetConnection.new["database_name"]
23
+
24
+ or if you're using MongoMapper:
25
+
26
+ Cyclop.db = MongoMapper.database
27
+
28
+ or if you're using Mongoid:
29
+
30
+ Cyclop.db = Mongoid.database
31
+
32
+ * Queue a new task:
33
+
34
+ Cyclop.push({
35
+ queue: :upload,
36
+ job_params: {
37
+ url: "http://example.com",
38
+ },
39
+ })
40
+
41
+ * Queue a new task to be processed in 5 minutes, retried up to 3 times on error, with a 1-minute delay between attempts:
42
+
43
+ Cyclop.push({
44
+ queue: :convert,
45
+ job_params: {
46
+ tmp_file: "/tmp/uploaded_file_32.png",
47
+ },
48
+ delay: 300,
49
+ retries: 3,
50
+ splay: 60,
51
+ })
52
+
53
+ * Get next job:
54
+
55
+ Cyclop.next
56
+
57
+ * Get next job on specific queues:
58
+
59
+ Cyclop.next :upload, :convert
60
+
61
+ * Get next job on specific queues for a specific host:
62
+
63
+ Cyclop.next :upload, :convert, host: "tartarus.local"
64
+
65
+ * Get failed jobs (limit to 30):
66
+
67
+ Cyclop.failed limit: 30
68
+
69
+ * Get failed jobs (skip first 10, limit to 30):
70
+
71
+ Cyclop.failed skip: 10, limit: 30
72
+
73
+ * Requeue a failed job:
74
+
75
+ job = Cyclop.failed.first
76
+ job.requeue!
77
+
78
+ * Start a worker:
79
+
80
+ cyclop -c config.yml
81
+
82
+ About
83
+ -----
84
+
85
+ License
86
+ -------
87
+
88
+ cyclop is Copyright © 2011 TalentBox SA. It is free software, and may be redistributed under the terms specified in the LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
require "bundler"
Bundler::GemHelper.install_tasks

require "rake/clean"

# Documentation tasks, adapted from Ryan Tomayko's Rocco Rakefile:
# https://github.com/rtomayko/rocco/blob/master/Rakefile
#
# They are only defined when the rocco gem can be loaded; otherwise an empty
# :rocco task is declared so that dependent tasks still resolve.
begin
  require "rocco/tasks"
  Rocco.make "docs/"

  desc "Build Cyclop Docs"
  task :docs => :rocco

  desc "Build docs and open in browser for the reading"
  task :read => :docs do
    sh "open docs/lib/cyclop.html"
  end

  # docs/index.html is a meta-refresh page pointing at lib/cyclop.html
  file "docs/index.html" do
    sh %Q{echo '<html><head><meta http-equiv="refresh" content="1;url=http://talentbox.github.com/cyclop/lib/cyclop.html"></head><body></body></html>' > docs/index.html}
  end
  task :docs => "docs/index.html"
  CLEAN.include "docs/index.html"

  # GitHub Pages: commit the generated HTML inside docs/ and push it to the
  # gh-pages branch of the enclosing repository.
  desc "Update gh-pages branch"
  task :pages => ["docs/.git", :docs] do
    revision = `git rev-parse --short HEAD`.strip
    Dir.chdir "docs" do
      ["*.html", "lib/*.html", "lib/cyclop/*.html"].each do |pattern|
        sh "git add #{pattern}"
      end
      sh "git commit -m 'rebuild pages from #{revision}'" do |ok, _status|
        # Nothing to push when the commit did not succeed.
        next unless ok
        verbose { puts "gh-pages updated" }
        sh "git push -q o HEAD:gh-pages"
      end
    end
  end

  # Keep docs/ as a clone tracking the gh-pages branch of the parent repo
  file "docs/.git" => ["docs/", ".git/refs/heads/gh-pages"] do |clone|
    unless File.exist?(clone.name)
      sh "cd docs && git init -q && git remote add o ../.git"
    end
    sh "cd docs && git fetch -q o && git reset -q --hard o/gh-pages && touch ."
  end
  CLOBBER.include "docs/.git"
rescue LoadError => error
  warn "#{error} -- rocco tasks not loaded."
  task :rocco
end

require "rspec/core/rake_task"

RSpec::Core::RakeTask.new("spec")

task :default => :spec
data/bin/cyclop ADDED
@@ -0,0 +1,81 @@
1
#!/usr/bin/env ruby

# Command-line launcher for a Cyclop worker.
#
# Reads a YAML config file given with -c/--config, builds a Cyclop::Worker
# from it and runs it. Without a config file (or with -h/--help) it prints the
# usage banner; with --help it also prints sample configuration files.
#
# Exit status is 1 when the options are invalid, when the config file is
# missing, unreadable or malformed, or when the worker rejects the config.

require "optparse"
require "yaml"
require "cyclop"

options = {}
opts = OptionParser.new do |parser|
  parser.banner = "Usage: cyclop [options]"
  parser.on("-c", "--config CONFIG", "Read config from CONFIG file") do |path|
    options[:config] = path
  end
  parser.on("-h", "--help", "Display more help") do |flag|
    options[:help] = flag
  end
  parser.on("-V", "--version", "Display version") do
    puts Cyclop::VERSION
    exit
  end
end

begin
  opts.parse!
rescue OptionParser::ParseError => e
  # Unknown option or missing argument: show usage instead of a backtrace.
  puts e.message
  puts opts
  exit(1)
end

if options[:config] && !options[:help]
  config_path = options[:config]
  unless File.exist?(config_path) && File.readable?(config_path)
    puts "Config file '#{config_path}' do not exists or is not readable."
    exit(1)
  end

  # The config file is supplied by the operator and therefore trusted.
  # Syck reports malformed YAML with ArgumentError; Psych raises
  # Psych::SyntaxError, which descends from SyntaxError on old versions and
  # from StandardError on recent ones, so both families are rescued.
  config = begin
    YAML.load File.read(config_path)
  rescue StandardError, SyntaxError
    nil
  end
  # A valid config is a mapping; scalars and arrays are rejected here.
  unless config.is_a?(Hash)
    puts "Config file '#{config_path}' format is invalid."
    exit(1)
  end

  begin
    master = Cyclop::Worker.new config
  rescue ArgumentError
    puts $!.message
    exit(1)
  else
    master.run
  end
else
  puts opts
  if options[:help]
    puts <<-EOS

    Minimal valid YAML config file:

      mongo:
        database: "cyclop"

    Example YAML config file with more options:

      # Optional: only process jobs from the specified queues
      queues: [
        "email", "cache"
      ]
      # Load actions in this directory (default to ./actions)
      actions: "/app/actions"
      sleep_interval: 0.5 # in seconds
      log_file: "/var/log/cyclop.log" # will log to STDOUT if missing
      mongo:
        database: "cyclop"
        log: true # for debug only

        # Without replica sets
        host: "127.0.0.1" # will default to "127.0.0.1"
        port: 27017 # will default to 27017

        # Uncomment the following lines if you use a replica set
        # hosts: [
        #   ["127.0.0.1", 27017], ["10.0.0.2", 27017]
        # ]
        # rs_name: "production"
        # read_secondary: true
    EOS
  end
end
data/cyclop.gemspec ADDED
@@ -0,0 +1,29 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for cyclop. The version is read from lib/cyclop/version.rb
# and the packaged file lists are taken from what git tracks.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "cyclop/version"

Gem::Specification.new do |spec|
  spec.name        = "cyclop"
  spec.version     = Cyclop::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Joseph HALTER", "Jonathan TRON"]
  spec.email       = ["joseph.halter@thetalentbox.com", "jonathan.tron@thetalentbox.com"]
  spec.homepage    = "https://github.com/TalentBox/cyclop"
  spec.summary     = "Job queue with MongoDB"
  spec.description = "Job queue with MongoDB with emphasis on never losing any task even if worker fails hard (segfault)."

  spec.rubyforge_project = "cyclop"

  tracked_files    = `git ls-files`.split("\n")
  tracked_tests    = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Needed at runtime by the library and the worker
  {
    "bson_ext"    => "~> 1.3.1",
    "mongo"       => "~> 1.3.1",
    "posix-spawn" => "~> 0.3.6",
  }.each do |gem_name, requirement|
    spec.add_runtime_dependency(gem_name, [requirement])
  end

  # Needed only to build, test and document the gem
  {
    "rake"  => "~> 0.8.7",
    "rspec" => "~> 2.6.0",
    "rocco" => "~> 0.7",
  }.each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, [requirement])
  end
end
@@ -0,0 +1,34 @@
1
require "set"

module Cyclop
  # Base class for job handlers.
  #
  # Subclasses declare the queues they serve by overriding `.queues` and do
  # the work in `.perform`. Every subclass is registered automatically so that
  # `.find_by_queue` can map a queue name to the single action serving it.
  class Action
    # Registry of all subclasses. A class variable is used on purpose: it is
    # shared by the whole hierarchy, so subclasses of subclasses register in
    # the same set.
    @@actions = Set.new

    # Hook called by Ruby whenever a class inherits from Action (directly or
    # not); records the new class in the registry.
    def self.inherited(klass)
      super
      @@actions << klass
    end

    # Find the action class serving `queue`.
    #
    # Parameters:
    #
    #   * queue - queue name, compared with `include?` against each action's
    #     `queues` (so a String does not match a Symbol)
    #
    # Returns the matching Action subclass.
    #
    # Raises Cyclop::NoActionFound when no action is registered at all or none
    # serves the queue, and Cyclop::ActionQueueClash when several actions
    # claim the same queue.
    def self.find_by_queue(queue)
      raise Cyclop::NoActionFound, "No action defined" if @@actions.empty?

      matches = @@actions.select { |action| action.queues.include?(queue) }
      case matches.size
      when 0
        known = @@actions.map(&:queues).flatten.uniq.map(&:inspect)
        raise Cyclop::NoActionFound, "No action found for #{queue.inspect} queue. Valid queues: #{known.join(", ")}"
      when 1
        matches.first
      else
        raise Cyclop::ActionQueueClash, "\"#{queue}\" queue belongs to multiple actions: #{matches.map(&:name).join(", ")}"
      end
    end

    # Queues served by this action. Subclasses override this; the base class
    # serves none.
    def self.queues
      []
    end

    # Process a job. Subclasses must override this.
    def self.perform(*args)
      raise NotImplementedError
    end

    # Class name followed by the queues it serves, e.g. `Mailer: ["email"]`.
    def self.to_s
      "#{name}: #{queues.inspect}"
    end
  end
end
data/lib/cyclop/job.rb ADDED
@@ -0,0 +1,211 @@
1
module Cyclop
  # A unit of work stored as a document in the `cyclop_jobs` collection of
  # `Cyclop.db`.
  #
  # A job is locked by a worker through `Job.next`, then either removed with
  # `#complete!` or handed back with `#release!`.
  class Job
    # Seconds after which a lock is considered stale, making the job eligible
    # for `Job.next` again.
    LOCK_TIMEOUT = 1800

    # Unique identifier
    attr_accessor :_id
    # Queue name
    attr_accessor :queue
    # Parameters sent to `#perform`
    attr_accessor :job_params
    # Delay in seconds
    attr_accessor :delay
    # Time until which the job must not be started
    attr_accessor :delayed_until
    # Number of retries before being marked as failed
    attr_accessor :retries
    # Time in seconds between retries
    attr_accessor :splay
    # Host it's added under
    attr_accessor :created_by
    # Time it was created
    attr_accessor :created_at
    # Time it was last updated
    attr_accessor :updated_at
    # Worker unique identifier
    attr_accessor :locked_by
    # Time when worker started
    attr_accessor :locked_at
    # Mark as failed
    attr_accessor :failed
    # Number of attempts
    attr_accessor :attempts
    # Backtraces of unsuccessful attempts
    attr_accessor :errors

    # Build a job from a hash of attributes (String or Symbol keys).
    #
    # Raises ArgumentError when no queue is given.
    def initialize(attrs={})
      raise ArgumentError, ":queue is required" unless attrs["queue"] || attrs[:queue]
      self.attributes = attrs
    end

    # Create a new job and save it to the queue specified in `opts[:queue]`
    def self.create(opts={})
      job = new opts
      job.save
      job
    end

    # Get the next job from any `opts[:queues]` and mark it as locked.
    #
    # Options:
    #
    #   * :locked_by - (required) identifier stored on the job as its lock owner
    #   * :queues    - restrict to these queues (all queues when empty or missing)
    #   * :host      - restrict to jobs whose `created_by` equals this value
    #
    # Returns the locked Job, or nil when no job is available or the database
    # operation fails. Raises ArgumentError when :locked_by is missing.
    def self.next(opts={})
      raise ArgumentError, "locked_by is required" unless opts[:locked_by]

      time_now = Time.now.utc

      query = {}
      # Not failed jobs only
      query[:failed] = false
      # Only jobs generated by the specified host if present
      query[:created_by] = opts[:host] if opts[:host]
      # Skip delayed jobs
      query[:delayed_until] = {"$lte" => time_now}
      # Filter by queue if present
      query[:queue] = {"$in" => opts[:queues]} if opts[:queues] && !opts[:queues].empty?
      # Skip locked jobs, unless the lock is older than LOCK_TIMEOUT
      query["$or"] = [{locked_at: {"$lte" => time_now - LOCK_TIMEOUT}}, {locked_at: nil}]
      # Last chance to skip dead jobs
      query["$where"] = "this.attempts <= this.retries"

      doc = collection.find_and_modify(
        query: query,
        # Set `locked_by` with worker id and increment the number of attempts
        update: {
          "$set" => {locked_by: opts[:locked_by], locked_at: time_now},
          "$inc" => {attempts: 1},
        },
        # Sort by `created_at`
        sort: [:created_at, :asc],
        # Returns the modified job
        new: true
      )
      # Depending on the driver, "nothing matched" is reported either by
      # raising OperationFailure or by returning nil; both mean "no job".
      doc && new(doc)
    rescue Mongo::OperationFailure
      nil
    end

    # Get failed jobs from any `opts[:queues]`.
    #
    # A job counts as failed when it is flagged `failed` or when it has used
    # more attempts than its retries allow. `opts[:skip]` and `opts[:limit]`
    # are passed on to the collection query.
    def self.failed(opts={})
      selector = {}
      # Failed or dead jobs only
      selector["$or"] = [
        {failed: true},
        {"$where" => "this.attempts > this.retries"},
      ]
      # Filter by queue if present
      selector[:queue] = {"$in" => opts[:queues]} if opts[:queues] && !opts[:queues].empty?

      options = {}
      options[:skip] = opts[:skip] if opts[:skip]
      options[:limit] = opts[:limit] if opts[:limit]

      collection.find(selector, options).map { |attrs| new attrs }
    end

    # Load the job with the given id; returns nil when there is none.
    def self.find(id)
      doc = collection.find_one(id)
      new doc if doc
    end

    # Collection holding the jobs, memoized on first use.
    #
    # This has always been callable from outside the class (`private` does not
    # apply to `def self.` methods) and the instance-level `collection` relies
    # on that, so it stays public.
    #
    # Raises Cyclop::DatabaseNotAvailable when `Cyclop.db` is not set.
    def self.collection
      @@collection ||= begin
        db = Cyclop.db
        raise Cyclop::DatabaseNotAvailable unless db
        db["cyclop_jobs"]
      end
    end

    # Save to queue.
    #
    # Only inserting a new job is supported: saving an already persisted job
    # raises NotImplementedError. Returns true on success, false when the
    # database rejects the insert.
    def save
      self.updated_at = Time.now.utc
      if persisted?
        raise NotImplementedError
      else
        self.created_at = updated_at
        self.delayed_until = ::Time.at(created_at.to_i + delay).utc
        self._id = collection.insert attributes, safe: true
      end
      true
    rescue Mongo::OperationFailure
      false
    end

    # Re-read the attributes from the database and return self.
    #
    # NOTE(review): when the document no longer exists `find_one` yields nil
    # and this raises NoMethodError; confirm what callers expect before
    # changing it.
    def reload
      self.attributes = collection.find_one _id
      self
    end

    # If we have an id the object is persisted
    def persisted?
      !!_id
    end

    # Two jobs are equal when they share the same `_id`. Objects that do not
    # expose `_id` (nil, for instance) are never equal to a job.
    def ==(other)
      other.respond_to?(:_id) && other._id == _id
    end

    # Remove successfully processed job from the queue
    def complete!
      collection.remove _id: _id, locked_by: Cyclop.master_id
    end

    # Release job for further processing.
    #
    # While attempts remain the lock is cleared and the job is delayed by
    # `splay` seconds; otherwise it is flagged as failed. When an exception is
    # given, its details are appended to the job's `errors`.
    def release!(exception = nil)
      now = ::Time.at(Time.now.to_i).utc
      selector = {_id: _id, locked_by: Cyclop.master_id}
      changes = if attempts <= retries
        {locked_by: nil, locked_at: nil, delayed_until: now + splay}
      else
        {failed: true}
      end
      update = {"$set" => changes}
      if exception
        update["$push"] = {
          :errors => {
            :locked_by => locked_by,
            :locked_at => locked_at,
            :class => exception.class.name,
            :message => exception.message,
            :backtrace => exception.backtrace,
            :created_at => now,
          },
        }
      end
      collection.update selector, update, :safe => true
    end

    private

    def collection
      self.class.collection
    end

    # Attributes written to the database (everything except `_id`)
    def attributes
      {
        queue: queue,
        job_params: job_params,
        delay: delay,
        delayed_until: delayed_until,
        retries: retries,
        splay: splay,
        created_by: created_by,
        created_at: created_at,
        updated_at: updated_at,
        locked_by: locked_by,
        locked_at: locked_at,
        failed: failed,
        attempts: attempts,
        errors: errors,
      }
    end

    # Assign every key through its public writer, then fill in defaults for
    # whatever is still unset.
    def attributes=(attrs)
      attrs.each do |key, value|
        # public_send keeps arbitrary keys from reaching private methods
        public_send "#{key}=", value
      end
      self.delay ||= 0
      self.retries ||= 0
      self.splay ||= 60
      self.created_by ||= Cyclop.host
      self.failed ||= false
      self.attempts ||= 0
      self.errors ||= []
    end
  end
end
@@ -0,0 +1,3 @@
1
module Cyclop
  # Version of the cyclop gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,128 @@
1
require "logger"

module Cyclop
  # Master process polling the job queue.
  #
  # Each job is handed to a forked child process, so a child that dies does
  # not take the master down. The master removes the job when the child exits
  # with status 0 and releases it otherwise.
  class Worker
    # Queues to process
    attr_accessor :queues
    # Logger for master
    attr_accessor :logger
    # How much time to sleep between polls
    attr_accessor :sleep_interval
    # Path to actions directory
    attr_accessor :actions

    # Build a worker from a configuration hash with String keys and connect
    # `Cyclop.db` to the configured database.
    #
    # Recognized keys: "queues", "log_file", "sleep_interval", "actions" and
    # "mongo" ("database", "host", "port", "hosts", "rs_name",
    # "read_secondary", "log").
    #
    # Raises ArgumentError when config["mongo"]["database"] is missing.
    def initialize(config={})
      raise ArgumentError, 'mongo["database"] is required' unless config["mongo"] && config["mongo"]["database"]

      mongo = config["mongo"]
      self.queues = config["queues"] || []
      self.logger = Logger.new(config["log_file"] || $stdout)
      self.sleep_interval = config["sleep_interval"] || 1
      self.actions = config["actions"] || "./actions"

      # Driver queries are only logged when mongo["log"] is set
      driver_logger = (logger if mongo["log"])
      connection = if mongo["hosts"]
        Mongo::ReplSetConnection.new(
          *mongo["hosts"],
          rs_name: mongo["rs_name"],
          read_secondary: !!mongo["read_secondary"],
          logger: driver_logger,
        )
      else
        Mongo::Connection.new(
          (mongo["host"] || "127.0.0.1"),
          (mongo["port"] || 27017),
          logger: driver_logger,
        )
      end
      Cyclop.db = connection.db mongo["database"]
    end

    # Start processing jobs. Loops until a graceful stop is requested,
    # sleeping `sleep_interval` seconds whenever the queue is empty.
    def run
      register_signal_handlers
      loop do
        if @stop
          log "Shutting down..."
          break
        end
        if job = next_job
          @sleeping = false
          process job
        else
          log "No more job to process, start sleeping..." unless @sleeping
          @sleeping = true
          sleep sleep_interval
        end
      end
    end

    # Called inside forked process
    #
    # Parameters:
    #
    #   * (Cyclop::Job) job - the job to process
    #
    # Returns the exit status for the child: 0 on success, 1 when anything was
    # raised (the job is then released with the exception attached).
    #
    # NOTE(review): `job_params` is splatted into `perform`; if it is a Hash
    # the action receives [key, value] pairs - confirm this is intended.
    def perform(job)
      load_actions
      Cyclop::Action.find_by_queue(job.queue).perform(*job.job_params)
      0
    rescue Exception => e
      # Exception (not just StandardError) is rescued on purpose: this runs in
      # a throwaway child whose only duty is to report success or failure, so
      # load errors and `exit` calls inside an action must be recorded too.
      log e.to_s
      job.release! e
      1
    end

    # Graceful shutdown: the loop in `run` exits before taking another job
    def stop
      @stop = true
    end

    # Forced shutdown: terminate the running child, if any, then exit
    def stop!
      if @pid
        begin
          Process.kill "TERM", @pid
          Process.wait @pid
        rescue Errno::ESRCH, Errno::ECHILD
          # The child is already gone or already reaped; nothing left to do.
        end
      end
      exit!
    end

    private

    # Fork a child to perform `job` and, in the master, wait for it and
    # complete or release the job according to its exit status.
    def process(job)
      if @pid = fork
        msg = "Forked process #{@pid} to work on job #{job.queue}-#{job._id}..."
        log msg
        procline msg
        # Wait for this very child rather than for any child
        _, status = Process.wait2 @pid
        log "Child process #{@pid} ended with status: #{status}"
        # Forget the pid so a later forced shutdown cannot signal a stale one
        @pid = nil
        if status.success?
          job.complete!
        else
          job.release!
        end
      else
        procline "Processing #{job.queue}-#{job._id} (started at #{Time.now.utc})"
        exit! perform(job)
      end
    end

    # Trap signals
    #
    # QUIT - graceful shutdown
    # INT  - first graceful shutdown, second time forced shutdown
    # TERM - forced shutdown
    def register_signal_handlers
      trap("QUIT") { stop }
      trap("INT") { @stop ? stop! : stop }
      trap("TERM") { stop! }
    end

    def next_job
      Cyclop.next(*queues)
    end

    # Set the process title shown by tools such as `ps`
    def procline(line)
      $0 = "cyclop-#{Cyclop::VERSION}: #{line}"
    end

    def log(message)
      logger << "#{Time.now}: #{message}\n"
    end

    # Require every Ruby file found directly in the actions directory
    def load_actions
      Dir["#{actions}/*.rb"].each { |action| require action }
    end
  end
end