cyclop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ docs/
6
+ *.log
7
+ *.yml
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS; a plain-HTTP source can be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in cyclop.gemspec.
gemspec
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ Cyclop
2
+ ======
3
+
4
+ A job queue backed by MongoDB, with an emphasis on never losing a task even if a worker fails hard (e.g. a segfault).
5
+
6
+ Dependencies
7
+ ------------
8
+
9
+ * Ruby >= 1.9.2
10
+ * gem "mongo", "~> 1.3.1"
11
+ * gem "posix-spawn", "~> 0.3.6"
12
+
13
+ Usage
14
+ -----
15
+
16
+ * Give Cyclop access to mongo:
17
+
18
+ Cyclop.db = Mongo::Connection.new["database_name"]
19
+
20
+ or with Replica-Sets
21
+
22
+ Cyclop.db = Mongo::ReplSetConnection.new["database_name"]
23
+
24
+ or if you're using MongoMapper:
25
+
26
+ Cyclop.db = MongoMapper.database
27
+
28
+ or if you're using Mongoid:
29
+
30
+ Cyclop.db = Mongoid.database
31
+
32
+ * Queue a new task:
33
+
34
+ Cyclop.push({
35
+ queue: :upload,
36
+ job_params: {
37
+ url: "http://example.com",
38
+ },
39
+ })
40
+
41
+ * Queue a new task to be processed in 5 minutes and retried up to 3 times on error, with a 1-minute delay between retries:
42
+
43
+ Cyclop.push({
44
+ queue: :convert,
45
+ job_params: {
46
+ tmp_file: "/tmp/uploaded_file_32.png",
47
+ },
48
+ delay: 300,
49
+ retries: 3,
50
+ splay: 60,
51
+ })
52
+
53
+ * Get next job:
54
+
55
+ Cyclop.next
56
+
57
+ * Get next job on specific queues:
58
+
59
+ Cyclop.next :upload, :convert
60
+
61
+ * Get next job on specific queues for a specific host:
62
+
63
+ Cyclop.next :upload, :convert, host: "tartarus.local"
64
+
65
+ * Get failed jobs (limit to 30):
66
+
67
+ Cyclop.failed limit: 30
68
+
69
+ * Get failed jobs (skip first 10, limit to 30):
70
+
71
+ Cyclop.failed skip: 10, limit: 30
72
+
73
+ * Requeue a failed job:
74
+
75
+ job = Cyclop.failed.first
76
+ job.requeue!
77
+
78
+ * Start a worker:
79
+
80
+ cyclop -c config.yml
81
+
82
+ About
83
+ -----
84
+
85
+ License
86
+ -------
87
+
88
+ cyclop is Copyright © 2011 TalentBox SA. It is free software, and may be redistributed under the terms specified in the LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake/clean'

# Documentation tasks.
# Original Author: Ryan Tomayko
# Copied from https://github.com/rtomayko/rocco/blob/master/Rakefile
#
# Everything that needs rocco lives inside this begin/rescue: when the gem
# cannot be loaded, only a no-op :rocco task is defined.
begin
  require 'rocco/tasks'
  Rocco.make('docs/')

  desc "Build Cyclop Docs"
  task docs: :rocco

  desc 'Build docs and open in browser for the reading'
  task read: :docs do
    sh 'open docs/lib/cyclop.html'
  end

  # docs/index.html is a meta-refresh redirect to lib/cyclop.html
  file 'docs/index.html' do
    sh %Q{echo '<html><head><meta http-equiv="refresh" content="1;url=http://talentbox.github.com/cyclop/lib/cyclop.html"></head><body></body></html>' > docs/index.html}
  end
  task docs: 'docs/index.html'
  CLEAN.include('docs/index.html')

  # GITHUB PAGES ===============================================================
  desc 'Update gh-pages branch'
  task pages: ['docs/.git', :docs] do
    rev = `git rev-parse --short HEAD`.strip
    Dir.chdir('docs') do
      ["*.html", "lib/*.html", "lib/cyclop/*.html"].each do |pattern|
        sh "git add #{pattern}"
      end
      # The block form lets a failed commit (e.g. nothing to commit) pass
      # silently; the push only happens after a successful commit.
      sh "git commit -m 'rebuild pages from #{rev}'" do |ok, _status|
        next unless ok
        verbose { puts "gh-pages updated" }
        sh "git push -q o HEAD:gh-pages"
      end
    end
  end

  # Update the pages/ directory clone
  file 'docs/.git' => ['docs/', '.git/refs/heads/gh-pages'] do |f|
    unless File.exist?(f.name)
      sh "cd docs && git init -q && git remote add o ../.git"
    end
    sh "cd docs && git fetch -q o && git reset -q --hard o/gh-pages && touch ."
  end
  CLOBBER.include('docs/.git')
rescue LoadError => e
  warn "#{e} -- rocco tasks not loaded."
  task :rocco
end

require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new('spec')

task default: :spec
data/bin/cyclop ADDED
@@ -0,0 +1,81 @@
1
#!/usr/bin/env ruby

# Command-line entry point for Cyclop.
#
# Parses the command-line options, loads the YAML config file given with
# -c/--config and runs a Cyclop::Worker built from it. Without a config file
# (or with --help) it prints the usage, plus example config files for --help.
#
# Exits with status 1 when the config file is missing, unreadable, invalid,
# or rejected by Cyclop::Worker.

require "optparse"
require "yaml"
require "cyclop"

options = {}
parser = OptionParser.new do |o|
  o.banner = "Usage: cyclop [options]"
  o.on("-c", "--config CONFIG", "Read config from CONFIG file") do |path|
    options[:config] = path
  end
  o.on("-h", "--help", "Display more help") do |flag|
    options[:help] = flag
  end
  o.on("-V", "--version", "Display version") do
    puts Cyclop::VERSION
    exit
  end
end
parser.parse!

if options[:config] && !options[:help]
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  unless File.exist?(options[:config]) && File.readable?(options[:config])
    puts "Config file '#{options[:config]}' do not exists or is not readable."
    exit(1)
  end

  config = nil
  begin
    config = YAML::load File.read options[:config]
  rescue ArgumentError, Psych::SyntaxError
    # Malformed YAML: leave config as nil and report it as invalid below.
  end
  # The worker indexes the config by string keys, so anything other than a
  # Hash (empty file, scalar, array) is an invalid config.
  unless config.is_a?(Hash)
    puts "Config file '#{options[:config]}' format is invalid."
    exit(1)
  end

  begin
    master = Cyclop::Worker.new config
  rescue ArgumentError
    puts $!.message
    # A rejected config is a failure; do not report success to the caller.
    exit(1)
  else
    master.run
  end
else
  puts parser
  if options[:help]
    puts <<-EOS

Minimal valid YAML config file:

mongo:
  database: "cyclop"

Example YAML config file with more options:

# Optional: only process jobs from the specified queues
queues: [
  "email", "cache"
]
# Load actions in this directory (default to ./actions)
actions: "/app/actions"
sleep_interval: 0.5 # in seconds
log_file: "/var/log/cyclop.log" # will log to STDOUT if missing
mongo:
  database: "cyclop"
  log: true # for debug only

  # Without replica sets
  host: "127.0.0.1" # will default to "127.0.0.1"
  port: 27017 # will default to 27017

  # Uncomment the following lines if you use a replica set
  # hosts: [
  #   ["127.0.0.1", 27017], ["10.0.0.2", 27017]
  # ]
  # rs_name: "production"
  # read_secondary: true
    EOS
  end
end
data/cyclop.gemspec ADDED
@@ -0,0 +1,29 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for cyclop. The version comes from lib/cyclop/version.rb,
# so lib/ is added to the load path before requiring it.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require "cyclop/version"

Gem::Specification.new do |spec|
  spec.name        = "cyclop"
  spec.version     = Cyclop::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Joseph HALTER", "Jonathan TRON"]
  spec.email       = ["joseph.halter@thetalentbox.com", "jonathan.tron@thetalentbox.com"]
  spec.homepage    = "https://github.com/TalentBox/cyclop"
  spec.summary     = "Job queue with MongoDB"
  spec.description = "Job queue with MongoDB with emphasis on never losing any task even if worker fails hard (segfault)."

  spec.rubyforge_project = "cyclop"

  # The packaged file lists are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  {
    "bson_ext"    => "~> 1.3.1",
    "mongo"       => "~> 1.3.1",
    "posix-spawn" => "~> 0.3.6",
  }.each do |gem_name, requirement|
    spec.add_runtime_dependency(gem_name, [requirement])
  end

  {
    "rake"  => "~> 0.8.7",
    "rspec" => "~> 2.6.0",
    "rocco" => "~> 0.7",
  }.each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, [requirement])
  end
end
@@ -0,0 +1,34 @@
1
module Cyclop
  # Base class for job handlers. Every class inheriting from Action is
  # recorded in a registry, and `find_by_queue` resolves a queue name to the
  # single registered action that lists it in `queues`.
  class Action
    # Registry of inheriting classes, filled by the `inherited` hook.
    @@actions = Set.new

    # Hook called by Ruby when a class inherits from Action: registers it.
    def self.inherited(klass)
      @@actions << klass
    end

    # Returns the one registered action whose `queues` include `queue`.
    #
    # Raises Cyclop::NoActionFound when nothing is registered or no action
    # lists the queue, and Cyclop::ActionQueueClash when several actions do.
    def self.find_by_queue(queue)
      raise Cyclop::NoActionFound, "No action defined" if @@actions.empty?

      candidates = @@actions.select { |action| action.queues.include?(queue) }
      case candidates.size
      when 0
        known = @@actions.map(&:queues).flatten.uniq.map(&:inspect)
        raise Cyclop::NoActionFound, "No action found for #{queue.inspect} queue. Valid queues: #{known.join(", ")}"
      when 1
        candidates.first
      else
        names = candidates.map { |action| action.name }
        raise Cyclop::ActionQueueClash, "\"#{queue}\" queue belongs to multiple actions: #{names.join(", ")}"
      end
    end

    # Queues handled by this action; the base class handles none.
    def self.queues
      []
    end

    # Entry point for running a job; the base class raises NotImplementedError.
    def self.perform(*args)
      raise NotImplementedError
    end

    # Class name followed by the queues it handles.
    def self.to_s
      "#{name}: #{queues.inspect}"
    end
  end
end
data/lib/cyclop/job.rb ADDED
@@ -0,0 +1,211 @@
1
module Cyclop
  # A unit of work stored as a document in the "cyclop_jobs" collection of
  # `Cyclop.db`. Class methods enqueue and fetch jobs; instance methods save,
  # complete or release a single job.
  class Job
    # Seconds after which a lock is ignored: `next` treats a job locked
    # longer ago than this as available again.
    LOCK_TIMEOUT = 1800

    # Unique identifier
    attr_accessor :_id
    # Queue name
    attr_accessor :queue
    # Parameters sent to `#perform`
    attr_accessor :job_params
    # Delay in seconds
    attr_accessor :delay
    # Time until we do start the job
    attr_accessor :delayed_until
    # Number of retries before being marked as failed
    attr_accessor :retries
    # Time in seconds between retry
    attr_accessor :splay
    # Host it's added under
    attr_accessor :created_by
    # Time it was created
    attr_accessor :created_at
    # Time it was last updated
    attr_accessor :updated_at
    # Worker unique identifier
    attr_accessor :locked_by
    # Time when worker started
    attr_accessor :locked_at
    # Mark as failed
    attr_accessor :failed
    # Number of attempts
    attr_accessor :attempts
    # Backtraces of unsuccessful attempts
    attr_accessor :errors

    # Build a job from a hash of attributes (string or symbol keys).
    #
    # Raises ArgumentError when no queue is given.
    def initialize(attrs={})
      raise ArgumentError, ":queue is required" unless attrs["queue"] || attrs[:queue]
      self.attributes = attrs
    end

    # Create a new job and save it to the queue specified in `opts[:queue]`
    def self.create(opts={})
      job = new opts
      job.save
      job
    end

    # Get the next job from any `opts[:queues]` and mark it as locked.
    #
    # Options:
    #
    # * :locked_by - (required) identifier stored on the job as its lock owner
    # * :queues    - restrict to these queues when present and not empty
    # * :host      - only jobs whose `created_by` equals this host
    #
    # Returns the locked job, or nil when no job is available or the driver
    # raises Mongo::OperationFailure. Raises ArgumentError without :locked_by.
    def self.next(opts={})
      raise ArgumentError, "locked_by is required" unless opts[:locked_by]

      time_now = Time.now.utc

      conditions = {query: {}}
      # Not failed jobs only
      conditions[:query][:failed] = false
      # Only jobs generated by the specified host if present
      conditions[:query][:created_by] = opts[:host] if opts[:host]
      # Skip delayed jobs
      conditions[:query][:delayed_until] = {"$lte" => time_now}
      # Filter by queue if present
      conditions[:query][:queue] = {"$in" => opts[:queues]} if opts[:queues] && !opts[:queues].empty?
      # Skip locked jobs, unless the lock is older than LOCK_TIMEOUT
      conditions[:query]["$or"] = [{locked_at: {"$lte" => time_now - LOCK_TIMEOUT}}, {locked_at: nil}]
      # Last chance to skip dead jobs
      conditions[:query]["$where"] = "this.attempts <= this.retries"

      # Set `locked_by` with worker id and increment the number of attempts
      conditions[:update] = {
        "$set" => {
          locked_by: opts[:locked_by],
          locked_at: time_now,
        },
        "$inc" => {
          attempts: 1
        }
      }

      # Sort by `created_at`
      conditions[:sort] = [:created_at, :asc]

      # Returns the modified job
      conditions[:new] = true

      # Guard against a nil document so "nothing to do" yields nil instead of
      # failing inside the constructor.
      doc = collection.find_and_modify conditions
      doc && new(doc)
    rescue Mongo::OperationFailure
      nil
    end

    # Get failed jobs from any `opts[:queues]`.
    #
    # A job is selected when it is flagged `failed` or when its attempts
    # exceed its retries. `opts[:skip]` and `opts[:limit]` are passed to the
    # query when present. Returns an array of jobs.
    def self.failed(opts={})
      selector = {}
      # Failed or dead jobs only
      selector["$or"] = [
        {failed: true},
        {"$where" => "this.attempts > this.retries"},
      ]
      # Filter by queue if present
      selector[:queue] = {"$in" => opts[:queues]} if opts[:queues] && !opts[:queues].empty?

      options = {}
      options[:skip] = opts[:skip] if opts[:skip]
      options[:limit] = opts[:limit] if opts[:limit]

      collection.find(selector, options).collect{|attrs| new attrs}
    end

    # Find a job by id; returns nil when `find_one` yields no document.
    def self.find(id)
      if doc = collection.find_one(id)
        new doc
      end
    end

    # Collection holding the jobs, memoized on first use.
    #
    # Raises Cyclop::DatabaseNotAvailable when `Cyclop.db` is not set.
    #
    # Kept above the `private` marker on purpose: `private` does not apply to
    # `def self.` methods, and the instance-level `collection` helper calls
    # this with an explicit receiver, so it has to stay public.
    def self.collection
      @@collection ||= Cyclop.db ?
        Cyclop.db["cyclop_jobs"] : raise(Cyclop::DatabaseNotAvailable)
    end

    # Save to queue.
    #
    # Only inserting a new job is supported; saving a persisted job raises
    # NotImplementedError. Returns true on success and false when the driver
    # raises Mongo::OperationFailure.
    def save
      self.updated_at = Time.now.utc
      if persisted?
        raise NotImplementedError
      else
        self.created_at = updated_at
        self.delayed_until = ::Time.at(created_at.to_i + delay).utc
        self._id = collection.insert attributes, safe: true
      end
      true
    rescue Mongo::OperationFailure
      false
    end

    # Re-read the attributes from the collection and return self.
    def reload
      self.attributes = collection.find_one _id
      self
    end

    # If we have an id the object is persisted
    def persisted?
      !!_id
    end

    # Two jobs are equal when they share the same `_id`. Objects that do not
    # respond to `_id` (nil, strings, ...) are never equal to a job.
    def ==(other)
      other.respond_to?(:_id) && other._id == _id
    end

    # Remove successfully processed job from the queue
    def complete!
      collection.remove _id: _id, locked_by: Cyclop.master_id
    end

    # Release job for further processing.
    #
    # While attempts have not exceeded retries the lock is cleared and the
    # job is delayed by `splay` seconds; otherwise it is flagged as failed.
    # When an exception is given, its class, message and backtrace are pushed
    # onto the job's `errors`. The update only matches a job still locked by
    # `Cyclop.master_id`.
    def release!(exception = nil)
      now = ::Time.at(Time.now.to_i).utc
      selector = {_id: _id, locked_by: Cyclop.master_id}
      set = if attempts<=retries
        {locked_by: nil, locked_at: nil, delayed_until: now+splay}
      else
        {failed: true}
      end
      update = {"$set" => set}
      update["$push"] = {
        :errors => {
          :locked_by => locked_by,
          :locked_at => locked_at,
          :class => exception.class.name,
          :message => exception.message,
          :backtrace => exception.backtrace,
          :created_at => now,
        },
      } if exception
      collection.update selector, update, :safe => true
    end

    private

    # Instance-level shortcut to the class-level collection.
    def collection
      self.class.collection
    end

    # Attributes written on insert (`_id` is assigned from the insert result).
    def attributes
      {
        queue: queue,
        job_params: job_params,
        delay: delay,
        delayed_until: delayed_until,
        retries: retries,
        splay: splay,
        created_by: created_by,
        created_at: created_at,
        updated_at: updated_at,
        locked_by: locked_by,
        locked_at: locked_at,
        failed: failed,
        attempts: attempts,
        errors: errors,
      }
    end

    # Assign every key through its writer, then fill in defaults for the
    # attributes left unset.
    def attributes=(attrs)
      attrs.each do |key, value|
        send "#{key}=", value
      end
      self.delay ||= 0
      self.retries ||= 0
      self.splay ||= 60
      self.created_by ||= Cyclop.host
      self.failed ||= false
      self.attempts ||= 0
      self.errors ||= []
    end
  end
end
@@ -0,0 +1,3 @@
1
module Cyclop
  # Gem version. Frozen so the shared constant string cannot be mutated.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,128 @@
1
module Cyclop
  # Polls the queue for jobs and processes each one in a forked child
  # process, so the polling process keeps running whatever happens to the
  # child and can complete or release the job from the child's exit status.
  class Worker
    # Queues to process
    attr_accessor :queues
    # Logger for master
    attr_accessor :logger
    # How much time to sleep between poll
    attr_accessor :sleep_interval
    # Path to actions directory
    attr_accessor :actions

    # Build a worker from a config hash with string keys and connect to mongo.
    #
    # Keys read: "queues", "log_file", "sleep_interval", "actions" and
    # "mongo" ("database", "hosts", "rs_name", "read_secondary", "host",
    # "port", "log"). A replica-set connection is used when mongo["hosts"]
    # is present, a single-host connection otherwise.
    #
    # Raises ArgumentError when mongo["database"] is missing.
    def initialize(config={})
      raise ArgumentError, 'mongo["database"] is required' unless config["mongo"] && config["mongo"]["database"]

      self.queues = config["queues"] || []
      self.logger = Logger.new(config["log_file"] || $stdout)
      self.sleep_interval = config["sleep_interval"] || 1
      self.actions = config["actions"] || "./actions"
      connection = if config["mongo"]["hosts"]
        Mongo::ReplSetConnection.new(
          *config["mongo"]["hosts"],
          rs_name: config["mongo"]["rs_name"],
          read_secondary: !!config["mongo"]["read_secondary"],
          logger: (logger if config["mongo"]["log"]),
        )
      else
        Mongo::Connection.new(
          (config["mongo"]["host"] || "127.0.0.1"),
          (config["mongo"]["port"] || 27017),
          logger: (logger if config["mongo"]["log"]),
        )
      end
      Cyclop.db = connection.db config["mongo"]["database"]
    end

    # Start processing jobs.
    #
    # Loops until `stop` has been requested: fetches the next job, forks a
    # child to perform it and waits for that child; sleeps `sleep_interval`
    # when there is no job.
    def run
      register_signal_handlers
      loop do
        if @stop
          log "Shutting down..."
          break
        end
        if job = next_job
          @sleeping = false
          if @pid = fork
            supervise job
          else
            procline "Processing #{job.queue}-#{job._id} (started at #{Time.now.utc})"
            exit! perform job
          end
        else
          log "No more job to process, start sleeping..." unless @sleeping
          @sleeping = true
          sleep sleep_interval
        end
      end
    end

    # Called inside forked process
    #
    # Parameters:
    #
    # * (Cyclop::Job) job - the job to process
    #
    # Returns 0 on success. On any exception the error is logged, the job is
    # released with that exception and 1 is returned.
    def perform(job)
      load_actions
      Cyclop::Action.find_by_queue(job.queue).perform(*job.job_params)
      0
    # Deliberately broad: the return value becomes the child's exit status
    # and the job must be released whatever was raised.
    rescue Exception => e
      log e.to_s
      job.release! e
      1
    end

    # Graceful shutdown
    def stop
      @stop = true
    end

    # Forced shutdown: terminate the running child, if any, then exit at once.
    def stop!
      if @pid
        begin
          Process.kill "TERM", @pid
          Process.wait @pid
        rescue Errno::ESRCH, Errno::ECHILD
          # The child is already gone; nothing left to terminate or reap.
        end
      end
      exit!
    end

    private

    # Parent side of the fork: wait for the child working on `job`, then
    # complete the job on exit status 0 and release it otherwise.
    def supervise(job)
      msg = "Forked process #{@pid} to work on job #{job.queue}-#{job._id}..."
      log msg
      procline msg
      # Wait for this specific child rather than for any child.
      Process.wait @pid
      status = $?
      log "Child process #{@pid} ended with status: #{status}"
      # Forget the reaped child so a later `stop!` does not signal a stale
      # (possibly reused) pid.
      @pid = nil
      if status.exitstatus==0
        job.complete!
      else
        job.release!
      end
    end

    # Trap signals
    #
    # QUIT - graceful shutdown
    # INT - first graceful shutdown, second time force shutdown
    # TERM - force shutdown
    def register_signal_handlers
      trap("QUIT") { stop }
      trap("INT") { @stop ? stop! : stop }
      trap("TERM") { stop! }
    end

    # Fetch the next job from the configured queues.
    def next_job
      Cyclop.next(*queues)
    end

    # Set the process title shown by tools such as `ps`.
    def procline(line)
      $0 = "cyclop-#{Cyclop::VERSION}: #{line}"
    end

    # Append a timestamped line to the logger.
    def log(message)
      logger << "#{Time.now}: #{message}\n"
    end

    # Require every Ruby file found directly in the actions directory.
    def load_actions
      Dir["#{actions}/*.rb"].each{|action| require action}
    end
  end
end