mongojob 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
+ module MongoJob
+   module Model
+     class Queue
+       include MongoMapper::Document
+       extend MongoJob::Mixins::Document
+
+
+       key :_id, String # name of the queue
+
+       timestamps!
+
+       many :jobs, foreign_key: 'queue_name', class_name: 'MongoJob::Model::Job'
+
+     end
+   end
+ end
@@ -0,0 +1,35 @@
+ module MongoJob
+   module Model
+     class Worker
+       include MongoMapper::Document
+       extend MongoJob::Mixins::Document
+
+       key :_id, String # usually of format ip_address:pid
+       key :hostname, String
+       key :ip, String
+
+       key :queues, Array
+       key :status, String
+
+       key :custom_status
+
+       key :pinged_at
+
+       # Can contain keys: done, failed with number of jobs
+       key :stats, Hash
+
+       timestamps!
+
+       many :jobs, class_name: 'MongoJob::Model::Job', foreign_key: :worker_id
+
+       def self.tick id, data
+         model_worker = Model::Worker.find id
+         model_worker ||= Model::Worker.create({
+           id: id
+         })
+         model_worker.set data
+       end
+
+     end
+   end
+ end
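
For reference, Model::Worker.tick is the find-or-create heartbeat hook used by the worker process (see the tick method in the worker file further down in this diff). A minimal sketch of a call, with a purely illustrative id and field values that are not taken from the package:

# illustrative heartbeat call; id and data values are examples only
MongoJob::Model::Worker.tick '10.0.0.5:12345', {
  pinged_at: Time.now,
  status: 'ok',
  queues: ['default'],
  hostname: 'worker-host',
  ip: '10.0.0.5',
  custom_status: {}
}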
@@ -0,0 +1,3 @@
+ module MongoJob
+   Version = '0.0.1'
+ end
@@ -0,0 +1,80 @@
+ require "sinatra"
+ require "haml"
+ require "sass"
+ require "json"
+
+ require "mongojob"
+
+ require "mongojob/web/helpers"
+
+ module MongoJob
+   class Web < Sinatra::Base
+
+     helpers Sinatra::Partials
+     helpers WebHelpers
+
+     configure do
+       set :raise_errors, Proc.new { test? }
+       set :show_exceptions, Proc.new { development? }
+       set :dump_errors, true
+       # set :sessions, true
+       # set :logging, false #Proc.new { ! test? }
+       set :methodoverride, true
+       set :static, true
+       set :public, MJ_ROOT + '/web/public'
+       set :views, MJ_ROOT + '/web/views'
+       set :root, MJ_ROOT
+
+       # set :logging, false
+       # LOGGER = Logger.new(::File.join(APP_ROOT, 'log/access.log'))
+       # use Rack::CommonLogger, LOGGER
+     end
+
+     configure :development do
+       use Rack::Reloader
+     end
+
+     before do
+       @config = {
+         host: MongoJob.host,
+         database_name: MongoJob.database_name
+       }
+     end
+
+     get "/style/:style.css" do
+       headers 'Content-Type' => 'text/css; charset=utf-8'
+       sass :"style/#{params[:style]}"
+     end
+
+     get "/" do
+       @queues = Model::Queue.all
+       @workers = Model::Worker.all
+       # TODO: Make some overview
+       haml :index
+     end
+
+     # Queue detailed information
+     get "/queue/:id" do
+       @queue = Model::Queue.find params[:id]
+       @jobs = @queue.jobs.all status: (params['job_status'] || 'queued')
+       haml :queue
+     end
+
+     get "/worker/:id" do
+       @worker = Model::Worker.find params[:id]
+       haml :worker
+     end
+
+     get "/job/:id" do
+       @job = Model::Job.find params[:id]
+       haml :job
+     end
+
+     delete "/job/:id" do
+       @job = Model::Job.find params[:id]
+       MongoJob.dequeue(@job.id)
+     end
+
+
+   end
+ end
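
MongoJob::Web is a plain Sinatra::Base application, so it can be mounted with any Rack server. A minimal config.ru sketch, assuming the file above is loadable as 'mongojob/web' and that MJ_ROOT is defined by the gem itself; the MongoJob.host= and MongoJob.database_name= setters are the same ones the worker's option parser uses later in this diff:

# config.ru -- illustrative only; the require path is an assumption
require 'mongojob/web'

MongoJob.host = 'localhost'          # read back by the web UI in its before filter
MongoJob.database_name = 'mongojob'

run MongoJob::Web

Running `rackup config.ru` then serves the dashboard with the /, /queue/:id, /worker/:id, and /job/:id routes defined above.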
@@ -0,0 +1,46 @@
+ module MongoJob::WebHelpers
+   def versioned_css(stylesheet)
+     # Check for css and sass files
+     css_file = File.join(MongoJob::Web.public, "style", "#{stylesheet}.css")
+     sass_file = File.join(MongoJob::Web.views, "style", "#{stylesheet}.sass")
+
+     if File.exists? css_file
+       mtime = File.mtime(css_file).to_i.to_s
+     else
+       if File.exists? sass_file
+         mtime = File.mtime(sass_file).to_i.to_s
+       end
+     end
+     mtime ||= '0'
+     "/style/#{stylesheet}.css?" + mtime
+   end
+   def versioned_js(js)
+     "/script/#{js}.js?" + File.mtime(File.join(MongoJob::Web.public, "script", "#{js}.js")).to_i.to_s
+   end
+
+   def versioned_resource(resource)
+     "/#{resource}?" + File.mtime(File.join(MongoJob::Web.public, resource)).to_i.to_s
+   end
+
+   def request_uri
+     request.env["REQUEST_URI"]
+   end
+ end
+
+ # Copied and adapted to HAML from http://gist.github.com/119874 - thanks!
+ module Sinatra::Partials
+   def partial(template, *args)
+     template_array = template.to_s.split('/')
+     template = template_array[0..-2].join('/') + "/_#{template_array[-1]}"
+     options = args.last.is_a?(Hash) ? args.pop : {}
+     options.merge!(:layout => false)
+     if collection = options.delete(:collection) then
+       collection.inject([]) do |buffer, member|
+         buffer << haml(:"#{template}", options.merge(:layout =>
+           false, :locals => {template_array[-1].to_sym => member}))
+       end.join("\n")
+     else
+       haml(:"#{template}", options)
+     end
+   end
+ end
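
The Sinatra::Partials helper resolves `partial :'workers/worker'` to the view `workers/_worker` and, when given a :collection option, renders the partial once per member with a local named after the template. A small sketch of how it might be called from a view; the view and variable names here are illustrative, not taken from the package:

# in a HAML view such as views/index.haml (illustrative):
#   = partial :'workers/worker', :collection => @workers
# renders views/workers/_worker.haml once for each element of @workers,
# exposing the element as a local called `worker`, and joins the results with newlines.
#
# versioned_css('main') returns something like "/style/main.css?1273497600",
# using the stylesheet's mtime as a cache-busting query string.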
@@ -0,0 +1,370 @@
+ require "eventmachine"
+ require "fiber"
+
+ module MongoJob
+
+   module ProcessWatcher
+     def process_exited
+       puts 'the forked child died!'
+     end
+   end
+
+
+   class Worker
+
+     extend Mixins::FiberRunner::ClassMethods
+     include Mixins::FiberRunner::InstanceMethods
+
+     extend Helpers
+     include Helpers
+
+     task :tick, 3
+     task :work_job, 1
+     task :monitor_jobs, 3
+
+
+     attr_accessor :current_jobs
+     attr_accessor :log
+
+     def self.default_options
+       @default_options ||= {
+         max_jobs: 1,
+         log: STDOUT,
+         loglevel: Logger::DEBUG
+       }
+     end
+
+     # Workers should be initialized with an array of string queue
+     # names. The order is important: a Worker will check the first
+     # queue given for a job. If none is found, it will check the
+     # second queue name given. If a job is found, it will be
+     # processed. Upon completion, the Worker will again check the
+     # first queue given, and so forth. In this way the queue list
+     # passed to a Worker on startup defines the priorities of queues.
+     #
+     # If passed a single "*", this Worker will operate on all queues
+     # in alphabetical order. Queues can be dynamically added or
+     # removed without needing to restart workers using this method.
+     def initialize(*queues)
+       options = {}
+       options = queues.pop if queues.last.is_a?(Hash)
+       options = self.class.default_options.merge(options)
+       queues = options[:queues] if (queues.nil? || queues.empty?)
+       raise "No queues provided" if (queues.nil? || queues.empty?)
+       @id = options[:id]
+       @queues = queues
+       @max_jobs = options[:max_jobs]
+       @current_jobs = []
+       @job_pids = {}
+
+       # Initialize logger
+       @log = ::Logger.new options[:log]
+       @log.formatter = Logger::Formatter.new
+       @log.level = options[:loglevel]
+       $log = log
+     end
+
+     # chomp'd hostname of this machine
+     def hostname
+       @hostname ||= `hostname`.strip
+     end
+
+     def id
+       @id ||= "#{hostname}:#{Process.pid}"
+     end
+
+     # Runs the worker
+     def run
+       log.info "Starting worker"
+       register_signal_handlers
+       EM.run do
+         run_defined_tasks
+       end
+     end
+
+     # Contains the working cycle:
+     # 0. Maintenance stuff
+     # 1. Get a job
+     # 2. Run a job
+     def work_job
+
+       # MAINTENANCE
+
+       # Are we shutting down?
+       if @shutdown
+         Kernel.exit!(0) if @current_jobs.size == 0
+       end
+
+       # PROCESSING JOBS
+
+       # Get a job
+       job = get_new_job
+       return unless job
+       log.info "Got a new job #{job.id}"
+
+       if job.job_class.fork?
+         # Job that requires a fork, perfect for long-running stuff.
+         log.debug "Forking the process for job #{job.id}"
+         pid = fork do
+           process_job job
+         end
+         @job_pids[job.id] = pid
+         # TODO: We need to store which PID corresponds to this job
+       elsif job.job_class.fiber?
+         # A job that requires a separate fiber.
+         log.debug "Creating a new fiber for job #{job.id}"
+         Fiber.new do
+           process_job job
+           finish_job job
+         end.resume
+       else
+         # Old-school, blocking job
+         log.debug "Running job #{job.id} in the blocking mode"
+         process_job job
+         finish_job job
+       end
+     end
+
+     def get_new_job
+       return if @current_jobs.size >= @max_jobs
+       job = nil
+       @queues.find do |queue|
+         job = MongoJob.reserve(queue, self.id)
+       end
+       @current_jobs << job.id if job
+       job
+     end
+
+     # Processes the job, in the child process if forking.
+     def process_job job
+       begin
+         log.info "Performing job #{job.id}"
+         jo = job.job_object
+         jo.log = log
+         jo.perform
+         log.info "Job #{job.id} completed"
+         job.complete
+         Model::Worker.increment(id, {:'stats.done' => 1})
+       rescue Exception => e
+         log.info "Job #{job.id} failed"
+         log.info e
+         job.fail e
+         Model::Worker.increment(id, {:'stats.failed' => 1})
+         p e
+       end
+     end
+
+     # Removes job from the internal stack
+     def finish_job job
+       job_id = job.respond_to?(:id) ? job.id : job
+       @current_jobs.delete job_id
+       @job_pids.delete(job_id)
+     end
+
+     # Mark job as failed
+     def fail_job job, error
+       job.fail error
+     end
+
+     # Forks a process and runs the code passed in the block in the new process
+     def fork &blk
+       pid = Process.fork do
+         if EM.reactor_running?
+           # Need to clear EM reactor
+           EM.stop_event_loop
+           EM.release_machine
+           EM.instance_variable_set( '@reactor_running', false )
+         end
+         # TODO: Should we rescue exceptions from the block call?
+         blk.call
+         Process.exit!(0)
+       end
+       # Detach the process. We are not using Process.wait.
+       # Process.detach pid
+       pid
+     end
+
+     # Monitors jobs and pings storage if they are alive.
+     # Currently it monitors only forked processes
+     def monitor_jobs
+       @job_pids.each do |job_id, pid|
+         # Check if alive
+         line = `ps -www -o rss,state -p #{pid}`.split("\n")[1]
+         rss = state = nil
+         running = true
+         if line
+           rss, state = line.split ' '
+           log.debug "Process #{pid} for job #{job_id} in state #{state}, uses #{rss}k mem"
+         else
+           # Missing process, which means something went very wrong.
+           # TODO: report it!
+           log.debug "Process #{pid} for job #{job_id} is missing!"
+           running = false
+         end
+
+         # Now check if finished, which means it will be in Z (zombie) status
+         # TODO: should we use EventMachine#watch_process ?
+         if state =~ /Z/
+           # Process completed, collect information
+           pid, status = Process.wait2 pid
+           log.debug "Process #{pid} for job #{job_id} exited with status #{status.exitstatus}"
+           running = false
+         end
+
+         job = MongoJob.find_job job_id
+
+         if running
+           # Still running, so ping database
+           # One more thing to check - if the job does not exist, we are killing the process.
+           if job
+             job.ping
+           else
+             log.info "Job #{job_id} for process #{pid} is missing, killing"
+             Process.kill 'KILL', pid
+           end
+         else
+           # Process not running
+           # Check the status of the job - if it is still marked as "working", we should set its
+           # status to "failed"
+           if job && job.status == 'working'
+             job.fail "Process missing."
+           end
+           # For sure we are not working on it anymore, so remove from the stack
+           finish_job job_id
+         end
+
+       end
+     end
+
+     # Periodically send pings so that we know that the worker is alive.
+     # The method also checks stored worker status and shuts down the worker
+     # if the stored status indicates failure or timeout.
+     def tick
+       worker = Model::Worker.find id
+
+       # Shut down if there is no worker status stored
+       # shutdown! unless worker
+
+       # Shut down if worker status is different than 'ok'
+       # shutdown! unless worker.status == 'ok'
+
+       data = tick_data.merge({
+         pinged_at: Time.now,
+         status: 'ok',
+         queues: @queues
+       })
+       Model::Worker.tick id, data
+     end
+
+     # Prepares data to be sent along with the tick.
+     def tick_data
+       {
+         hostname: hostname,
+         ip: real_ip,
+         custom_status: custom_status
+       }
+     end
+
+     # Override this method if needed.
+     def custom_status
+       {}
+     end
+
+     # Retrieves the real IP address of the machine
+     def real_ip
+       return @real_ip if @real_ip
+       begin
+         orig, Socket.do_not_reverse_lookup = Socket.do_not_reverse_lookup, true # turn off reverse DNS resolution temporarily
+
+         UDPSocket.open do |s|
+           s.connect '64.233.187.99', 1
+           @real_ip = s.addr.last
+         end
+       ensure
+         Socket.do_not_reverse_lookup = orig
+       end
+       @real_ip
+     end
+
+     # Registers the various signal handlers a worker responds to.
+     #
+     # TERM: Shutdown immediately, stop processing jobs.
+     # INT: Shutdown immediately, stop processing jobs.
+     # QUIT: Shutdown after the current job has finished processing.
+     def register_signal_handlers
+       trap('TERM') { shutdown! }
+       trap('INT') { shutdown! }
+
+       trap('QUIT') { shutdown }
+
+       log.info "Registered signals"
+     end
+
+     # Schedule this worker for shutdown. Will finish processing the
+     # current jobs.
+     def shutdown
+       log.info 'Shutting down...'
+       @shutdown = true
+     end
+
+     # Kill the child and shutdown immediately.
+     def shutdown!
+       shutdown
+       kill_jobs
+     end
+
+     # Kills all jobs
+     def kill_jobs
+       log.debug "Immediately killing all jobs"
+       @job_pids.each do |job_id, pid|
+         log.debug "Killing process #{pid} with job #{job_id}"
+         Process.kill 'KILL', pid
+       end
+
+       # How to kill fiber jobs? Remove them from @current_jobs, mark as failed
+       fiber_jobs = @current_jobs.select{|job_id| ! @job_pids[job_id]}
+       fiber_jobs.each do |job_id|
+         # FAIL FAIL FAIL!!!
+         job = MongoJob.find_job job_id
+         if job
+           job.fail "Process killed."
+         end
+         finish_job job_id
+       end
+     end
+
+     # Parse command-line parameters
+     def self.parse_options
+       options = {}
+       OptionParser.new do |opts|
+         opts.banner = "Usage: #{::File.basename($0)} [options]"
+         opts.on('-q QUEUES', 'comma-separated queues this worker will handle') {|queues|
+           options[:queues] = queues.split(/,\s*/)
+         }
+         opts.on('-h HOST', "--host HOST", "set the MongoDB host") {|host|
+           MongoJob.host = host
+         }
+         opts.on('-d DATABASE_NAME', "--database-name DATABASE_NAME", "set the MongoDB database name") {|database_name|
+           MongoJob.database_name = database_name
+         }
+         opts.on("-l LOGFILE", "logfile, or STDOUT to log to console") do |v|
+           options[:log] = (v == 'STDOUT' ? STDOUT : v)
+         end
+         opts.on("-v LOGLEVEL", "one of DEBUG, INFO, WARN, ERROR, FATAL") do |v|
+           options[:loglevel] = v
+         end
+         opts.on("-r LOAD_MODULE", "requires an extra ruby file") do |v|
+           require v
+         end
+         opts.on("-i ID", "set worker id") do |v|
+           options[:id] = v
+         end
+         opts.on("-m MAX_JOBS", "max jobs") do |v|
+           options[:max_jobs] = v.to_i
+         end
+       end.parse!
+       options
+     end
+
+   end
+ end
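
Putting the worker together: Worker.parse_options fills an options hash from the command line (and sets the MongoDB connection via MongoJob.host= and MongoJob.database_name=), and passing that hash to Worker.new makes it read the queue list from options[:queues]. A minimal launcher sketch, assuming the top-level require is simply 'mongojob' and that the gem loads OptionParser, Logger, and the socket library itself:

#!/usr/bin/env ruby
# illustrative launcher script -- the released gem may ship its own executable
require 'mongojob'                           # assumed top-level require

options = MongoJob::Worker.parse_options     # e.g. run with: -q default,mail -h localhost -d mongojob
worker  = MongoJob::Worker.new(options)      # queues come from options[:queues]; raises if -q was omitted
worker.run                                   # registers signal handlers and starts the EventMachine loop

Inside run, the worker drives the three tasks declared at the top of the class: tick, work_job, and monitor_jobs (the numeric arguments to task presumably being their run intervals, defined by the FiberRunner mixin not shown in this diff).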