rocketjob 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
+ # encoding: UTF-8
+ require 'sync_attr'
+ module RocketJob
+   # Centralized Configuration for Rocket Jobs
+   class Config
+     include MongoMapper::Document
+     include SyncAttr
+
+     # Prevent data in MongoDB from re-defining the model behavior
+     #self.static_keys = true
+
+     # Returns the single instance of the Rocket Job Configuration for this site
+     # in a thread-safe way
+     sync_cattr_reader(:instance) do
+       begin
+         first || create
+       rescue Exception => exc
+         # In case another process has already created the first document
+         first
+       end
+     end
+
+     # By enabling inline_mode jobs will be called in-line
+     # No server processes will be created, nor threads created
+     sync_cattr_accessor(:inline_mode) { false }
+
+     # The maximum number of worker threads to create on any one server
+     key :max_worker_threads, Integer, default: 10
+
+     # Number of seconds between heartbeats from Rocket Job Server processes
+     key :heartbeat_seconds, Integer, default: 15
+
+     # Maximum number of seconds a Server will wait before checking for new jobs
+     key :max_poll_seconds, Integer, default: 5
+
+     # Number of seconds between checking for:
+     # - Jobs with a higher priority
+     # - If the current job has been paused, or aborted
+     #
+     # Making this interval too short results in too many checks for job status
+     # changes instead of focusing on completing the active tasks
+     #
+     # Note:
+     #   Not all job types support pausing in the middle
+     key :re_check_seconds, Integer, default: 60
+
+     # Limit the number of workers per job class per server
+     #   'class_name' / group => 100
+     #key :limits, Hash
+
+     # Replace the MongoMapper default mongo connection for holding jobs
+     def self.mongo_connection=(connection)
+       connection(connection)
+       Server.connection(connection)
+       Job.connection(connection)
+
+       db_name = connection.db.name
+       set_database_name(db_name)
+       Server.set_database_name(db_name)
+       Job.set_database_name(db_name)
+     end
+
+     # Use a separate Mongo connection for the Records and Results
+     # Allows the records and results to be stored in a separate Mongo database
+     # from the Jobs themselves.
+     #
+     # It is recommended to set the work_connection to a local Mongo Server that
+     # is not replicated to another data center to prevent flooding the network
+     # with replication of data records and results.
+     # The jobs themselves can/should be replicated across data centers so that
+     # they are never lost.
+     def self.mongo_work_connection=(connection)
+       @@mongo_work_connection = connection
+     end
+
+     # Returns the Mongo connection for the Records and Results
+     def self.mongo_work_connection
+       @@mongo_work_connection || connection
+     end
+
+   end
+ end
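
The Config class above exposes two connection hooks: mongo_connection= points the Config, Server and Job collections at a (typically replicated) cluster, while mongo_work_connection= keeps the high-volume records and results on a separate server. A minimal initializer sketch of how an application might wire these up; the Mongo::MongoClient calls are an assumption about the legacy mongo 1.x driver that MongoMapper used at the time, and the URIs are placeholders:

    require 'rocketjob'

    # Jobs, servers and configuration live on a replicated cluster so they are never lost.
    # Assumption: the legacy 1.x driver API (Mongo::MongoClient.from_uri); substitute
    # whatever client object your MongoMapper setup already provides.
    RocketJob::Config.mongo_connection = Mongo::MongoClient.from_uri(
      'mongodb://jobs.example.org:27017/rocket_job'
    )

    # Records and results stay on a local, non-replicated server to avoid flooding
    # cross-data-center replication, as recommended in the comments above.
    RocketJob::Config.mongo_work_connection = Mongo::MongoClient.from_uri(
      'mongodb://localhost:27017/rocket_job_work'
    )

    # For tests or debugging, run jobs in-line without any server processes or threads.
    RocketJob::Config.inline_mode = true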
@@ -0,0 +1,44 @@
+ # encoding: UTF-8
+ module RocketJob
+   # Heartbeat
+   #
+   # Information from the server as at its last heartbeat
+   class Heartbeat
+     include MongoMapper::EmbeddedDocument
+
+     embedded_in :server
+
+     # Time of the last heartbeat received from this server
+     key :updated_at, Time
+
+     # Number of threads running as at the last heartbeat interval
+     key :active_threads, Integer
+     # Number of threads in the pool
+     # This number should grow and shrink between 1 and :max_threads
+     key :current_threads, Integer
+
+     #
+     # Process Information
+     #
+
+     # Percentage utilization for the server process alone
+     key :process_cpu, Integer
+     # Kilobytes used by the server process (Virtual & Physical)
+     key :process_mem_phys_kb, Integer
+     key :process_mem_virt_kb, Integer
+
+     #
+     # System Information
+     #
+
+     # Percentage utilization for the host machine
+     key :host_cpu, Integer
+     # Kilobytes available on the host machine (Physical)
+     key :host_mem_avail_phys_kbytes, Float
+     key :host_mem_avail_virt_kbytes, Float
+
+     # If available
+     key :load_average, Float
+   end
+ end
+
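Each Server document embeds one of these Heartbeat documents and refreshes it every Config#heartbeat_seconds. A console sketch for spotting servers that have stopped reporting, purely for illustration: it assumes the RocketJob::Server model (not part of this diff) exposes the embedded document as #heartbeat, and the staleness multiple is arbitrary:

    # Flag servers whose last heartbeat is older than a few heartbeat intervals.
    stale_after = 4 * RocketJob::Config.instance.heartbeat_seconds   # seconds

    RocketJob::Server.all.each do |server|
      hb = server.heartbeat                 # assumed accessor for the embedded document
      next if hb.nil? || hb.updated_at.nil?
      if Time.now - hb.updated_at > stale_after
        puts "Server #{server.id} appears dead: last heartbeat #{hb.updated_at}, " \
             "#{hb.active_threads}/#{hb.current_threads} threads, " \
             "host CPU #{hb.host_cpu}%, load #{hb.load_average}"
      end
    end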
@@ -0,0 +1,442 @@
+ # encoding: UTF-8
+ require 'aasm'
+ module RocketJob
+   # The base job from which all jobs are created
+   class Job
+     include MongoMapper::Document
+     include AASM
+     include SemanticLogger::Loggable
+     include Concerns::Worker
+
+     # Prevent data in MongoDB from re-defining the model behavior
+     #self.static_keys = true
+
+     #
+     # User definable attributes
+     #
+     # The following attributes are set when the job is created
+
+     # Description for this job instance
+     key :description, String
+
+     # Method that must be invoked to complete this job
+     key :perform_method, Symbol, default: :perform
+
+     # Priority of this job as it relates to other jobs [1..100]
+     #   1: Lowest Priority
+     # 100: Highest Priority
+     #  50: Default Priority
+     key :priority, Integer, default: 50
+
+     # Support running this job in the future
+     #   Also set when a job fails and needs to be re-tried in the future
+     key :run_at, Time
+
+     # If a job has not started by this time, destroy it
+     key :expires_at, Time
+
+     # When specified a job will be re-scheduled to run at its next scheduled interval
+     #   Format is the same as cron
+     key :schedule, String
+
+     # Job should be marked as repeatable when it can be run multiple times
+     # without changing the system state or modifying database contents.
+     # Setting to false will result in an additional lookup on the results collection
+     # before processing the record to ensure it was not previously processed.
+     # This is necessary for retrying a job.
+     key :repeatable, Boolean, default: true
+
+     # When the job completes destroy it from both the database and the UI
+     key :destroy_on_complete, Boolean, default: true
+
+     # Any user supplied arguments for the method invocation
+     # All keys must be UTF-8 strings. The values can be any valid BSON type:
+     #   Integer
+     #   Float
+     #   Time    (UTC)
+     #   String  (UTF-8)
+     #   Array
+     #   Hash
+     #   True
+     #   False
+     #   Symbol
+     #   nil
+     #   Regular Expression
+     #
+     # Note: Date is not supported, convert it to a UTC time
+     key :arguments, Array, default: []
+
+     # Whether to store the results from this job
+     key :collect_output, Boolean, default: false
+
+     # Raise or lower the log level when calling the job
+     #   Can be used to reduce log noise, especially during high volume calls
+     #   For debugging a single job can be logged at a low level such as :trace
+     #   Levels supported: :trace, :debug, :info, :warn, :error, :fatal
+     key :log_level, Symbol
+
+     # Only give access through the Web UI to this group identifier
+     #key :group, String
+
+     #
+     # Read-only attributes
+     #
+
+     # Current state, as set by AASM
+     key :state, Symbol, default: :queued
+
+     # When the job was created
+     key :created_at, Time, default: -> { Time.now }
+
+     # When processing started on this job
+     key :started_at, Time
+
+     # When the job completed processing
+     key :completed_at, Time
+
+     # Number of times that this job has failed to process
+     key :failure_count, Integer, default: 0
+
+     # The name of the server that this job is being processed by, or was processed by
+     key :server_name, String
+
+     #
+     # Values that jobs can update during processing
+     #
+
+     # Allow a job to update its estimated progress
+     # Any integer from 0 to 100
+     key :percent_complete, Integer, default: 0
+
+     # Store the last exception for this job
+     one :exception, class_name: 'RocketJob::JobException'
+
+     # Store the Hash result from this job if collect_output is true,
+     # and the job actually returned a Hash, otherwise nil
+     # Not applicable to SlicedJob jobs, since its output is stored in a
+     # separate collection
+     key :result, Hash
+
+     # Store all job types in this collection
+     set_collection_name 'rocket_job.jobs'
+
+     validates_presence_of :state, :failure_count, :created_at, :perform_method
+     # :repeatable, :destroy_on_complete, :collect_output, :arguments
+     validates :priority, inclusion: 1..100
+
+     # State Machine events and transitions
+     #
+     # For Job Record jobs, usual processing:
+     #   :queued -> :running -> :completed
+     #                       -> :paused  -> :running  ( manual )
+     #                       -> :failed  -> :running  ( manual )
+     #                       -> :retry   -> :running  ( future date )
+     #
+     # Any state other than :completed can transition manually to :aborted
+     #
+     # The work queue is priority based and then FIFO thereafter. This
+     # means that records from existing multi-record jobs will be completed before
+     # new jobs are started with the same priority.
+     # The exception is when the loader is not fast enough and the
+     # records queue is empty. In this case the next multi-record job will
+     # start loading too.
+     #
+     # Where: state: [:queued, :running], run_at: $lte: Time.now
+     # Sort: priority, created_at
+     #
+     # Index: state, run_at
+     aasm column: :state do
+       # Job has been created and is queued for processing ( Initial state )
+       state :queued, initial: true
+
+       # Job is running
+       state :running
+
+       # Job has completed processing ( End state )
+       state :completed
+
+       # Job is temporarily paused and no further processing will be completed
+       # until this job has been resumed
+       state :paused
+
+       # Job failed to process and needs to be manually re-tried or aborted
+       state :failed
+
+       # Job failed to process previously and is scheduled to be retried at a
+       # future date
+       state :retry
+
+       # Job was aborted and cannot be resumed ( End state )
+       state :aborted
+
+       event :start, before: :before_start do
+         transitions from: :queued, to: :running
+       end
+
+       event :complete, before: :before_complete do
+         after do
+           destroy if destroy_on_complete
+         end
+         transitions from: :running, to: :completed
+       end
+
+       event :fail, before: :before_fail do
+         transitions from: :queued, to: :failed
+         transitions from: :running, to: :failed
+         transitions from: :paused, to: :failed
+       end
+
+       event :retry, before: :before_retry do
+         transitions from: :failed, to: :running
+       end
+
+       event :pause, before: :before_pause do
+         transitions from: :running, to: :paused
+       end
+
+       event :resume, before: :before_resume do
+         transitions from: :paused, to: :running
+       end
+
+       event :abort, before: :before_abort do
+         transitions from: :running, to: :aborted
+         transitions from: :queued, to: :aborted
+         transitions from: :failed, to: :aborted
+         transitions from: :paused, to: :aborted
+       end
+     end
+
+     # Create indexes
+     def self.create_indexes
+       # Used by find_and_modify in .next_job
+       ensure_index({ state:1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
+       # Remove outdated index if present
+       drop_index("state_1_priority_1_created_at_1_sub_state_1") rescue nil
+       # Used by Mission Control
+       ensure_index [[:created_at, 1]]
+     end
+
+     # Requeue all jobs for the specified dead server
+     def self.requeue_dead_server(server_name)
+       collection.update(
+         { 'server_name' => server_name, 'state' => :running },
+         { '$unset' => { 'server_name' => true, 'started_at' => true }, '$set' => { 'state' => :queued } },
+         multi: true
+       )
+     end
+
+     # Pause all running jobs
+     def self.pause_all
+       where(state: 'running').each { |job| job.pause! }
+     end
+
+     # Resume all paused jobs
+     def self.resume_all
+       where(state: 'paused').each { |job| job.resume! }
+     end
+
+     # Returns [true|false] whether to collect the results from running this batch
+     def collect_output?
+       collect_output == true
+     end
+
+     # Returns [Time] how long the job has taken to complete
+     # If not started then it is the time spent waiting in the queue
+     def duration
+       seconds = if completed_at
+         completed_at - (started_at || created_at)
+       elsif started_at
+         Time.now - started_at
+       else
+         Time.now - created_at
+       end
+       Time.at(seconds)
+     end
+
+     # Returns [Hash] status of this job
+     def status(time_zone='Eastern Time (US & Canada)')
+       h = {
+         state: state,
+         description: description
+       }
+       h[:started_at] = started_at.in_time_zone(time_zone) if started_at
+
+       case
+       when running? || paused?
+         h[:paused_at] = completed_at.in_time_zone(time_zone) if paused?
+         h[:percent_complete] = percent_complete if percent_complete
+       when completed?
+         h[:completed_at] = completed_at.in_time_zone(time_zone)
+       when aborted?
+         h[:aborted_at] = completed_at.in_time_zone(time_zone)
+         h[:percent_complete] = percent_complete if percent_complete
+       when failed?
+         h[:failed_at] = completed_at.in_time_zone(time_zone)
+         h[:percent_complete] = percent_complete if percent_complete
+         h[:exception] = exception.nil? ? {} : exception.attributes
+       end
+       h[:duration] = duration.strftime('%H:%M:%S')
+       h
+     end
+
+     # Same basic formula for calculating retry interval as delayed_job and Sidekiq
+     # TODO Consider lowering the priority automatically after every retry?
+     def seconds_to_delay(count)
+       (count ** 4) + 15 + (rand(30)*(count+1))
+     end
+
+     # Patch the way MongoMapper reloads a model
+     def reload
+       if doc = collection.find_one(:_id => id)
+         load_from_database(doc)
+         self
+       else
+         raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
+       end
+     end
+
+     # After this model is read, convert any hashes in the arguments list to HashWithIndifferentAccess
+     def load_from_database(*args)
+       super
+       self.arguments = arguments.collect {|i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i } if arguments.present?
+     end
+
+     protected
+
+     # Before events that can be overridden by child classes
+     def before_start
+       self.started_at = Time.now
+     end
+
+     def before_complete
+       self.percent_complete = 100
+       self.completed_at = Time.now
+     end
+
+     def before_fail
+       self.completed_at = Time.now
+     end
+
+     def before_retry
+       self.completed_at = nil
+     end
+
+     def before_pause
+       self.completed_at = Time.now
+     end
+
+     def before_resume
+       self.completed_at = nil
+     end
+
+     def before_abort
+       self.completed_at = Time.now
+     end
+
+     ############################################################################
+     protected
+
+     # Returns the next job to work on in priority based order
+     # Returns nil if there are currently no queued jobs, and no batch jobs
+     #   already being processed that have records requiring processing
+     #
+     # Parameters
+     #   server_name [String]
+     #     Name of the server that will be processing this job
+     #
+     #   skip_job_ids [Array<BSON::ObjectId>]
+     #     Job ids to exclude when looking for the next job
+     #
+     # Note:
+     #   If a job is in queued state it will be started
+     def self.next_job(server_name, skip_job_ids = nil)
+       query = {
+         '$and' => [
+           {
+             '$or' => [
+               { 'state' => 'queued' }, # Jobs
+               { 'state' => 'running', 'sub_state' => :processing } # Slices
+             ]
+           },
+           {
+             '$or' => [
+               { run_at: { '$exists' => false } },
+               { run_at: { '$lte' => Time.now } }
+             ]
+           },
+         ]
+       }
+       query['_id'] = { '$nin' => skip_job_ids } if skip_job_ids && skip_job_ids.size > 0
+
+       if doc = find_and_modify(
+           query: query,
+           sort: [['priority', 'asc'], ['created_at', 'asc']],
+           update: { '$set' => { 'server_name' => server_name, 'state' => 'running' } }
+         )
+         job = load(doc)
+         unless job.running?
+           # Also update in-memory state and run call-backs
+           job.start
+           job.set(started_at: job.started_at)
+         end
+         job
+       end
+     end
+
+     ############################################################################
+     private
+
+     # Set exception information for this job
+     def set_exception(server_name, exc)
+       self.server_name = nil
+       self.failure_count += 1
+       self.exception = JobException.from_exception(exc)
+       exception.server_name = server_name
+       fail! unless failed?
+       logger.error("Exception running #{self.class.name}##{perform_method}", exc)
+     end
+
+     # Calls a method on this job, if it is defined
+     # Adds the event name to the method call if supplied
+     #
+     # Returns [Object] the result of calling the method
+     #
+     # Parameters
+     #   method [Symbol]
+     #     The method to call on this job
+     #
+     #   arguments [Array]
+     #     Arguments to pass to the method call
+     #
+     #   Options:
+     #     event: [Symbol]
+     #       Any one of: :before, :after
+     #       Default: None, just calls the method itself
+     #
+     #     log_level: [Symbol]
+     #       Log level to apply to silence logging during the call
+     #       Default: nil ( no change )
+     #
+     def call_method(method, arguments, options={})
+       options = options.dup
+       event = options.delete(:event)
+       log_level = options.delete(:log_level)
+       raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
+
+       the_method = event.nil? ? method : "#{event}_#{method}".to_sym
+       if respond_to?(the_method)
+         method_name = "#{self.class.name}##{the_method}"
+         logger.info "Start #{method_name}"
+         logger.benchmark_info("Completed #{method_name}",
+           metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
+           log_exception: :full,
+           on_exception_level: :error,
+           silence: log_level
+         ) do
+           self.send(the_method, *arguments)
+         end
+       end
+     end
+
+   end
+ end
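
Everything a worker needs is carried on this one document: perform_method and arguments say what to run, priority and run_at say when, and the AASM block defines the lifecycle that Server processes drive via next_job. A minimal sketch of defining, queueing and steering a job; ImportJob and its argument are hypothetical, and the enqueue helpers added by Concerns::Worker are outside this diff, so the example sticks to the MongoMapper and AASM methods visible above:

    # Hypothetical job: a server invokes perform_method (:perform by default)
    # with the stored arguments after picking the job up via Job.next_job.
    class ImportJob < RocketJob::Job
      def perform(file_name)
        # ... import the file ...
      end
    end

    # Queue an instance using MongoMapper's create!; fields are the keys defined above.
    job = ImportJob.create!(
      description:         'Nightly customer import',
      arguments:           ['customers.csv'],
      priority:            20,         # validated to be within 1..100
      destroy_on_complete: false,      # keep the document around after completion
      collect_output:      true        # store a Hash result in job.result
    )

    # The AASM events defined above can also be driven manually, e.g. from a console:
    job.pause!  if job.running?
    job.resume! if job.paused?
    puts job.status                    # => { state: ..., description: ..., duration: ... }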