rocketjob 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,6 +8,7 @@ module RocketJob
8
8
 
9
9
  embedded_in :worker
10
10
 
11
+ # @formatter:off
11
12
  # Time of the last heartbeat received from this worker
12
13
  key :updated_at, Time
13
14
 
@@ -39,6 +40,8 @@ module RocketJob
39
40
 
40
41
  # If available
41
42
  key :load_average, Float
43
+ # @formatter:on
44
+
42
45
  end
43
46
  end
44
47
 
@@ -15,6 +15,7 @@ module RocketJob
15
15
  # User definable attributes
16
16
  #
17
17
  # The following attributes are set when the job is created
18
+ # @formatter:off
18
19
 
19
20
  # Description for this job instance
20
21
  key :description, String
@@ -128,6 +129,7 @@ module RocketJob
128
129
  # -> :failed -> :running
129
130
  # -> :aborted
130
131
  # -> :aborted
132
+ # -> :queued (when a worker dies)
131
133
  # -> :aborted
132
134
  aasm column: :state do
133
135
  # Job has been created and is queued for processing ( Initial state )
@@ -167,7 +169,7 @@ module RocketJob
167
169
  end
168
170
 
169
171
  event :retry, before: :before_retry do
170
- transitions from: :failed, to: :running
172
+ transitions from: :failed, to: :queued
171
173
  end
172
174
 
173
175
  event :pause, before: :before_pause do
@@ -184,39 +186,40 @@ module RocketJob
184
186
  transitions from: :failed, to: :aborted
185
187
  transitions from: :paused, to: :aborted
186
188
  end
189
+
190
+ event :requeue, before: :before_requeue do
191
+ transitions from: :running, to: :queued
192
+ end
187
193
  end
194
+ # @formatter:on
188
195
 
189
196
  # Create indexes
190
197
  def self.create_indexes
191
198
  # Used by find_and_modify in .next_job
192
- ensure_index({ state:1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
199
+ ensure_index({state: 1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
193
200
  # Remove outdated index if present
194
- drop_index("state_1_priority_1_created_at_1_sub_state_1") rescue nil
201
+ drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
195
202
  # Used by Mission Control
196
203
  ensure_index [[:created_at, 1]]
197
204
  end
198
205
 
199
- # Requeue all jobs for the specified dead worker
206
+ # Requeues all jobs that were running on worker that died
200
207
  def self.requeue_dead_worker(worker_name)
201
- collection.update(
202
- { 'worker_name' => worker_name, 'state' => :running },
203
- { '$unset' => { 'worker_name' => true, 'started_at' => true }, '$set' => { 'state' => :queued } },
204
- multi: true
205
- )
208
+ running.each { |job| job.requeue!(worker_name) }
206
209
  end
207
210
 
208
211
  # Pause all running jobs
209
212
  def self.pause_all
210
- where(state: 'running').each { |job| job.pause! }
213
+ running.each(&:pause!)
211
214
  end
212
215
 
213
216
  # Resume all paused jobs
214
217
  def self.resume_all
215
- where(state: 'paused').each { |job| job.resume! }
218
+ paused.each(&:resume!)
216
219
  end
217
220
 
218
221
  # Returns the number of required arguments for this job
219
- def self.argument_count(method=:perform)
222
+ def self.argument_count(method = :perform)
220
223
  instance_method(method).arity
221
224
  end
222
225
 
@@ -241,7 +244,7 @@ module RocketJob
241
244
 
242
245
  # Returns a human readable duration the job has taken
243
246
  def duration
244
- seconds_as_duration(seconds)
247
+ RocketJob.seconds_as_duration(seconds)
245
248
  end
246
249
 
247
250
  # A job has expired if the expiry time has passed before it is started
@@ -262,57 +265,94 @@ module RocketJob
262
265
  attrs.delete('completed_at')
263
266
  attrs.delete('result')
264
267
  # Ensure 'paused_at' appears first in the hash
265
- { 'paused_at' => completed_at }.merge(attrs)
268
+ {'paused_at' => completed_at}.merge(attrs)
266
269
  when aborted?
267
270
  attrs.delete('completed_at')
268
271
  attrs.delete('result')
269
- { 'aborted_at' => completed_at }.merge(attrs)
272
+ {'aborted_at' => completed_at}.merge(attrs)
270
273
  when failed?
271
274
  attrs.delete('completed_at')
272
275
  attrs.delete('result')
273
- { 'failed_at' => completed_at }.merge(attrs)
276
+ {'failed_at' => completed_at}.merge(attrs)
274
277
  else
275
278
  attrs
276
279
  end
277
280
  end
278
281
 
279
- def status(time_zone='Eastern Time (US & Canada)')
282
+ def status(time_zone = 'Eastern Time (US & Canada)')
280
283
  h = as_json
281
284
  h.delete('seconds')
282
285
  h.delete('perform_method') if h['perform_method'] == :perform
283
- h.dup.each_pair do |k,v|
286
+ h.dup.each_pair do |k, v|
284
287
  case
285
- when v.kind_of?(Time)
288
+ when v.is_a?(Time)
286
289
  h[k] = v.in_time_zone(time_zone).to_s
287
- when v.kind_of?(BSON::ObjectId)
290
+ when v.is_a?(BSON::ObjectId)
288
291
  h[k] = v.to_s
289
292
  end
290
293
  end
291
294
  h
292
295
  end
293
296
 
294
- # TODO Jobs are not currently automatically retried. Is there a need?
295
- def seconds_to_delay(count)
296
- # TODO Consider lowering the priority automatically after every retry?
297
- # Same basic formula for calculating retry interval as delayed_job and Sidekiq
298
- (count ** 4) + 15 + (rand(30)*(count+1))
299
- end
300
-
301
297
  # Patch the way MongoMapper reloads a model
302
298
  # Only reload MongoMapper attributes, leaving other instance variables untouched
303
299
  def reload
304
- if doc = collection.find_one(:_id => id)
300
+ if (doc = collection.find_one(_id: id))
301
+ # Clear out keys that are not returned during the reload from MongoDB
302
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
303
+ initialize_default_values
305
304
  load_from_database(doc)
306
305
  self
307
306
  else
308
- raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
307
+ if destroy_on_complete
308
+ self.state = :completed
309
+ before_complete
310
+ else
311
+ raise(MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection")
312
+ end
309
313
  end
310
314
  end
311
315
 
312
316
  # After this model is read, convert any hashes in the arguments list to HashWithIndifferentAccess
313
317
  def load_from_database(*args)
314
318
  super
315
- self.arguments = arguments.collect {|i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i } if arguments.present?
319
+ if arguments.present?
320
+ self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
321
+ end
322
+ end
323
+
324
+ # Set exception information for this job and fail it
325
+ def fail!(worker_name='user', exc_or_message='Job failed through user action')
326
+ if exc_or_message.is_a?(Exception)
327
+ self.exception = JobException.from_exception(exc_or_message)
328
+ exception.worker_name = worker_name
329
+ else
330
+ build_exception(
331
+ class_name: 'RocketJob::JobException',
332
+ message: exc_or_message,
333
+ backtrace: [],
334
+ worker_name: worker_name
335
+ )
336
+ end
337
+ # not available as #super
338
+ aasm.current_event = :fail!
339
+ aasm_fire_event(:fail, persist: true)
340
+ end
341
+
342
+ # Requeue this running job since the worker assigned to it has died
343
+ def requeue!(worker_name_=nil)
344
+ return false if worker_name_ && (worker_name != worker_name_)
345
+ # not available as #super
346
+ aasm.current_event = :requeue!
347
+ aasm_fire_event(:requeue, persist: true)
348
+ end
349
+
350
+ # Requeue this running job since the worker assigned to it has died
351
+ def requeue(worker_name_=nil)
352
+ return false if worker_name_ && (worker_name != worker_name_)
353
+ # not available as #super
354
+ aasm.current_event = :requeue
355
+ aasm_fire_event(:requeue, persist: false)
316
356
  end
317
357
 
318
358
  ############################################################################
@@ -330,8 +370,9 @@ module RocketJob
330
370
  end
331
371
 
332
372
  def before_fail
333
- self.completed_at = Time.now
334
- self.worker_name = nil
373
+ self.completed_at = Time.now
374
+ self.worker_name = nil
375
+ self.failure_count += 1
335
376
  end
336
377
 
337
378
  def before_retry
@@ -340,7 +381,7 @@ module RocketJob
340
381
 
341
382
  def before_pause
342
383
  self.completed_at = Time.now
343
- self.worker_name = nil
384
+ self.worker_name = nil
344
385
  end
345
386
 
346
387
  def before_resume
@@ -349,130 +390,12 @@ module RocketJob
349
390
 
350
391
  def before_abort
351
392
  self.completed_at = Time.now
352
- self.worker_name = nil
393
+ self.worker_name = nil
353
394
  end
354
395
 
355
- # Returns a human readable duration from the supplied [Float] number of seconds
356
- def seconds_as_duration(seconds)
357
- time = Time.at(seconds)
358
- if seconds >= 1.day
359
- "#{(seconds / 1.day).to_i}d #{time.strftime('%-Hh %-Mm %-Ss')}"
360
- elsif seconds >= 1.hour
361
- time.strftime('%-Hh %-Mm %-Ss')
362
- elsif seconds >= 1.minute
363
- time.strftime('%-Mm %-Ss')
364
- else
365
- time.strftime('%-Ss')
366
- end
367
- end
368
-
369
- # Returns the next job to work on in priority based order
370
- # Returns nil if there are currently no queued jobs, or processing batch jobs
371
- # with records that require processing
372
- #
373
- # Parameters
374
- # worker_name [String]
375
- # Name of the worker that will be processing this job
376
- #
377
- # skip_job_ids [Array<BSON::ObjectId>]
378
- # Job ids to exclude when looking for the next job
379
- #
380
- # Note:
381
- # If a job is in queued state it will be started
382
- def self.next_job(worker_name, skip_job_ids = nil)
383
- query = {
384
- '$and' => [
385
- {
386
- '$or' => [
387
- { 'state' => 'queued' }, # Jobs
388
- { 'state' => 'running', 'sub_state' => :processing } # Slices
389
- ]
390
- },
391
- {
392
- '$or' => [
393
- { run_at: { '$exists' => false } },
394
- { run_at: { '$lte' => Time.now } }
395
- ]
396
- },
397
- ]
398
- }
399
- query['_id'] = { '$nin' => skip_job_ids } if skip_job_ids && skip_job_ids.size > 0
400
-
401
- while doc = find_and_modify(
402
- query: query,
403
- sort: [['priority', 'asc'], ['created_at', 'asc']],
404
- update: { '$set' => { 'worker_name' => worker_name, 'state' => 'running' } }
405
- )
406
- job = load(doc)
407
- if job.running?
408
- return job
409
- else
410
- if job.expired?
411
- job.destroy
412
- logger.info "Destroyed expired job #{job.class.name}, id:#{job.id}"
413
- else
414
- # Also update in-memory state and run call-backs
415
- job.start
416
- job.set(started_at: job.started_at)
417
- return job
418
- end
419
- end
420
- end
421
- end
422
-
423
- ############################################################################
424
- private
425
-
426
- # Set exception information for this job
427
- def set_exception(worker_name, exc)
396
+ def before_requeue
397
+ self.started_at = nil
428
398
  self.worker_name = nil
429
- self.failure_count += 1
430
- self.exception = JobException.from_exception(exc)
431
- exception.worker_name = worker_name
432
- fail! unless failed?
433
- logger.error("Exception running #{self.class.name}##{perform_method}", exc)
434
- end
435
-
436
- # Calls a method on this job, if it is defined
437
- # Adds the event name to the method call if supplied
438
- #
439
- # Returns [Object] the result of calling the method
440
- #
441
- # Parameters
442
- # method [Symbol]
443
- # The method to call on this job
444
- #
445
- # arguments [Array]
446
- # Arguments to pass to the method call
447
- #
448
- # Options:
449
- # event: [Symbol]
450
- # Any one of: :before, :after
451
- # Default: None, just calls the method itself
452
- #
453
- # log_level: [Symbol]
454
- # Log level to apply to silence logging during the call
455
- # Default: nil ( no change )
456
- #
457
- def call_method(method, arguments, options={})
458
- options = options.dup
459
- event = options.delete(:event)
460
- log_level = options.delete(:log_level)
461
- raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
462
-
463
- the_method = event.nil? ? method : "#{event}_#{method}".to_sym
464
- if respond_to?(the_method)
465
- method_name = "#{self.class.name}##{the_method}"
466
- logger.info "Start #{method_name}"
467
- logger.benchmark_info("Completed #{method_name}",
468
- metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
469
- log_exception: :full,
470
- on_exception_level: :error,
471
- silence: log_level
472
- ) do
473
- self.send(the_method, *arguments)
474
- end
475
- end
476
399
  end
477
400
 
478
401
  end
@@ -6,6 +6,7 @@ module RocketJob
6
6
  class JobException
7
7
  include MongoMapper::EmbeddedDocument
8
8
 
9
+ # @formatter:off
9
10
  # Name of the exception class
10
11
  key :class_name, String
11
12
 
@@ -13,7 +14,7 @@ module RocketJob
13
14
  key :message, String
14
15
 
15
16
  # Exception Backtrace [Array<String>]
16
- key :backtrace, Array
17
+ key :backtrace, Array, default: []
17
18
 
18
19
  # Name of the worker on which this exception occurred
19
20
  key :worker_name, String
@@ -21,15 +22,16 @@ module RocketJob
21
22
  # The record within which this exception occurred
22
23
  key :record_number, Integer
23
24
 
25
+ # @formatter:on
26
+
24
27
  # Returns [JobException] built from the supplied exception
25
28
  def self.from_exception(exc)
26
- self.new(
27
- class_name: exc.class.name,
28
- message: exc.message,
29
- backtrace: exc.backtrace || []
29
+ new(
30
+ class_name: exc.class.name,
31
+ message: exc.message,
32
+ backtrace: exc.backtrace || []
30
33
  )
31
34
  end
32
35
 
33
36
  end
34
37
  end
35
-
@@ -5,17 +5,21 @@ module RocketJob
5
5
  #
6
6
  # * The first time Dirmon runs it gathers the names of files in the monitored
7
7
  # folders.
8
- # * On completion Dirmon kicks off a new Dimon job passing it the list
8
+ # * On completion Dirmon kicks off a new Dirmon job passing it the list
9
9
  # of known files.
10
10
  # * On each subsequent Dirmon run it checks the size of each file against the
11
- # previous list of known files, and only of the file size has not changed
11
+ # previous list of known files, and only if the file size has not changed
12
12
  # the corresponding job is started for that file.
13
13
  # * If the job implements #file_store_upload or #upload, that method is called
14
14
  # and then the file is deleted, or moved to the archive_directory if supplied
15
+
15
16
  # * Otherwise, the file is moved to the supplied archive_directory (defaults to
16
17
  # `_archive` in the same folder as the file itself). The absolute path and
17
18
  # file name of the archived file is passed into the job as its first argument.
18
- # Note: This means that such jobs _must_ have a Hash as the first agrument
19
+
20
+ # Note:
21
+ # - Jobs that do not implement #file_store_upload or #upload _must_ have a
22
+ # Hash as the first argument
19
23
  #
20
24
  # With RocketJob Pro, the file is automatically uploaded into the job itself
21
25
  # using the job's #upload method, after which the file is archived or deleted
@@ -25,35 +29,18 @@ module RocketJob
25
29
  #
26
30
  #
27
31
  # Note:
28
- # Do _not_ start multiple copies of Dirmon as it will result in duplicate
29
- # jobs being started.
32
+ # Use `DirmonJob.start` to prevent creating multiple Dirmon jobs, otherwise
33
+ # it will result in multiple jobs being started
30
34
  class DirmonJob < RocketJob::Job
31
- DEFAULT_ARCHIVE_DIR = '_archive'.freeze
35
+ # Only allow one DirmonJob instance to be running at a time
36
+ include RocketJob::Concerns::Singleton
32
37
 
33
38
  rocket_job do |job|
34
39
  job.priority = 40
35
40
  end
36
41
 
37
42
  # Number of seconds between directory scans. Default 5 mins
38
- key :check_seconds, Float, default: 300.0
39
-
40
- # TODO Make :perform_later, :perform_now, :perform, :now protected/private
41
- # class << self
42
- # # Ensure that only one instance of the job is running.
43
- # protected :perform_later, :perform_now, :perform, :now
44
- # end
45
- #self.send(:protected, :perform_later)
46
-
47
- # Start the single instance of this job
48
- # Returns true if the job was started
49
- # Returns false if the job is already running and doe not need to be started
50
- def self.start(&block)
51
- # Prevent multiple Dirmon Jobs from running at the same time
52
- return false if where(state: [ :running, :queued ]).count > 0
53
-
54
- perform_later({}, &block)
55
- true
56
- end
43
+ key :check_seconds, Float, default: 300.0
57
44
 
58
45
  # Iterate over each Dirmon entry looking for new files
59
46
  # If a new file is found, it is not processed immediately, instead
@@ -70,21 +57,19 @@ module RocketJob
70
57
  end
71
58
  end
72
59
 
60
+ protected
61
+
73
62
  # Checks the directories for new files, starting jobs if files have not changed
74
63
  # since the last run
75
64
  def check_directories(previous_file_names)
76
65
  new_file_names = {}
77
- DirmonEntry.where(enabled: true).each do |entry|
78
- logger.tagged("Entry:#{entry.id}") do
79
- Dir[entry.path].each do |file_name|
80
- next if File.directory?(file_name)
81
- next if file_name.include?(DEFAULT_ARCHIVE_DIR)
82
- # BSON Keys cannot contain periods
83
- key = file_name.gsub('.', '_')
84
- previous_size = previous_file_names[key]
85
- if size = check_file(entry, file_name, previous_size)
86
- new_file_names[key] = size
87
- end
66
+ DirmonEntry.where(state: :enabled).each do |entry|
67
+ entry.each do |pathname|
68
+ # BSON Keys cannot contain periods
69
+ key = pathname.to_s.gsub('.', '_')
70
+ previous_size = previous_file_names[key]
71
+ if (size = check_file(entry, pathname, previous_size))
72
+ new_file_names[key] = size
88
73
  end
89
74
  end
90
75
  end
@@ -93,14 +78,14 @@ module RocketJob
93
78
 
94
79
  # Checks if a file should result in starting a job
95
80
  # Returns [Integer] file size, or nil if the file started a job
96
- def check_file(entry, file_name, previous_size)
97
- size = File.size(file_name)
81
+ def check_file(entry, pathname, previous_size)
82
+ size = pathname.size
98
83
  if previous_size && (previous_size == size)
99
- logger.info("File stabilized: #{file_name}. Starting: #{entry.job_name}")
100
- start_job(entry, file_name)
84
+ logger.info("File stabilized: #{pathname}. Starting: #{entry.job_class_name}")
85
+ entry.later(pathname)
101
86
  nil
102
87
  else
103
- logger.info("Found file: #{file_name}. File size: #{size}")
88
+ logger.info("Found file: #{pathname}. File size: #{size}")
104
89
  # Keep for the next run
105
90
  size
106
91
  end
@@ -109,67 +94,6 @@ module RocketJob
109
94
  nil
110
95
  end
111
96
 
112
- # Starts the job for the supplied entry
113
- def start_job(entry, file_name)
114
- entry.job_class.perform_later(*entry.arguments) do |job|
115
- job.perform_method = entry.perform_method
116
- # Set properties
117
- entry.properties.each_pair { |k, v| job.send("#{k}=".to_sym, v) }
118
-
119
- upload_file(job, file_name, entry.archive_directory)
120
- end
121
- end
122
-
123
- # Upload the file to the job
124
- def upload_file(job, file_name, archive_directory)
125
- if job.respond_to?(:file_store_upload)
126
- # Allow the job to determine what to do with the file
127
- job.file_store_upload(file_name)
128
- archive_file(file_name, archive_directory)
129
- elsif job.respond_to?(:upload)
130
- # With RocketJob Pro the file can be uploaded directly into the Job itself
131
- job.upload(file_name)
132
- archive_file(file_name, archive_directory)
133
- else
134
- upload_default(job, file_name, archive_directory)
135
- end
136
- end
137
-
138
- # Archives the file for a job where there was no #file_store_upload or #upload method
139
- def upload_default(job, file_name, archive_directory)
140
- # The first argument must be a hash
141
- job.arguments << {} if job.arguments.size == 0
142
- # If no archive directory is supplied, use DEFAULT_ARCHIVE_DIR under the same path as the file
143
- archive_directory ||= File.join(File.dirname(file_name), DEFAULT_ARCHIVE_DIR)
144
- file_name = File.join(archive_directory, File.basename(file_name))
145
- job.arguments.first[:full_file_name] = File.absolute_path(file_name)
146
- archive_file(file_name, archive_directory)
147
- end
148
-
149
- # Move the file to the archive directory
150
- # Or, delete it if no archive directory was supplied for this entry
151
- #
152
- # If the file_name contains a relative path the relative path will be
153
- # created in the archive_directory before moving the file.
154
- #
155
- # If an absolute path is supplied, then the file is just moved into the
156
- # archive directory without any sub-directories
157
- def archive_file(file_name, archive_directory)
158
- # Move file to archive directory if set
159
- if archive_directory
160
- # Absolute path?
161
- target_file_name = if file_name.start_with?('/')
162
- File.join(archive_directory, File.basename(file_name))
163
- else
164
- File.join(archive_directory, file_name)
165
- end
166
- FileUtils.mkdir_p(File.dirname(target_file_name))
167
- FileUtils.move(file_name, target_file_name)
168
- else
169
- File.delete(file_name)
170
- end
171
- end
172
-
173
97
  end
174
98
  end
175
99
  end