rocketjob 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ module RocketJob
8
8
 
9
9
  embedded_in :worker
10
10
 
11
+ # @formatter:off
11
12
  # Time of the last heartbeat received from this worker
12
13
  key :updated_at, Time
13
14
 
@@ -39,6 +40,8 @@ module RocketJob
39
40
 
40
41
  # If available
41
42
  key :load_average, Float
43
+ # @formatter:on
44
+
42
45
  end
43
46
  end
44
47
 
@@ -15,6 +15,7 @@ module RocketJob
15
15
  # User definable attributes
16
16
  #
17
17
  # The following attributes are set when the job is created
18
+ # @formatter:off
18
19
 
19
20
  # Description for this job instance
20
21
  key :description, String
@@ -128,6 +129,7 @@ module RocketJob
128
129
  # -> :failed -> :running
129
130
  # -> :aborted
130
131
  # -> :aborted
132
+ # -> :queued (when a worker dies)
131
133
  # -> :aborted
132
134
  aasm column: :state do
133
135
  # Job has been created and is queued for processing ( Initial state )
@@ -167,7 +169,7 @@ module RocketJob
167
169
  end
168
170
 
169
171
  event :retry, before: :before_retry do
170
- transitions from: :failed, to: :running
172
+ transitions from: :failed, to: :queued
171
173
  end
172
174
 
173
175
  event :pause, before: :before_pause do
@@ -184,39 +186,40 @@ module RocketJob
184
186
  transitions from: :failed, to: :aborted
185
187
  transitions from: :paused, to: :aborted
186
188
  end
189
+
190
+ event :requeue, before: :before_requeue do
191
+ transitions from: :running, to: :queued
192
+ end
187
193
  end
194
+ # @formatter:on
188
195
 
189
196
  # Create indexes
190
197
  def self.create_indexes
191
198
  # Used by find_and_modify in .next_job
192
- ensure_index({ state:1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
199
+ ensure_index({state: 1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
193
200
  # Remove outdated index if present
194
- drop_index("state_1_priority_1_created_at_1_sub_state_1") rescue nil
201
+ drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
195
202
  # Used by Mission Control
196
203
  ensure_index [[:created_at, 1]]
197
204
  end
198
205
 
199
- # Requeue all jobs for the specified dead worker
206
+ # Requeues all jobs that were running on worker that died
200
207
  def self.requeue_dead_worker(worker_name)
201
- collection.update(
202
- { 'worker_name' => worker_name, 'state' => :running },
203
- { '$unset' => { 'worker_name' => true, 'started_at' => true }, '$set' => { 'state' => :queued } },
204
- multi: true
205
- )
208
+ running.each { |job| job.requeue!(worker_name) }
206
209
  end
207
210
 
208
211
  # Pause all running jobs
209
212
  def self.pause_all
210
- where(state: 'running').each { |job| job.pause! }
213
+ running.each(&:pause!)
211
214
  end
212
215
 
213
216
  # Resume all paused jobs
214
217
  def self.resume_all
215
- where(state: 'paused').each { |job| job.resume! }
218
+ paused.each(&:resume!)
216
219
  end
217
220
 
218
221
  # Returns the number of required arguments for this job
219
- def self.argument_count(method=:perform)
222
+ def self.argument_count(method = :perform)
220
223
  instance_method(method).arity
221
224
  end
222
225
 
@@ -241,7 +244,7 @@ module RocketJob
241
244
 
242
245
  # Returns a human readable duration the job has taken
243
246
  def duration
244
- seconds_as_duration(seconds)
247
+ RocketJob.seconds_as_duration(seconds)
245
248
  end
246
249
 
247
250
  # A job has expired if the expiry time has passed before it is started
@@ -262,57 +265,94 @@ module RocketJob
262
265
  attrs.delete('completed_at')
263
266
  attrs.delete('result')
264
267
  # Ensure 'paused_at' appears first in the hash
265
- { 'paused_at' => completed_at }.merge(attrs)
268
+ {'paused_at' => completed_at}.merge(attrs)
266
269
  when aborted?
267
270
  attrs.delete('completed_at')
268
271
  attrs.delete('result')
269
- { 'aborted_at' => completed_at }.merge(attrs)
272
+ {'aborted_at' => completed_at}.merge(attrs)
270
273
  when failed?
271
274
  attrs.delete('completed_at')
272
275
  attrs.delete('result')
273
- { 'failed_at' => completed_at }.merge(attrs)
276
+ {'failed_at' => completed_at}.merge(attrs)
274
277
  else
275
278
  attrs
276
279
  end
277
280
  end
278
281
 
279
- def status(time_zone='Eastern Time (US & Canada)')
282
+ def status(time_zone = 'Eastern Time (US & Canada)')
280
283
  h = as_json
281
284
  h.delete('seconds')
282
285
  h.delete('perform_method') if h['perform_method'] == :perform
283
- h.dup.each_pair do |k,v|
286
+ h.dup.each_pair do |k, v|
284
287
  case
285
- when v.kind_of?(Time)
288
+ when v.is_a?(Time)
286
289
  h[k] = v.in_time_zone(time_zone).to_s
287
- when v.kind_of?(BSON::ObjectId)
290
+ when v.is_a?(BSON::ObjectId)
288
291
  h[k] = v.to_s
289
292
  end
290
293
  end
291
294
  h
292
295
  end
293
296
 
294
- # TODO Jobs are not currently automatically retried. Is there a need?
295
- def seconds_to_delay(count)
296
- # TODO Consider lowering the priority automatically after every retry?
297
- # Same basic formula for calculating retry interval as delayed_job and Sidekiq
298
- (count ** 4) + 15 + (rand(30)*(count+1))
299
- end
300
-
301
297
  # Patch the way MongoMapper reloads a model
302
298
  # Only reload MongoMapper attributes, leaving other instance variables untouched
303
299
  def reload
304
- if doc = collection.find_one(:_id => id)
300
+ if (doc = collection.find_one(_id: id))
301
+ # Clear out keys that are not returned during the reload from MongoDB
302
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
303
+ initialize_default_values
305
304
  load_from_database(doc)
306
305
  self
307
306
  else
308
- raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
307
+ if destroy_on_complete
308
+ self.state = :completed
309
+ before_complete
310
+ else
311
+ raise(MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection")
312
+ end
309
313
  end
310
314
  end
311
315
 
312
316
  # After this model is read, convert any hashes in the arguments list to HashWithIndifferentAccess
313
317
  def load_from_database(*args)
314
318
  super
315
- self.arguments = arguments.collect {|i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i } if arguments.present?
319
+ if arguments.present?
320
+ self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
321
+ end
322
+ end
323
+
324
+ # Set exception information for this job and fail it
325
+ def fail!(worker_name='user', exc_or_message='Job failed through user action')
326
+ if exc_or_message.is_a?(Exception)
327
+ self.exception = JobException.from_exception(exc_or_message)
328
+ exception.worker_name = worker_name
329
+ else
330
+ build_exception(
331
+ class_name: 'RocketJob::JobException',
332
+ message: exc_or_message,
333
+ backtrace: [],
334
+ worker_name: worker_name
335
+ )
336
+ end
337
+ # not available as #super
338
+ aasm.current_event = :fail!
339
+ aasm_fire_event(:fail, persist: true)
340
+ end
341
+
342
+ # Requeue this running job since the worker assigned to it has died
343
+ def requeue!(worker_name_=nil)
344
+ return false if worker_name_ && (worker_name != worker_name_)
345
+ # not available as #super
346
+ aasm.current_event = :requeue!
347
+ aasm_fire_event(:requeue, persist: true)
348
+ end
349
+
350
+ # Requeue this running job since the worker assigned to it has died
351
+ def requeue(worker_name_=nil)
352
+ return false if worker_name_ && (worker_name != worker_name_)
353
+ # not available as #super
354
+ aasm.current_event = :requeue
355
+ aasm_fire_event(:requeue, persist: false)
316
356
  end
317
357
 
318
358
  ############################################################################
@@ -330,8 +370,9 @@ module RocketJob
330
370
  end
331
371
 
332
372
  def before_fail
333
- self.completed_at = Time.now
334
- self.worker_name = nil
373
+ self.completed_at = Time.now
374
+ self.worker_name = nil
375
+ self.failure_count += 1
335
376
  end
336
377
 
337
378
  def before_retry
@@ -340,7 +381,7 @@ module RocketJob
340
381
 
341
382
  def before_pause
342
383
  self.completed_at = Time.now
343
- self.worker_name = nil
384
+ self.worker_name = nil
344
385
  end
345
386
 
346
387
  def before_resume
@@ -349,130 +390,12 @@ module RocketJob
349
390
 
350
391
  def before_abort
351
392
  self.completed_at = Time.now
352
- self.worker_name = nil
393
+ self.worker_name = nil
353
394
  end
354
395
 
355
- # Returns a human readable duration from the supplied [Float] number of seconds
356
- def seconds_as_duration(seconds)
357
- time = Time.at(seconds)
358
- if seconds >= 1.day
359
- "#{(seconds / 1.day).to_i}d #{time.strftime('%-Hh %-Mm %-Ss')}"
360
- elsif seconds >= 1.hour
361
- time.strftime('%-Hh %-Mm %-Ss')
362
- elsif seconds >= 1.minute
363
- time.strftime('%-Mm %-Ss')
364
- else
365
- time.strftime('%-Ss')
366
- end
367
- end
368
-
369
- # Returns the next job to work on in priority based order
370
- # Returns nil if there are currently no queued jobs, or processing batch jobs
371
- # with records that require processing
372
- #
373
- # Parameters
374
- # worker_name [String]
375
- # Name of the worker that will be processing this job
376
- #
377
- # skip_job_ids [Array<BSON::ObjectId>]
378
- # Job ids to exclude when looking for the next job
379
- #
380
- # Note:
381
- # If a job is in queued state it will be started
382
- def self.next_job(worker_name, skip_job_ids = nil)
383
- query = {
384
- '$and' => [
385
- {
386
- '$or' => [
387
- { 'state' => 'queued' }, # Jobs
388
- { 'state' => 'running', 'sub_state' => :processing } # Slices
389
- ]
390
- },
391
- {
392
- '$or' => [
393
- { run_at: { '$exists' => false } },
394
- { run_at: { '$lte' => Time.now } }
395
- ]
396
- },
397
- ]
398
- }
399
- query['_id'] = { '$nin' => skip_job_ids } if skip_job_ids && skip_job_ids.size > 0
400
-
401
- while doc = find_and_modify(
402
- query: query,
403
- sort: [['priority', 'asc'], ['created_at', 'asc']],
404
- update: { '$set' => { 'worker_name' => worker_name, 'state' => 'running' } }
405
- )
406
- job = load(doc)
407
- if job.running?
408
- return job
409
- else
410
- if job.expired?
411
- job.destroy
412
- logger.info "Destroyed expired job #{job.class.name}, id:#{job.id}"
413
- else
414
- # Also update in-memory state and run call-backs
415
- job.start
416
- job.set(started_at: job.started_at)
417
- return job
418
- end
419
- end
420
- end
421
- end
422
-
423
- ############################################################################
424
- private
425
-
426
- # Set exception information for this job
427
- def set_exception(worker_name, exc)
396
+ def before_requeue
397
+ self.started_at = nil
428
398
  self.worker_name = nil
429
- self.failure_count += 1
430
- self.exception = JobException.from_exception(exc)
431
- exception.worker_name = worker_name
432
- fail! unless failed?
433
- logger.error("Exception running #{self.class.name}##{perform_method}", exc)
434
- end
435
-
436
- # Calls a method on this job, if it is defined
437
- # Adds the event name to the method call if supplied
438
- #
439
- # Returns [Object] the result of calling the method
440
- #
441
- # Parameters
442
- # method [Symbol]
443
- # The method to call on this job
444
- #
445
- # arguments [Array]
446
- # Arguments to pass to the method call
447
- #
448
- # Options:
449
- # event: [Symbol]
450
- # Any one of: :before, :after
451
- # Default: None, just calls the method itself
452
- #
453
- # log_level: [Symbol]
454
- # Log level to apply to silence logging during the call
455
- # Default: nil ( no change )
456
- #
457
- def call_method(method, arguments, options={})
458
- options = options.dup
459
- event = options.delete(:event)
460
- log_level = options.delete(:log_level)
461
- raise(ArgumentError, "Unknown #{self.class.name}#call_method options: #{options.inspect}") if options.size > 0
462
-
463
- the_method = event.nil? ? method : "#{event}_#{method}".to_sym
464
- if respond_to?(the_method)
465
- method_name = "#{self.class.name}##{the_method}"
466
- logger.info "Start #{method_name}"
467
- logger.benchmark_info("Completed #{method_name}",
468
- metric: "rocketjob/#{self.class.name.underscore}/#{the_method}",
469
- log_exception: :full,
470
- on_exception_level: :error,
471
- silence: log_level
472
- ) do
473
- self.send(the_method, *arguments)
474
- end
475
- end
476
399
  end
477
400
 
478
401
  end
@@ -6,6 +6,7 @@ module RocketJob
6
6
  class JobException
7
7
  include MongoMapper::EmbeddedDocument
8
8
 
9
+ # @formatter:off
9
10
  # Name of the exception class
10
11
  key :class_name, String
11
12
 
@@ -13,7 +14,7 @@ module RocketJob
13
14
  key :message, String
14
15
 
15
16
  # Exception Backtrace [Array<String>]
16
- key :backtrace, Array
17
+ key :backtrace, Array, default: []
17
18
 
18
19
  # Name of the worker on which this exception occurred
19
20
  key :worker_name, String
@@ -21,15 +22,16 @@ module RocketJob
21
22
  # The record within which this exception occurred
22
23
  key :record_number, Integer
23
24
 
25
+ # @formatter:on
26
+
24
27
  # Returns [JobException] built from the supplied exception
25
28
  def self.from_exception(exc)
26
- self.new(
27
- class_name: exc.class.name,
28
- message: exc.message,
29
- backtrace: exc.backtrace || []
29
+ new(
30
+ class_name: exc.class.name,
31
+ message: exc.message,
32
+ backtrace: exc.backtrace || []
30
33
  )
31
34
  end
32
35
 
33
36
  end
34
37
  end
35
-
@@ -5,17 +5,21 @@ module RocketJob
5
5
  #
6
6
  # * The first time Dirmon runs it gathers the names of files in the monitored
7
7
  # folders.
8
- # * On completion Dirmon kicks off a new Dimon job passing it the list
8
+ # * On completion Dirmon kicks off a new Dirmon job passing it the list
9
9
  # of known files.
10
10
  # * On each subsequent Dirmon run it checks the size of each file against the
11
- # previous list of known files, and only of the file size has not changed
11
+ # previous list of known files, and only if the file size has not changed
12
12
  # the corresponding job is started for that file.
13
13
  # * If the job implements #file_store_upload or #upload, that method is called
14
14
  # and then the file is deleted, or moved to the archive_directory if supplied
15
+
15
16
  # * Otherwise, the file is moved to the supplied archive_directory (defaults to
16
17
  # `_archive` in the same folder as the file itself. The absolute path and
17
18
  # file name of the archived file is passed into the job as it's first argument.
18
- # Note: This means that such jobs _must_ have a Hash as the first agrument
19
+
20
+ # Note:
21
+ # - Jobs that do not implement #file_store_upload or #upload _must_ have a
22
+ # Hash as the first argument
19
23
  #
20
24
  # With RocketJob Pro, the file is automatically uploaded into the job itself
21
25
  # using the job's #upload method, after which the file is archived or deleted
@@ -25,35 +29,18 @@ module RocketJob
25
29
  #
26
30
  #
27
31
  # Note:
28
- # Do _not_ start multiple copies of Dirmon as it will result in duplicate
29
- # jobs being started.
32
+ # Use `DirmonJob.start` to prevent creating multiple Dirmon jobs, otherwise
33
+ # it will result in multiple jobs being started
30
34
  class DirmonJob < RocketJob::Job
31
- DEFAULT_ARCHIVE_DIR = '_archive'.freeze
35
+ # Only allow one DirmonJob instance to be running at a time
36
+ include RocketJob::Concerns::Singleton
32
37
 
33
38
  rocket_job do |job|
34
39
  job.priority = 40
35
40
  end
36
41
 
37
42
  # Number of seconds between directory scans. Default 5 mins
38
- key :check_seconds, Float, default: 300.0
39
-
40
- # TODO Make :perform_later, :perform_now, :perform, :now protected/private
41
- # class << self
42
- # # Ensure that only one instance of the job is running.
43
- # protected :perform_later, :perform_now, :perform, :now
44
- # end
45
- #self.send(:protected, :perform_later)
46
-
47
- # Start the single instance of this job
48
- # Returns true if the job was started
49
- # Returns false if the job is already running and doe not need to be started
50
- def self.start(&block)
51
- # Prevent multiple Dirmon Jobs from running at the same time
52
- return false if where(state: [ :running, :queued ]).count > 0
53
-
54
- perform_later({}, &block)
55
- true
56
- end
43
+ key :check_seconds, Float, default: 300.0
57
44
 
58
45
  # Iterate over each Dirmon entry looking for new files
59
46
  # If a new file is found, it is not processed immediately, instead
@@ -70,21 +57,19 @@ module RocketJob
70
57
  end
71
58
  end
72
59
 
60
+ protected
61
+
73
62
  # Checks the directories for new files, starting jobs if files have not changed
74
63
  # since the last run
75
64
  def check_directories(previous_file_names)
76
65
  new_file_names = {}
77
- DirmonEntry.where(enabled: true).each do |entry|
78
- logger.tagged("Entry:#{entry.id}") do
79
- Dir[entry.path].each do |file_name|
80
- next if File.directory?(file_name)
81
- next if file_name.include?(DEFAULT_ARCHIVE_DIR)
82
- # BSON Keys cannot contain periods
83
- key = file_name.gsub('.', '_')
84
- previous_size = previous_file_names[key]
85
- if size = check_file(entry, file_name, previous_size)
86
- new_file_names[key] = size
87
- end
66
+ DirmonEntry.where(state: :enabled).each do |entry|
67
+ entry.each do |pathname|
68
+ # BSON Keys cannot contain periods
69
+ key = pathname.to_s.gsub('.', '_')
70
+ previous_size = previous_file_names[key]
71
+ if (size = check_file(entry, pathname, previous_size))
72
+ new_file_names[key] = size
88
73
  end
89
74
  end
90
75
  end
@@ -93,14 +78,14 @@ module RocketJob
93
78
 
94
79
  # Checks if a file should result in starting a job
95
80
  # Returns [Integer] file size, or nil if the file started a job
96
- def check_file(entry, file_name, previous_size)
97
- size = File.size(file_name)
81
+ def check_file(entry, pathname, previous_size)
82
+ size = pathname.size
98
83
  if previous_size && (previous_size == size)
99
- logger.info("File stabilized: #{file_name}. Starting: #{entry.job_name}")
100
- start_job(entry, file_name)
84
+ logger.info("File stabilized: #{pathname}. Starting: #{entry.job_class_name}")
85
+ entry.later(pathname)
101
86
  nil
102
87
  else
103
- logger.info("Found file: #{file_name}. File size: #{size}")
88
+ logger.info("Found file: #{pathname}. File size: #{size}")
104
89
  # Keep for the next run
105
90
  size
106
91
  end
@@ -109,67 +94,6 @@ module RocketJob
109
94
  nil
110
95
  end
111
96
 
112
- # Starts the job for the supplied entry
113
- def start_job(entry, file_name)
114
- entry.job_class.perform_later(*entry.arguments) do |job|
115
- job.perform_method = entry.perform_method
116
- # Set properties
117
- entry.properties.each_pair { |k, v| job.send("#{k}=".to_sym, v) }
118
-
119
- upload_file(job, file_name, entry.archive_directory)
120
- end
121
- end
122
-
123
- # Upload the file to the job
124
- def upload_file(job, file_name, archive_directory)
125
- if job.respond_to?(:file_store_upload)
126
- # Allow the job to determine what to do with the file
127
- job.file_store_upload(file_name)
128
- archive_file(file_name, archive_directory)
129
- elsif job.respond_to?(:upload)
130
- # With RocketJob Pro the file can be uploaded directly into the Job itself
131
- job.upload(file_name)
132
- archive_file(file_name, archive_directory)
133
- else
134
- upload_default(job, file_name, archive_directory)
135
- end
136
- end
137
-
138
- # Archives the file for a job where there was no #file_store_upload or #upload method
139
- def upload_default(job, file_name, archive_directory)
140
- # The first argument must be a hash
141
- job.arguments << {} if job.arguments.size == 0
142
- # If no archive directory is supplied, use DEFAULT_ARCHIVE_DIR under the same path as the file
143
- archive_directory ||= File.join(File.dirname(file_name), DEFAULT_ARCHIVE_DIR)
144
- file_name = File.join(archive_directory, File.basename(file_name))
145
- job.arguments.first[:full_file_name] = File.absolute_path(file_name)
146
- archive_file(file_name, archive_directory)
147
- end
148
-
149
- # Move the file to the archive directory
150
- # Or, delete it if no archive directory was supplied for this entry
151
- #
152
- # If the file_name contains a relative path the relative path will be
153
- # created in the archive_directory before moving the file.
154
- #
155
- # If an absolute path is supplied, then the file is just moved into the
156
- # archive directory without any sub-directories
157
- def archive_file(file_name, archive_directory)
158
- # Move file to archive directory if set
159
- if archive_directory
160
- # Absolute path?
161
- target_file_name = if file_name.start_with?('/')
162
- File.join(archive_directory, File.basename(file_name))
163
- else
164
- File.join(archive_directory, file_name)
165
- end
166
- FileUtils.mkdir_p(File.dirname(target_file_name))
167
- FileUtils.move(file_name, target_file_name)
168
- else
169
- File.delete(file_name)
170
- end
171
- end
172
-
173
97
  end
174
98
  end
175
99
  end