rocketjob 1.3.0 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +201 -0
  3. data/README.md +15 -10
  4. data/bin/rocketjob +3 -1
  5. data/bin/rocketjob_perf +92 -0
  6. data/lib/rocket_job/cli.rb +71 -31
  7. data/lib/rocket_job/config.rb +21 -23
  8. data/lib/rocket_job/dirmon_entry.rb +63 -45
  9. data/lib/rocket_job/extensions/aasm.rb +56 -0
  10. data/lib/rocket_job/extensions/mongo.rb +23 -0
  11. data/lib/rocket_job/job.rb +9 -433
  12. data/lib/rocket_job/jobs/dirmon_job.rb +20 -20
  13. data/lib/rocket_job/jobs/simple_job.rb +12 -0
  14. data/lib/rocket_job/plugins/document.rb +69 -0
  15. data/lib/rocket_job/plugins/job/callbacks.rb +92 -0
  16. data/lib/rocket_job/plugins/job/defaults.rb +40 -0
  17. data/lib/rocket_job/plugins/job/logger.rb +36 -0
  18. data/lib/rocket_job/plugins/job/model.rb +288 -0
  19. data/lib/rocket_job/plugins/job/persistence.rb +167 -0
  20. data/lib/rocket_job/plugins/job/state_machine.rb +166 -0
  21. data/lib/rocket_job/plugins/job/worker.rb +167 -0
  22. data/lib/rocket_job/plugins/restart.rb +54 -0
  23. data/lib/rocket_job/plugins/singleton.rb +26 -0
  24. data/lib/rocket_job/plugins/state_machine.rb +105 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +150 -119
  27. data/lib/rocketjob.rb +43 -21
  28. data/test/config_test.rb +12 -0
  29. data/test/dirmon_entry_test.rb +81 -85
  30. data/test/dirmon_job_test.rb +40 -28
  31. data/test/job_test.rb +14 -257
  32. data/test/plugins/job/callbacks_test.rb +163 -0
  33. data/test/plugins/job/defaults_test.rb +52 -0
  34. data/test/plugins/job/logger_test.rb +58 -0
  35. data/test/plugins/job/model_test.rb +97 -0
  36. data/test/plugins/job/persistence_test.rb +81 -0
  37. data/test/plugins/job/state_machine_test.rb +118 -0
  38. data/test/plugins/job/worker_test.rb +183 -0
  39. data/test/plugins/restart_test.rb +185 -0
  40. data/test/plugins/singleton_test.rb +94 -0
  41. data/test/plugins/state_machine_event_callbacks_test.rb +101 -0
  42. data/test/plugins/state_machine_test.rb +64 -0
  43. data/test/test_helper.rb +3 -36
  44. metadata +64 -19
  45. data/lib/rocket_job/concerns/singleton.rb +0 -33
  46. data/lib/rocket_job/concerns/worker.rb +0 -214
  47. data/test/files/_archive/archived.txt +0 -3
  48. data/test/job_worker_test.rb +0 -86
  49. data/test/jobs/test_job.rb +0 -46
  50. data/test/worker_test.rb +0 -97
@@ -1,440 +1,16 @@
1
1
  # encoding: UTF-8
2
- require 'aasm'
3
2
  module RocketJob
4
3
  # The base job from which all jobs are created
5
4
  class Job
6
- include MongoMapper::Document
7
- include AASM
8
5
  include SemanticLogger::Loggable
9
- include Concerns::Worker
10
-
11
- # Prevent data in MongoDB from re-defining the model behavior
12
- #self.static_keys = true
13
-
14
- #
15
- # User definable attributes
16
- #
17
- # The following attributes are set when the job is created
18
- # @formatter:off
19
-
20
- # Description for this job instance
21
- key :description, String
22
-
23
- # Method that must be invoked to complete this job
24
- key :perform_method, Symbol, default: :perform
25
-
26
- # Priority of this job as it relates to other jobs [1..100]
27
- # 1: Highest Priority
28
- # 50: Default Priority
29
- # 100: Lowest Priority
30
- #
31
- # Example:
32
- # A job with a priority of 40 will execute before a job with priority 50
33
- #
34
- # In RocketJob Pro, if a SlicedJob is running and a higher priority job
35
- # arrives, then the current job will complete the current slices and process
36
- # the new higher priority job
37
- key :priority, Integer, default: 50
38
-
39
- # Run this job no earlier than this time
40
- key :run_at, Time
41
-
42
- # If a job has not started by this time, destroy it
43
- key :expires_at, Time
44
-
45
- # When specified a job will be re-scheduled to run at it's next scheduled interval
46
- # Format is the same as cron.
47
- # #TODO Future capability.
48
- #key :schedule, String
49
-
50
- # When the job completes destroy it from both the database and the UI
51
- key :destroy_on_complete, Boolean, default: true
52
-
53
- # Any user supplied arguments for the method invocation
54
- # All keys must be UTF-8 strings. The values can be any valid BSON type:
55
- # Integer
56
- # Float
57
- # Time (UTC)
58
- # String (UTF-8)
59
- # Array
60
- # Hash
61
- # True
62
- # False
63
- # Symbol
64
- # nil
65
- # Regular Expression
66
- #
67
- # Note: Date is not supported, convert it to a UTC time
68
- key :arguments, Array
69
-
70
- # Whether to store the results from this job
71
- key :collect_output, Boolean, default: false
72
-
73
- # Raise or lower the log level when calling the job
74
- # Can be used to reduce log noise, especially during high volume calls
75
- # For debugging a single job can be logged at a low level such as :trace
76
- # Levels supported: :trace, :debug, :info, :warn, :error, :fatal
77
- key :log_level, Symbol
78
-
79
- #
80
- # Read-only attributes
81
- #
82
-
83
- # Current state, as set by AASM
84
- key :state, Symbol, default: :queued
85
-
86
- # When the job was created
87
- key :created_at, Time, default: -> { Time.now }
88
-
89
- # When processing started on this job
90
- key :started_at, Time
91
-
92
- # When the job completed processing
93
- key :completed_at, Time
94
-
95
- # Number of times that this job has failed to process
96
- key :failure_count, Integer, default: 0
97
-
98
- # This name of the worker that this job is being processed by, or was processed by
99
- key :worker_name, String
100
-
101
- #
102
- # Values that jobs can update during processing
103
- #
104
-
105
- # Allow a job to updates its estimated progress
106
- # Any integer from 0 to 100
107
- key :percent_complete, Integer, default: 0
108
-
109
- # Store the last exception for this job
110
- one :exception, class_name: 'RocketJob::JobException'
111
-
112
- # Store the Hash result from this job if collect_output is true,
113
- # and the job returned actually returned a Hash, otherwise nil
114
- # Not applicable to SlicedJob jobs, since its output is stored in a
115
- # separate collection
116
- key :result, Hash
117
-
118
- # Store all job types in this collection
119
- set_collection_name 'rocket_job.jobs'
120
-
121
- validates_presence_of :state, :failure_count, :created_at, :perform_method
122
- validates :priority, inclusion: 1..100
123
- validates :log_level, inclusion: SemanticLogger::LEVELS + [nil]
124
-
125
- # User definable properties in Dirmon Entry
126
- def self.rocket_job_properties
127
- @rocket_job_properties ||= (self == RocketJob::Job ? [] : superclass.rocket_job_properties)
128
- end
129
-
130
- # Add to user definable properties in Dirmon Entry
131
- def self.public_rocket_job_properties(*properties)
132
- rocket_job_properties.concat(properties).uniq!
133
- end
134
-
135
- # User definable properties in Dirmon Entry
136
- public_rocket_job_properties :description, :priority, :perform_method, :log_level, :arguments
137
-
138
- # State Machine events and transitions
139
- #
140
- # :queued -> :running -> :completed
141
- # -> :paused -> :running
142
- # -> :aborted
143
- # -> :failed -> :running
144
- # -> :aborted
145
- # -> :aborted
146
- # -> :queued (when a worker dies)
147
- # -> :aborted
148
- aasm column: :state do
149
- # Job has been created and is queued for processing ( Initial state )
150
- state :queued, initial: true
151
-
152
- # Job is running
153
- state :running
154
-
155
- # Job has completed processing ( End state )
156
- state :completed
157
-
158
- # Job is temporarily paused and no further processing will be completed
159
- # until this job has been resumed
160
- state :paused
161
-
162
- # Job failed to process and needs to be manually re-tried or aborted
163
- state :failed
164
-
165
- # Job was aborted and cannot be resumed ( End state )
166
- state :aborted
167
-
168
- event :start, before: :before_start do
169
- transitions from: :queued, to: :running
170
- end
171
-
172
- event :complete, before: :before_complete do
173
- after do
174
- destroy if destroy_on_complete
175
- end
176
- transitions from: :running, to: :completed
177
- end
178
-
179
- event :fail, before: :before_fail do
180
- transitions from: :queued, to: :failed
181
- transitions from: :running, to: :failed
182
- transitions from: :paused, to: :failed
183
- end
184
-
185
- event :retry, before: :before_retry do
186
- transitions from: :failed, to: :queued
187
- end
188
-
189
- event :pause, before: :before_pause do
190
- transitions from: :running, to: :paused
191
- end
192
-
193
- event :resume, before: :before_resume do
194
- transitions from: :paused, to: :running
195
- end
196
-
197
- event :abort, before: :before_abort do
198
- transitions from: :running, to: :aborted
199
- transitions from: :queued, to: :aborted
200
- transitions from: :failed, to: :aborted
201
- transitions from: :paused, to: :aborted
202
- end
203
-
204
- event :requeue, before: :before_requeue do
205
- transitions from: :running, to: :queued
206
- end
207
- end
208
- # @formatter:on
209
-
210
- # Create indexes
211
- def self.create_indexes
212
- # Used by find_and_modify in .next_job
213
- ensure_index({state: 1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
214
- # Remove outdated index if present
215
- drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
216
- # Used by Mission Control
217
- ensure_index [[:created_at, 1]]
218
- end
219
-
220
- # Requeues all jobs that were running on worker that died
221
- def self.requeue_dead_worker(worker_name)
222
- running.each { |job| job.requeue!(worker_name) }
223
- end
224
-
225
- # Pause all running jobs
226
- def self.pause_all
227
- running.each(&:pause!)
228
- end
229
-
230
- # Resume all paused jobs
231
- def self.resume_all
232
- paused.each(&:resume!)
233
- end
234
-
235
- # Returns the number of required arguments for this job
236
- def self.argument_count(method = :perform)
237
- instance_method(method).arity
238
- end
239
-
240
- # Override parent defaults
241
- def self.rocket_job(&block)
242
- @rocket_job_defaults = block
243
- self
244
- end
245
-
246
- # Returns [true|false] whether to collect the results from running this batch
247
- def collect_output?
248
- collect_output == true
249
- end
250
-
251
- # Returns [Float] the number of seconds the job has taken
252
- # - Elapsed seconds to process the job from when a worker first started working on it
253
- # until now if still running, or until it was completed
254
- # - Seconds in the queue if queued
255
- def seconds
256
- if completed_at
257
- completed_at - (started_at || created_at)
258
- elsif started_at
259
- Time.now - started_at
260
- else
261
- Time.now - created_at
262
- end
263
- end
264
-
265
- # Returns a human readable duration the job has taken
266
- def duration
267
- RocketJob.seconds_as_duration(seconds)
268
- end
269
-
270
- # A job has expired if the expiry time has passed before it is started
271
- def expired?
272
- started_at.nil? && expires_at && (expires_at < Time.now)
273
- end
274
-
275
- # Returns [Hash] status of this job
276
- def as_json
277
- attrs = serializable_hash(methods: [:seconds, :duration])
278
- attrs.delete('result') unless collect_output?
279
- case
280
- when running?
281
- attrs.delete('completed_at')
282
- attrs.delete('result')
283
- attrs
284
- when paused?
285
- attrs.delete('completed_at')
286
- attrs.delete('result')
287
- # Ensure 'paused_at' appears first in the hash
288
- {'paused_at' => completed_at}.merge(attrs)
289
- when aborted?
290
- attrs.delete('completed_at')
291
- attrs.delete('result')
292
- {'aborted_at' => completed_at}.merge(attrs)
293
- when failed?
294
- attrs.delete('completed_at')
295
- attrs.delete('result')
296
- {'failed_at' => completed_at}.merge(attrs)
297
- else
298
- attrs
299
- end
300
- end
301
-
302
- def status(time_zone = 'Eastern Time (US & Canada)')
303
- h = as_json
304
- h.delete('seconds')
305
- h.delete('perform_method') if h['perform_method'] == :perform
306
- h.dup.each_pair do |k, v|
307
- case
308
- when v.is_a?(Time)
309
- h[k] = v.in_time_zone(time_zone).to_s
310
- when v.is_a?(BSON::ObjectId)
311
- h[k] = v.to_s
312
- end
313
- end
314
- h
315
- end
316
-
317
- # Patch the way MongoMapper reloads a model
318
- # Only reload MongoMapper attributes, leaving other instance variables untouched
319
- def reload
320
- if (doc = collection.find_one(_id: id))
321
- # Clear out keys that are not returned during the reload from MongoDB
322
- (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
323
- initialize_default_values
324
- load_from_database(doc)
325
- self
326
- else
327
- if destroy_on_complete
328
- self.state = :completed
329
- before_complete
330
- else
331
- raise(MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection")
332
- end
333
- end
334
- end
335
-
336
- # Set exception information for this job and fail it
337
- def fail(worker_name='user', exc_or_message='Job failed through user action')
338
- if exc_or_message.is_a?(Exception)
339
- self.exception = JobException.from_exception(exc_or_message)
340
- exception.worker_name = worker_name
341
- else
342
- build_exception(
343
- class_name: 'RocketJob::JobException',
344
- message: exc_or_message,
345
- backtrace: [],
346
- worker_name: worker_name
347
- )
348
- end
349
- # not available as #super
350
- aasm.current_event = :fail
351
- aasm_fire_event(:fail, persist: false)
352
- end
353
-
354
- def fail!(worker_name='user', exc_or_message='Job failed through user action')
355
- self.fail(worker_name, exc_or_message)
356
- save!
357
- end
358
-
359
- # Requeue this running job since the worker assigned to it has died
360
- def requeue!(worker_name_=nil)
361
- return false if worker_name_ && (worker_name != worker_name_)
362
- # not available as #super
363
- aasm.current_event = :requeue!
364
- aasm_fire_event(:requeue, persist: true)
365
- end
366
-
367
- # Requeue this running job since the worker assigned to it has died
368
- def requeue(worker_name_=nil)
369
- return false if worker_name_ && (worker_name != worker_name_)
370
- # not available as #super
371
- aasm.current_event = :requeue
372
- aasm_fire_event(:requeue, persist: false)
373
- end
374
-
375
- protected
376
-
377
- # Before events that can be overridden by child classes
378
- def before_start
379
- self.started_at = Time.now
380
- end
381
-
382
- def before_complete
383
- self.percent_complete = 100
384
- self.completed_at = Time.now
385
- self.worker_name = nil
386
- end
387
-
388
- def before_fail
389
- self.completed_at = Time.now
390
- self.worker_name = nil
391
- self.failure_count += 1
392
- end
393
-
394
- def before_retry
395
- self.completed_at = nil
396
- self.exception = nil
397
- end
398
-
399
- def before_pause
400
- self.completed_at = Time.now
401
- self.worker_name = nil
402
- end
403
-
404
- def before_resume
405
- self.completed_at = nil
406
- end
407
-
408
- def before_abort
409
- self.completed_at = Time.now
410
- self.worker_name = nil
411
- end
412
-
413
- def before_requeue
414
- self.started_at = nil
415
- self.worker_name = nil
416
- end
417
-
418
- private
419
-
420
- # After this model is loaded, convert any hashes in the arguments list to HashWithIndifferentAccess
421
- def load_from_database(*args)
422
- super
423
- if arguments.present?
424
- self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
425
- end
426
- end
427
-
428
- def self.apply_defaults(job)
429
- @rocket_job_defaults.call(job) if @rocket_job_defaults
430
- end
431
-
432
- # Apply RocketJob defaults after initializing default values
433
- # but before setting attributes
434
- def initialize_default_values(except = {})
435
- super
436
- self.class.apply_defaults(self)
437
- end
438
-
6
+ include Plugins::Document
7
+ include Plugins::Job::Model
8
+ include Plugins::Job::Persistence
9
+ include Plugins::Job::Callbacks
10
+ include Plugins::Job::Logger
11
+ include Plugins::StateMachine
12
+ include Plugins::Job::StateMachine
13
+ include Plugins::Job::Worker
14
+ include Plugins::Job::Defaults
439
15
  end
440
16
  end
@@ -10,7 +10,7 @@ module RocketJob
10
10
  # * On each subsequent Dirmon run it checks the size of each file against the
11
11
  # previous list of known files, and only if the file size has not changed
12
12
  # the corresponding job is started for that file.
13
- # * If the job implements #file_store_upload or #upload, that method is called
13
+ # * If the job implements #upload, that method is called
14
14
  # and then the file is deleted, or moved to the archive_directory if supplied
15
15
 
16
16
  # * Otherwise, the file is moved to the supplied archive_directory (defaults to
@@ -18,22 +18,26 @@ module RocketJob
18
18
  # file name of the archived file is passed into the job as its first argument.
19
19
 
20
20
  # Note:
21
- # - Jobs that do not implement #file_store_upload or #upload _must_ have a
22
- # Hash as the first argument
21
+ # - Jobs that do not implement #upload _must_ have a Hash as the first argument
23
22
  #
24
23
  # With RocketJob Pro, the file is automatically uploaded into the job itself
25
24
  # using the job's #upload method, after which the file is archived or deleted
26
25
  # if no archive_directory was specified in the DirmonEntry.
27
26
  #
28
27
  # To start Dirmon for the first time
28
+ # RocketJob::Jobs::DirmonJob.create!
29
29
  #
30
+ # If another DirmonJob instance is already queued or running, then the create
31
+ # above will fail with:
32
+ # MongoMapper::DocumentNotValid: Validation failed: State Another instance of this job is already queued or running
30
33
  #
31
- # Note:
32
- # Use `DirmonJob.start` to prevent creating multiple Dirmon jobs, otherwise
33
- # it will result in multiple jobs being started
34
+ # Or to start DirmonJob and ignore errors if already running
35
+ # RocketJob::Jobs::DirmonJob.create
34
36
  class DirmonJob < RocketJob::Job
35
37
  # Only allow one DirmonJob instance to be running at a time
36
- include RocketJob::Concerns::Singleton
38
+ include RocketJob::Plugins::Singleton
39
+ # Start a new job when this one completes, fails, or aborts
40
+ include RocketJob::Plugins::Restart
37
41
 
38
42
  rocket_job do |job|
39
43
  job.priority = 40
@@ -43,23 +47,22 @@ module RocketJob
43
47
  key :check_seconds, Float, default: 300.0
44
48
  key :previous_file_names, Hash # Hash[file_name, size]
45
49
 
50
+ before_create :set_run_at
51
+
46
52
  # Iterate over each Dirmon entry looking for new files
47
53
  # If a new file is found, it is not processed immediately, instead
48
54
  # it is passed to the next run of this job along with the file size.
49
55
  # If the file size has not changed, the Job is kicked off.
50
56
  def perform
51
57
  check_directories
52
- ensure
53
- # Run again in the future, even if this run fails with an exception
54
- self.class.create!(
55
- previous_file_names: previous_file_names,
56
- priority: priority,
57
- check_seconds: check_seconds,
58
- run_at: Time.now + check_seconds
59
- )
60
58
  end
61
59
 
62
- protected
60
+ private
61
+
62
+ # Set a run_at when a new instance of this job is created
63
+ def set_run_at
64
+ self.run_at = Time.now + check_seconds
65
+ end
63
66
 
64
67
  # Checks the directories for new files, starting jobs if files have not changed
65
68
  # since the last run
@@ -70,7 +73,7 @@ module RocketJob
70
73
  # BSON Keys cannot contain periods
71
74
  key = pathname.to_s.gsub('.', '_')
72
75
  previous_size = previous_file_names[key]
73
- if (size = check_file(entry, pathname, previous_size))
76
+ if size = check_file(entry, pathname, previous_size)
74
77
  new_file_names[key] = size
75
78
  end
76
79
  end
@@ -91,9 +94,6 @@ module RocketJob
91
94
  # Keep for the next run
92
95
  size
93
96
  end
94
- rescue Errno::ENOENT => exc
95
- # File may have been deleted since the scan was performed
96
- nil
97
97
  end
98
98
 
99
99
  end
@@ -0,0 +1,12 @@
1
+ module RocketJob
2
+ module Jobs
3
+
4
+ class SimpleJob < RocketJob::Job
5
+ # Sleeps for one second without doing any other work, used for performance testing
6
+ def perform
7
+ sleep 1
8
+ end
9
+ end
10
+
11
+ end
12
+ end
@@ -0,0 +1,69 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+ require 'mongo'
4
+ require 'mongo_ha'
5
+ require 'mongo_mapper'
6
+
7
+ module RocketJob
8
+ module Plugins
9
+ # Base MongoMapper document behavior: adds after_initialize / after_find callbacks and patches #reload
10
+ module Document
11
+ extend ActiveSupport::Concern
12
+ include MongoMapper::Document
13
+
14
+ included do
15
+ # Add after_initialize & after_find callbacks
16
+ define_model_callbacks :initialize, :find, :only => [:after]
17
+
18
+ # Prevent data in MongoDB from re-defining the model behavior
19
+ #self.static_keys = true
20
+
21
+ # Turn off embedded callbacks. Slow and not used for Jobs
22
+ embedded_callbacks_off
23
+ end
24
+
25
+ # Patch the way MongoMapper reloads a model
26
+ def reload
27
+ if doc = collection.find_one(:_id => id)
28
+ # Clear out keys that are not returned during the reload from MongoDB
29
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
30
+ initialize_default_values
31
+ load_from_database(doc)
32
+ self
33
+ else
34
+ raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
35
+ end
36
+ end
37
+
38
+ # Add after_initialize callbacks
39
+ # TODO: Remove after new MongoMapper gem is released
40
+ # Also remove define_model_callbacks above
41
+ def initialize(*)
42
+ run_callbacks(:initialize) { super }
43
+ end
44
+
45
+ def initialize_from_database(*)
46
+ run_callbacks(:initialize) do
47
+ run_callbacks(:find) do
48
+ super
49
+ end
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def update_attributes_and_reload(attrs)
56
+ if doc = collection.find_and_modify(query: {:_id => id}, update: {'$set' => attrs})
57
+ # Clear out keys that are not returned during the reload from MongoDB
58
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
59
+ initialize_default_values
60
+ load_from_database(doc)
61
+ self
62
+ else
63
+ raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
64
+ end
65
+ end
66
+
67
+ end
68
+ end
69
+ end