rocketjob 1.3.0 → 2.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +201 -0
  3. data/README.md +15 -10
  4. data/bin/rocketjob +3 -1
  5. data/bin/rocketjob_perf +92 -0
  6. data/lib/rocket_job/cli.rb +71 -31
  7. data/lib/rocket_job/config.rb +21 -23
  8. data/lib/rocket_job/dirmon_entry.rb +63 -45
  9. data/lib/rocket_job/extensions/aasm.rb +56 -0
  10. data/lib/rocket_job/extensions/mongo.rb +23 -0
  11. data/lib/rocket_job/job.rb +9 -433
  12. data/lib/rocket_job/jobs/dirmon_job.rb +20 -20
  13. data/lib/rocket_job/jobs/simple_job.rb +12 -0
  14. data/lib/rocket_job/plugins/document.rb +69 -0
  15. data/lib/rocket_job/plugins/job/callbacks.rb +92 -0
  16. data/lib/rocket_job/plugins/job/defaults.rb +40 -0
  17. data/lib/rocket_job/plugins/job/logger.rb +36 -0
  18. data/lib/rocket_job/plugins/job/model.rb +288 -0
  19. data/lib/rocket_job/plugins/job/persistence.rb +167 -0
  20. data/lib/rocket_job/plugins/job/state_machine.rb +166 -0
  21. data/lib/rocket_job/plugins/job/worker.rb +167 -0
  22. data/lib/rocket_job/plugins/restart.rb +54 -0
  23. data/lib/rocket_job/plugins/singleton.rb +26 -0
  24. data/lib/rocket_job/plugins/state_machine.rb +105 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +150 -119
  27. data/lib/rocketjob.rb +43 -21
  28. data/test/config_test.rb +12 -0
  29. data/test/dirmon_entry_test.rb +81 -85
  30. data/test/dirmon_job_test.rb +40 -28
  31. data/test/job_test.rb +14 -257
  32. data/test/plugins/job/callbacks_test.rb +163 -0
  33. data/test/plugins/job/defaults_test.rb +52 -0
  34. data/test/plugins/job/logger_test.rb +58 -0
  35. data/test/plugins/job/model_test.rb +97 -0
  36. data/test/plugins/job/persistence_test.rb +81 -0
  37. data/test/plugins/job/state_machine_test.rb +118 -0
  38. data/test/plugins/job/worker_test.rb +183 -0
  39. data/test/plugins/restart_test.rb +185 -0
  40. data/test/plugins/singleton_test.rb +94 -0
  41. data/test/plugins/state_machine_event_callbacks_test.rb +101 -0
  42. data/test/plugins/state_machine_test.rb +64 -0
  43. data/test/test_helper.rb +3 -36
  44. metadata +64 -19
  45. data/lib/rocket_job/concerns/singleton.rb +0 -33
  46. data/lib/rocket_job/concerns/worker.rb +0 -214
  47. data/test/files/_archive/archived.txt +0 -3
  48. data/test/job_worker_test.rb +0 -86
  49. data/test/jobs/test_job.rb +0 -46
  50. data/test/worker_test.rb +0 -97
@@ -1,440 +1,16 @@
1
1
  # encoding: UTF-8
2
- require 'aasm'
3
2
  module RocketJob
4
3
  # The base job from which all jobs are created
5
4
  class Job
6
- include MongoMapper::Document
7
- include AASM
8
5
  include SemanticLogger::Loggable
9
- include Concerns::Worker
10
-
11
- # Prevent data in MongoDB from re-defining the model behavior
12
- #self.static_keys = true
13
-
14
- #
15
- # User definable attributes
16
- #
17
- # The following attributes are set when the job is created
18
- # @formatter:off
19
-
20
- # Description for this job instance
21
- key :description, String
22
-
23
- # Method that must be invoked to complete this job
24
- key :perform_method, Symbol, default: :perform
25
-
26
- # Priority of this job as it relates to other jobs [1..100]
27
- # 1: Highest Priority
28
- # 50: Default Priority
29
- # 100: Lowest Priority
30
- #
31
- # Example:
32
- # A job with a priority of 40 will execute before a job with priority 50
33
- #
34
- # In RocketJob Pro, if a SlicedJob is running and a higher priority job
35
- # arrives, then the current job will complete the current slices and process
36
- # the new higher priority job
37
- key :priority, Integer, default: 50
38
-
39
- # Run this job no earlier than this time
40
- key :run_at, Time
41
-
42
- # If a job has not started by this time, destroy it
43
- key :expires_at, Time
44
-
45
- # When specified a job will be re-scheduled to run at its next scheduled interval
46
- # Format is the same as cron.
47
- # #TODO Future capability.
48
- #key :schedule, String
49
-
50
- # When the job completes destroy it from both the database and the UI
51
- key :destroy_on_complete, Boolean, default: true
52
-
53
- # Any user supplied arguments for the method invocation
54
- # All keys must be UTF-8 strings. The values can be any valid BSON type:
55
- # Integer
56
- # Float
57
- # Time (UTC)
58
- # String (UTF-8)
59
- # Array
60
- # Hash
61
- # True
62
- # False
63
- # Symbol
64
- # nil
65
- # Regular Expression
66
- #
67
- # Note: Date is not supported, convert it to a UTC time
68
- key :arguments, Array
69
-
70
- # Whether to store the results from this job
71
- key :collect_output, Boolean, default: false
72
-
73
- # Raise or lower the log level when calling the job
74
- # Can be used to reduce log noise, especially during high volume calls
75
- # For debugging a single job can be logged at a low level such as :trace
76
- # Levels supported: :trace, :debug, :info, :warn, :error, :fatal
77
- key :log_level, Symbol
78
-
79
- #
80
- # Read-only attributes
81
- #
82
-
83
- # Current state, as set by AASM
84
- key :state, Symbol, default: :queued
85
-
86
- # When the job was created
87
- key :created_at, Time, default: -> { Time.now }
88
-
89
- # When processing started on this job
90
- key :started_at, Time
91
-
92
- # When the job completed processing
93
- key :completed_at, Time
94
-
95
- # Number of times that this job has failed to process
96
- key :failure_count, Integer, default: 0
97
-
98
- # The name of the worker that this job is being processed by, or was processed by
99
- key :worker_name, String
100
-
101
- #
102
- # Values that jobs can update during processing
103
- #
104
-
105
- # Allow a job to update its estimated progress
106
- # Any integer from 0 to 100
107
- key :percent_complete, Integer, default: 0
108
-
109
- # Store the last exception for this job
110
- one :exception, class_name: 'RocketJob::JobException'
111
-
112
- # Store the Hash result from this job if collect_output is true,
113
- # and the job actually returned a Hash, otherwise nil
114
- # Not applicable to SlicedJob jobs, since its output is stored in a
115
- # separate collection
116
- key :result, Hash
117
-
118
- # Store all job types in this collection
119
- set_collection_name 'rocket_job.jobs'
120
-
121
- validates_presence_of :state, :failure_count, :created_at, :perform_method
122
- validates :priority, inclusion: 1..100
123
- validates :log_level, inclusion: SemanticLogger::LEVELS + [nil]
124
-
125
- # User definable properties in Dirmon Entry
126
- def self.rocket_job_properties
127
- @rocket_job_properties ||= (self == RocketJob::Job ? [] : superclass.rocket_job_properties)
128
- end
129
-
130
- # Add to user definable properties in Dirmon Entry
131
- def self.public_rocket_job_properties(*properties)
132
- rocket_job_properties.concat(properties).uniq!
133
- end
134
-
135
- # User definable properties in Dirmon Entry
136
- public_rocket_job_properties :description, :priority, :perform_method, :log_level, :arguments
137
-
138
- # State Machine events and transitions
139
- #
140
- # :queued -> :running -> :completed
141
- # -> :paused -> :running
142
- # -> :aborted
143
- # -> :failed -> :running
144
- # -> :aborted
145
- # -> :aborted
146
- # -> :queued (when a worker dies)
147
- # -> :aborted
148
- aasm column: :state do
149
- # Job has been created and is queued for processing ( Initial state )
150
- state :queued, initial: true
151
-
152
- # Job is running
153
- state :running
154
-
155
- # Job has completed processing ( End state )
156
- state :completed
157
-
158
- # Job is temporarily paused and no further processing will be completed
159
- # until this job has been resumed
160
- state :paused
161
-
162
- # Job failed to process and needs to be manually re-tried or aborted
163
- state :failed
164
-
165
- # Job was aborted and cannot be resumed ( End state )
166
- state :aborted
167
-
168
- event :start, before: :before_start do
169
- transitions from: :queued, to: :running
170
- end
171
-
172
- event :complete, before: :before_complete do
173
- after do
174
- destroy if destroy_on_complete
175
- end
176
- transitions from: :running, to: :completed
177
- end
178
-
179
- event :fail, before: :before_fail do
180
- transitions from: :queued, to: :failed
181
- transitions from: :running, to: :failed
182
- transitions from: :paused, to: :failed
183
- end
184
-
185
- event :retry, before: :before_retry do
186
- transitions from: :failed, to: :queued
187
- end
188
-
189
- event :pause, before: :before_pause do
190
- transitions from: :running, to: :paused
191
- end
192
-
193
- event :resume, before: :before_resume do
194
- transitions from: :paused, to: :running
195
- end
196
-
197
- event :abort, before: :before_abort do
198
- transitions from: :running, to: :aborted
199
- transitions from: :queued, to: :aborted
200
- transitions from: :failed, to: :aborted
201
- transitions from: :paused, to: :aborted
202
- end
203
-
204
- event :requeue, before: :before_requeue do
205
- transitions from: :running, to: :queued
206
- end
207
- end
208
- # @formatter:on
209
-
210
- # Create indexes
211
- def self.create_indexes
212
- # Used by find_and_modify in .next_job
213
- ensure_index({state: 1, run_at: 1, priority: 1, created_at: 1, sub_state: 1}, background: true)
214
- # Remove outdated index if present
215
- drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
216
- # Used by Mission Control
217
- ensure_index [[:created_at, 1]]
218
- end
219
-
220
- # Requeues all jobs that were running on worker that died
221
- def self.requeue_dead_worker(worker_name)
222
- running.each { |job| job.requeue!(worker_name) }
223
- end
224
-
225
- # Pause all running jobs
226
- def self.pause_all
227
- running.each(&:pause!)
228
- end
229
-
230
- # Resume all paused jobs
231
- def self.resume_all
232
- paused.each(&:resume!)
233
- end
234
-
235
- # Returns the number of required arguments for this job
236
- def self.argument_count(method = :perform)
237
- instance_method(method).arity
238
- end
239
-
240
- # Override parent defaults
241
- def self.rocket_job(&block)
242
- @rocket_job_defaults = block
243
- self
244
- end
245
-
246
- # Returns [true|false] whether to collect the results from running this batch
247
- def collect_output?
248
- collect_output == true
249
- end
250
-
251
- # Returns [Float] the number of seconds the job has taken
252
- # - Elapsed seconds to process the job from when a worker first started working on it
253
- # until now if still running, or until it was completed
254
- # - Seconds in the queue if queued
255
- def seconds
256
- if completed_at
257
- completed_at - (started_at || created_at)
258
- elsif started_at
259
- Time.now - started_at
260
- else
261
- Time.now - created_at
262
- end
263
- end
264
-
265
- # Returns a human readable duration the job has taken
266
- def duration
267
- RocketJob.seconds_as_duration(seconds)
268
- end
269
-
270
- # A job has expired if the expiry time has passed before it is started
271
- def expired?
272
- started_at.nil? && expires_at && (expires_at < Time.now)
273
- end
274
-
275
- # Returns [Hash] status of this job
276
- def as_json
277
- attrs = serializable_hash(methods: [:seconds, :duration])
278
- attrs.delete('result') unless collect_output?
279
- case
280
- when running?
281
- attrs.delete('completed_at')
282
- attrs.delete('result')
283
- attrs
284
- when paused?
285
- attrs.delete('completed_at')
286
- attrs.delete('result')
287
- # Ensure 'paused_at' appears first in the hash
288
- {'paused_at' => completed_at}.merge(attrs)
289
- when aborted?
290
- attrs.delete('completed_at')
291
- attrs.delete('result')
292
- {'aborted_at' => completed_at}.merge(attrs)
293
- when failed?
294
- attrs.delete('completed_at')
295
- attrs.delete('result')
296
- {'failed_at' => completed_at}.merge(attrs)
297
- else
298
- attrs
299
- end
300
- end
301
-
302
- def status(time_zone = 'Eastern Time (US & Canada)')
303
- h = as_json
304
- h.delete('seconds')
305
- h.delete('perform_method') if h['perform_method'] == :perform
306
- h.dup.each_pair do |k, v|
307
- case
308
- when v.is_a?(Time)
309
- h[k] = v.in_time_zone(time_zone).to_s
310
- when v.is_a?(BSON::ObjectId)
311
- h[k] = v.to_s
312
- end
313
- end
314
- h
315
- end
316
-
317
- # Patch the way MongoMapper reloads a model
318
- # Only reload MongoMapper attributes, leaving other instance variables untouched
319
- def reload
320
- if (doc = collection.find_one(_id: id))
321
- # Clear out keys that are not returned during the reload from MongoDB
322
- (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
323
- initialize_default_values
324
- load_from_database(doc)
325
- self
326
- else
327
- if destroy_on_complete
328
- self.state = :completed
329
- before_complete
330
- else
331
- raise(MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection")
332
- end
333
- end
334
- end
335
-
336
- # Set exception information for this job and fail it
337
- def fail(worker_name='user', exc_or_message='Job failed through user action')
338
- if exc_or_message.is_a?(Exception)
339
- self.exception = JobException.from_exception(exc_or_message)
340
- exception.worker_name = worker_name
341
- else
342
- build_exception(
343
- class_name: 'RocketJob::JobException',
344
- message: exc_or_message,
345
- backtrace: [],
346
- worker_name: worker_name
347
- )
348
- end
349
- # not available as #super
350
- aasm.current_event = :fail
351
- aasm_fire_event(:fail, persist: false)
352
- end
353
-
354
- def fail!(worker_name='user', exc_or_message='Job failed through user action')
355
- self.fail(worker_name, exc_or_message)
356
- save!
357
- end
358
-
359
- # Requeue this running job since the worker assigned to it has died
360
- def requeue!(worker_name_=nil)
361
- return false if worker_name_ && (worker_name != worker_name_)
362
- # not available as #super
363
- aasm.current_event = :requeue!
364
- aasm_fire_event(:requeue, persist: true)
365
- end
366
-
367
- # Requeue this running job since the worker assigned to it has died
368
- def requeue(worker_name_=nil)
369
- return false if worker_name_ && (worker_name != worker_name_)
370
- # not available as #super
371
- aasm.current_event = :requeue
372
- aasm_fire_event(:requeue, persist: false)
373
- end
374
-
375
- protected
376
-
377
- # Before events that can be overridden by child classes
378
- def before_start
379
- self.started_at = Time.now
380
- end
381
-
382
- def before_complete
383
- self.percent_complete = 100
384
- self.completed_at = Time.now
385
- self.worker_name = nil
386
- end
387
-
388
- def before_fail
389
- self.completed_at = Time.now
390
- self.worker_name = nil
391
- self.failure_count += 1
392
- end
393
-
394
- def before_retry
395
- self.completed_at = nil
396
- self.exception = nil
397
- end
398
-
399
- def before_pause
400
- self.completed_at = Time.now
401
- self.worker_name = nil
402
- end
403
-
404
- def before_resume
405
- self.completed_at = nil
406
- end
407
-
408
- def before_abort
409
- self.completed_at = Time.now
410
- self.worker_name = nil
411
- end
412
-
413
- def before_requeue
414
- self.started_at = nil
415
- self.worker_name = nil
416
- end
417
-
418
- private
419
-
420
- # After this model is loaded, convert any hashes in the arguments list to HashWithIndifferentAccess
421
- def load_from_database(*args)
422
- super
423
- if arguments.present?
424
- self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
425
- end
426
- end
427
-
428
- def self.apply_defaults(job)
429
- @rocket_job_defaults.call(job) if @rocket_job_defaults
430
- end
431
-
432
- # Apply RocketJob defaults after initializing default values
433
- # but before setting attributes
434
- def initialize_default_values(except = {})
435
- super
436
- self.class.apply_defaults(self)
437
- end
438
-
6
+ include Plugins::Document
7
+ include Plugins::Job::Model
8
+ include Plugins::Job::Persistence
9
+ include Plugins::Job::Callbacks
10
+ include Plugins::Job::Logger
11
+ include Plugins::StateMachine
12
+ include Plugins::Job::StateMachine
13
+ include Plugins::Job::Worker
14
+ include Plugins::Job::Defaults
439
15
  end
440
16
  end
@@ -10,7 +10,7 @@ module RocketJob
10
10
  # * On each subsequent Dirmon run it checks the size of each file against the
11
11
  # previous list of known files, and only if the file size has not changed
12
12
  # the corresponding job is started for that file.
13
- # * If the job implements #file_store_upload or #upload, that method is called
13
+ # * If the job implements #upload, that method is called
14
14
  # and then the file is deleted, or moved to the archive_directory if supplied
15
15
 
16
16
  # * Otherwise, the file is moved to the supplied archive_directory (defaults to
@@ -18,22 +18,26 @@ module RocketJob
18
18
  # file name of the archived file is passed into the job as its first argument.
19
19
 
20
20
  # Note:
21
- # - Jobs that do not implement #file_store_upload or #upload _must_ have a
22
- # Hash as the first argument
21
+ # - Jobs that do not implement #upload _must_ have a Hash as the first argument
23
22
  #
24
23
  # With RocketJob Pro, the file is automatically uploaded into the job itself
25
24
  # using the job's #upload method, after which the file is archived or deleted
26
25
  # if no archive_directory was specified in the DirmonEntry.
27
26
  #
28
27
  # To start Dirmon for the first time
28
+ # RocketJob::Jobs::DirmonJob.create!
29
29
  #
30
+ # If another DirmonJob instance is already queued or running, then the create
31
+ # above will fail with:
32
+ # MongoMapper::DocumentNotValid: Validation failed: State Another instance of this job is already queued or running
30
33
  #
31
- # Note:
32
- # Use `DirmonJob.start` to prevent creating multiple Dirmon jobs, otherwise
33
- # it will result in multiple jobs being started
34
+ # Or to start DirmonJob and ignore errors if already running
35
+ # RocketJob::Jobs::DirmonJob.create
34
36
  class DirmonJob < RocketJob::Job
35
37
  # Only allow one DirmonJob instance to be running at a time
36
- include RocketJob::Concerns::Singleton
38
+ include RocketJob::Plugins::Singleton
39
+ # Start a new job when this one completes, fails, or aborts
40
+ include RocketJob::Plugins::Restart
37
41
 
38
42
  rocket_job do |job|
39
43
  job.priority = 40
@@ -43,23 +47,22 @@ module RocketJob
43
47
  key :check_seconds, Float, default: 300.0
44
48
  key :previous_file_names, Hash # Hash[file_name, size]
45
49
 
50
+ before_create :set_run_at
51
+
46
52
  # Iterate over each Dirmon entry looking for new files
47
53
  # If a new file is found, it is not processed immediately, instead
48
54
  # it is passed to the next run of this job along with the file size.
49
55
  # If the file size has not changed, the Job is kicked off.
50
56
  def perform
51
57
  check_directories
52
- ensure
53
- # Run again in the future, even if this run fails with an exception
54
- self.class.create!(
55
- previous_file_names: previous_file_names,
56
- priority: priority,
57
- check_seconds: check_seconds,
58
- run_at: Time.now + check_seconds
59
- )
60
58
  end
61
59
 
62
- protected
60
+ private
61
+
62
+ # Set a run_at when a new instance of this job is created
63
+ def set_run_at
64
+ self.run_at = Time.now + check_seconds
65
+ end
63
66
 
64
67
  # Checks the directories for new files, starting jobs if files have not changed
65
68
  # since the last run
@@ -70,7 +73,7 @@ module RocketJob
70
73
  # BSON Keys cannot contain periods
71
74
  key = pathname.to_s.gsub('.', '_')
72
75
  previous_size = previous_file_names[key]
73
- if (size = check_file(entry, pathname, previous_size))
76
+ if size = check_file(entry, pathname, previous_size)
74
77
  new_file_names[key] = size
75
78
  end
76
79
  end
@@ -91,9 +94,6 @@ module RocketJob
91
94
  # Keep for the next run
92
95
  size
93
96
  end
94
- rescue Errno::ENOENT => exc
95
- # File may have been deleted since the scan was performed
96
- nil
97
97
  end
98
98
 
99
99
  end
@@ -0,0 +1,12 @@
1
+ module RocketJob
2
+ module Jobs
3
+
4
+ class SimpleJob < RocketJob::Job
5
+ # Sleeps for one second and does no other work, used for performance testing
6
+ def perform
7
+ sleep 1
8
+ end
9
+ end
10
+
11
+ end
12
+ end
@@ -0,0 +1,69 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+ require 'mongo'
4
+ require 'mongo_ha'
5
+ require 'mongo_mapper'
6
+
7
+ module RocketJob
8
+ module Plugins
9
+ # Base document behavior: includes MongoMapper::Document, adds after_initialize
+ # and after_find callbacks, and turns off embedded callbacks
10
+ module Document
11
+ extend ActiveSupport::Concern
12
+ include MongoMapper::Document
13
+
14
+ included do
15
+ # Add after_initialize & after_find callbacks
16
+ define_model_callbacks :initialize, :find, :only => [:after]
17
+
18
+ # Prevent data in MongoDB from re-defining the model behavior
19
+ #self.static_keys = true
20
+
21
+ # Turn off embedded callbacks. Slow and not used for Jobs
22
+ embedded_callbacks_off
23
+ end
24
+
25
+ # Patch the way MongoMapper reloads a model
26
+ def reload
27
+ if doc = collection.find_one(:_id => id)
28
+ # Clear out keys that are not returned during the reload from MongoDB
29
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
30
+ initialize_default_values
31
+ load_from_database(doc)
32
+ self
33
+ else
34
+ raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
35
+ end
36
+ end
37
+
38
+ # Add after_initialize callbacks
39
+ # TODO: Remove after new MongoMapper gem is released
40
+ # Also remove define_model_callbacks above
41
+ def initialize(*)
42
+ run_callbacks(:initialize) { super }
43
+ end
44
+
45
+ def initialize_from_database(*)
46
+ run_callbacks(:initialize) do
47
+ run_callbacks(:find) do
48
+ super
49
+ end
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ def update_attributes_and_reload(attrs)
56
+ if doc = collection.find_and_modify(query: {:_id => id}, update: {'$set' => attrs})
57
+ # Clear out keys that are not returned during the reload from MongoDB
58
+ (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
59
+ initialize_default_values
60
+ load_from_database(doc)
61
+ self
62
+ else
63
+ raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
64
+ end
65
+ end
66
+
67
+ end
68
+ end
69
+ end