rocketjob 1.3.0 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +201 -0
  3. data/README.md +15 -10
  4. data/bin/rocketjob +3 -1
  5. data/bin/rocketjob_perf +92 -0
  6. data/lib/rocket_job/cli.rb +71 -31
  7. data/lib/rocket_job/config.rb +21 -23
  8. data/lib/rocket_job/dirmon_entry.rb +63 -45
  9. data/lib/rocket_job/extensions/aasm.rb +56 -0
  10. data/lib/rocket_job/extensions/mongo.rb +23 -0
  11. data/lib/rocket_job/job.rb +9 -433
  12. data/lib/rocket_job/jobs/dirmon_job.rb +20 -20
  13. data/lib/rocket_job/jobs/simple_job.rb +12 -0
  14. data/lib/rocket_job/plugins/document.rb +69 -0
  15. data/lib/rocket_job/plugins/job/callbacks.rb +92 -0
  16. data/lib/rocket_job/plugins/job/defaults.rb +40 -0
  17. data/lib/rocket_job/plugins/job/logger.rb +36 -0
  18. data/lib/rocket_job/plugins/job/model.rb +288 -0
  19. data/lib/rocket_job/plugins/job/persistence.rb +167 -0
  20. data/lib/rocket_job/plugins/job/state_machine.rb +166 -0
  21. data/lib/rocket_job/plugins/job/worker.rb +167 -0
  22. data/lib/rocket_job/plugins/restart.rb +54 -0
  23. data/lib/rocket_job/plugins/singleton.rb +26 -0
  24. data/lib/rocket_job/plugins/state_machine.rb +105 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +150 -119
  27. data/lib/rocketjob.rb +43 -21
  28. data/test/config_test.rb +12 -0
  29. data/test/dirmon_entry_test.rb +81 -85
  30. data/test/dirmon_job_test.rb +40 -28
  31. data/test/job_test.rb +14 -257
  32. data/test/plugins/job/callbacks_test.rb +163 -0
  33. data/test/plugins/job/defaults_test.rb +52 -0
  34. data/test/plugins/job/logger_test.rb +58 -0
  35. data/test/plugins/job/model_test.rb +97 -0
  36. data/test/plugins/job/persistence_test.rb +81 -0
  37. data/test/plugins/job/state_machine_test.rb +118 -0
  38. data/test/plugins/job/worker_test.rb +183 -0
  39. data/test/plugins/restart_test.rb +185 -0
  40. data/test/plugins/singleton_test.rb +94 -0
  41. data/test/plugins/state_machine_event_callbacks_test.rb +101 -0
  42. data/test/plugins/state_machine_test.rb +64 -0
  43. data/test/test_helper.rb +3 -36
  44. metadata +64 -19
  45. data/lib/rocket_job/concerns/singleton.rb +0 -33
  46. data/lib/rocket_job/concerns/worker.rb +0 -214
  47. data/test/files/_archive/archived.txt +0 -3
  48. data/test/job_worker_test.rb +0 -86
  49. data/test/jobs/test_job.rb +0 -46
  50. data/test/worker_test.rb +0 -97
@@ -0,0 +1,92 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+
4
+ module RocketJob
5
+ module Plugins
6
+ module Job
7
+ # Define before and after callbacks
8
+ #
9
+ # Before callbacks are called in the order they are defined.
10
+ # After callbacks are called in the _reverse_ of the order in which they were defined.
11
+ #
12
+ # Example:
13
+ # before_1
14
+ # before_2
15
+ # perform
16
+ # after_2
17
+ # after_1
18
+ #
19
+ # Example including around callbacks:
20
+ #
21
+ # class MyJob < RocketJob::Job
22
+ # before_perform do
23
+ # puts "BEFORE 1"
24
+ # end
25
+ #
26
+ # around_perform do |job, block|
27
+ # puts "AROUND 1 BEFORE"
28
+ # block.call
29
+ # puts "AROUND 1 AFTER"
30
+ # end
31
+ #
32
+ # before_perform do
33
+ # puts "BEFORE 2"
34
+ # end
35
+ #
36
+ # after_perform do
37
+ # puts "AFTER 1"
38
+ # end
39
+ #
40
+ # around_perform do |job, block|
41
+ # puts "AROUND 2 BEFORE"
42
+ # block.call
43
+ # puts "AROUND 2 AFTER"
44
+ # end
45
+ #
46
+ # after_perform do
47
+ # puts "AFTER 2"
48
+ # end
49
+ #
50
+ # def perform
51
+ # puts "PERFORM"
52
+ # 23
53
+ # end
54
+ # end
55
+ #
56
+ # MyJob.new.perform_now
57
+ #
58
+ # Output from the example above
59
+ #
60
+ # BEFORE 1
61
+ # AROUND 1 BEFORE
62
+ # BEFORE 2
63
+ # AROUND 2 BEFORE
64
+ # PERFORM
65
+ # AFTER 2
66
+ # AROUND 2 AFTER
67
+ # AFTER 1
68
+ # AROUND 1 AFTER
69
+ module Callbacks
70
+ extend ActiveSupport::Concern
71
+ include ActiveSupport::Callbacks
72
+
73
+ included do
74
+ define_callbacks :perform
75
+
76
+ def self.before_perform(*filters, &blk)
77
+ set_callback(:perform, :before, *filters, &blk)
78
+ end
79
+
80
+ def self.after_perform(*filters, &blk)
81
+ set_callback(:perform, :after, *filters, &blk)
82
+ end
83
+
84
+ def self.around_perform(*filters, &blk)
85
+ set_callback(:perform, :around, *filters, &blk)
86
+ end
87
+ end
88
+
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,40 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+
4
+ module RocketJob
5
+ module Plugins
6
+ module Job
7
+ # Allow each child job to set its own defaults
8
+ module Defaults
9
+ extend ActiveSupport::Concern
10
+
11
+ included do
12
+ # Copy parent job defaults
13
+ def self.inherited(base)
14
+ super
15
+ @rocket_job_defaults.each { |block| base.rocket_job(&block) } if @rocket_job_defaults
16
+ end
17
+
18
+ # Override parent defaults
19
+ def self.rocket_job(&block)
20
+ (@rocket_job_defaults ||=[]) << block
21
+ end
22
+
23
+ private
24
+
25
+ def self.rocket_job_defaults
26
+ @rocket_job_defaults
27
+ end
28
+
29
+ # Apply defaults after creating the model but before applying values
30
+ def rocket_job_set_defaults
31
+ if defaults = self.class.rocket_job_defaults
32
+ defaults.each { |block| block.call(self) }
33
+ end
34
+ end
35
+ end
36
+
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+
4
+ module RocketJob
5
+ module Plugins
6
+ module Job
7
+ module Logger
8
+ extend ActiveSupport::Concern
9
+
10
+ included do
11
+ around_perform :rocket_job_around_logger
12
+ end
13
+
14
+ private
15
+
16
+ # Add logging around the perform call
17
+ # - metric allows duration to be forwarded to statsd, etc.
18
+ # - log_exception logs entire exception if raised
19
+ # - on_exception_level changes log level from info to error on exception
20
+ # - silence noisy jobs by raising log level
21
+ def rocket_job_around_logger(&block)
22
+ logger.info('Start #perform')
23
+ logger.benchmark_info(
24
+ 'Completed #perform',
25
+ metric: "rocketjob/#{self.class.name.underscore}/perform",
26
+ log_exception: :full,
27
+ on_exception_level: :error,
28
+ silence: log_level,
29
+ &block
30
+ )
31
+ end
32
+
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,288 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+
4
+ module RocketJob
5
+ module Plugins
6
+ module Job
7
+ # Defines the persisted attributes, validations, naming helpers, scopes and status reporting for a job
8
+ module Model
9
+ extend ActiveSupport::Concern
10
+
11
+ included do
12
+ #
13
+ # User definable attributes
14
+ #
15
+ # The following attributes are set when the job is created
16
+ # @formatter:off
17
+
18
+ # Description for this job instance
19
+ key :description, String
20
+
21
+ # Priority of this job as it relates to other jobs [1..100]
22
+ # 1: Highest Priority
23
+ # 50: Default Priority
24
+ # 100: Lowest Priority
25
+ #
26
+ # Example:
27
+ # A job with a priority of 40 will execute before a job with priority 50
28
+ #
29
+ # In RocketJob Pro, if a SlicedJob is running and a higher priority job
30
+ # arrives, then the current job will complete the current slices and process
31
+ # the new higher priority job
32
+ key :priority, Integer, default: 50
33
+
34
+ # Run this job no earlier than this time
35
+ key :run_at, Time
36
+
37
+ # If a job has not started by this time, destroy it
38
+ key :expires_at, Time
39
+
40
+ # When the job completes destroy it from both the database and the UI
41
+ key :destroy_on_complete, Boolean, default: true
42
+
43
+ # Any user supplied arguments for the method invocation
44
+ # All keys must be UTF-8 strings. The values can be any valid BSON type:
45
+ # Integer
46
+ # Float
47
+ # Time (UTC)
48
+ # String (UTF-8)
49
+ # Array
50
+ # Hash
51
+ # True
52
+ # False
53
+ # Symbol
54
+ # nil
55
+ # Regular Expression
56
+ #
57
+ # Note: Date is not supported, convert it to a UTC time
58
+ key :arguments, Array
59
+
60
+ # Whether to store the results from this job
61
+ key :collect_output, Boolean, default: false
62
+
63
+ # Raise or lower the log level when calling the job
64
+ # Can be used to reduce log noise, especially during high volume calls
65
+ # For debugging a single job can be logged at a low level such as :trace
66
+ # Levels supported: :trace, :debug, :info, :warn, :error, :fatal
67
+ key :log_level, Symbol
68
+
69
+ #
70
+ # Read-only attributes
71
+ #
72
+
73
+ # Current state, as set by the state machine. Do not modify this value directly.
74
+ key :state, Symbol, default: :queued
75
+
76
+ # When the job was created
77
+ key :created_at, Time, default: -> { Time.now }
78
+
79
+ # When processing started on this job
80
+ key :started_at, Time
81
+
82
+ # When the job completed processing
83
+ key :completed_at, Time
84
+
85
+ # Number of times that this job has failed to process
86
+ key :failure_count, Integer, default: 0
87
+
88
+ # The name of the worker that this job is being processed by, or was processed by
89
+ key :worker_name, String
90
+
91
+ #
92
+ # Values that jobs can update during processing
93
+ #
94
+
95
+ # Allow a job to update its estimated progress
96
+ # Any integer from 0 to 100
97
+ key :percent_complete, Integer, default: 0
98
+
99
+ # Store the last exception for this job
100
+ one :exception, class_name: 'RocketJob::JobException'
101
+
102
+ # Store the Hash result from this job if collect_output is true,
103
+ # and the job actually returned a Hash, otherwise nil
104
+ # Not applicable to SlicedJob jobs, since its output is stored in a
105
+ # separate collection
106
+ key :result, Hash
107
+
108
+ # @formatter:on
109
+
110
+ # Store all job types in this collection
111
+ set_collection_name 'rocket_job.jobs'
112
+
113
+ validates_presence_of :state, :failure_count, :created_at
114
+ validates :priority, inclusion: 1..100
115
+ validates :log_level, inclusion: SemanticLogger::LEVELS + [nil]
116
+
117
+ # Returns [String] the singular name for this job class
118
+ #
119
+ # Example:
120
+ # job = DataStudyJob.new
121
+ # job.underscore_name
122
+ # # => "data_study"
123
+ def self.underscore_name
124
+ @underscore_name ||= name.sub(/Job$/, '').underscore
125
+ end
126
+
127
+ # Allow the singular name for this job class to be overridden
128
+ def self.underscore_name=(underscore_name)
129
+ @underscore_name = underscore_name
130
+ end
131
+
132
+ # Returns [String] the human readable name for this job class
133
+ #
134
+ # Example:
135
+ # job = DataStudyJob.new
136
+ # job.human_name
137
+ # # => "Data Study"
138
+ def self.human_name
139
+ @human_name ||= name.sub(/Job$/, '').titleize
140
+ end
141
+
142
+ # Allow the human readable job name for this job class to be overridden
143
+ def self.human_name=(human_name)
144
+ @human_name = human_name
145
+ end
146
+
147
+ # Returns [String] the collective name for this job class
148
+ #
149
+ # Example:
150
+ # job = DataStudyJob.new
151
+ # job.collective_name
152
+ # # => "data_studies"
153
+ def self.collective_name
154
+ @collective_name ||= name.sub(/Job$/, '').pluralize.underscore
155
+ end
156
+
157
+ # Allow the collective name for this job class to be overridden
158
+ def self.collective_name=(collective_name)
159
+ @collective_name = collective_name
160
+ end
161
+
162
+ # Scope for jobs scheduled to run in the future
163
+ def self.scheduled
164
+ queued.where(run_at: {'$gt' => Time.now})
165
+ end
166
+
167
+ # Scope for queued jobs that can run now
168
+ # I.e. Queued jobs excluding scheduled jobs
169
+ def self.queued_now
170
+ queued.where(
171
+ '$or' => [
172
+ {run_at: {'$exists' => false}},
173
+ {run_at: {'$lte' => Time.now}}
174
+ ]
175
+ )
176
+ end
177
+
178
+ # Returns the number of required arguments for this job
179
+ def self.rocket_job_argument_count
180
+ instance_method(:perform).arity
181
+ end
182
+
183
+ # User definable properties in Dirmon Entry
184
+ def self.rocket_job_properties
185
+ @rocket_job_properties ||= (self == RocketJob::Job ? [] : superclass.rocket_job_properties)
186
+ end
187
+
188
+ # Add to user definable properties in Dirmon Entry
189
+ def self.public_rocket_job_properties(*properties)
190
+ rocket_job_properties.concat(properties).uniq!
191
+ end
192
+
193
+ # User definable properties in Dirmon Entry
194
+ public_rocket_job_properties :description, :priority, :log_level, :arguments
195
+ end
196
+
197
+ # Returns [true|false] whether to collect nil results from running this batch
198
+ def collect_nil_output?
199
+ collect_output? ? (collect_nil_output == true) : false
200
+ end
201
+
202
+ # Returns [true|false] whether to collect the results from running this batch
203
+ def collect_output?
204
+ collect_output == true
205
+ end
206
+
207
+ # Returns [Float] the number of seconds the job has taken
208
+ # - Elapsed seconds to process the job from when a worker first started working on it
209
+ # until now if still running, or until it was completed
210
+ # - Seconds in the queue if queued
211
+ def seconds
212
+ if completed_at
213
+ completed_at - (started_at || created_at)
214
+ elsif started_at
215
+ Time.now - started_at
216
+ else
217
+ Time.now - created_at
218
+ end
219
+ end
220
+
221
+ # Returns a human readable duration the job has taken
222
+ def duration
223
+ RocketJob.seconds_as_duration(seconds)
224
+ end
225
+
226
+ # Returns [true|false] whether the job has expired
227
+ def expired?
228
+ expires_at && (expires_at < Time.now)
229
+ end
230
+
231
+ # Returns [true|false] whether the job is scheduled to run in the future
232
+ def scheduled?
233
+ queued? && run_at.present? && (run_at > Time.now)
234
+ end
235
+
236
+ # Returns [Hash] status of this job
237
+ def as_json
238
+ attrs = serializable_hash(methods: [:seconds, :duration])
239
+ attrs.delete('result') unless collect_output?
240
+ case
241
+ when queued?
242
+ attrs.delete('started_at')
243
+ attrs.delete('completed_at')
244
+ attrs.delete('result')
245
+ attrs
246
+ when running?
247
+ attrs.delete('completed_at')
248
+ attrs.delete('result')
249
+ attrs
250
+ when paused?
251
+ attrs.delete('completed_at')
252
+ attrs.delete('result')
253
+ # Ensure 'paused_at' appears first in the hash
254
+ {'paused_at' => completed_at}.merge(attrs)
255
+ when aborted?
256
+ attrs.delete('completed_at')
257
+ attrs.delete('result')
258
+ {'aborted_at' => completed_at}.merge(attrs)
259
+ when failed?
260
+ attrs.delete('completed_at')
261
+ attrs.delete('result')
262
+ {'failed_at' => completed_at}.merge(attrs)
263
+ else
264
+ attrs
265
+ end
266
+ end
267
+
268
+ # Returns [Hash] the status of this job
269
+ def status(time_zone = 'Eastern Time (US & Canada)')
270
+ h = as_json
271
+ h.delete('seconds')
272
+ h.delete('percent_complete') if completed?
273
+ h.delete('failure_count') unless failure_count > 0
274
+ h.dup.each_pair do |k, v|
275
+ case
276
+ when v.is_a?(Time)
277
+ h[k] = v.in_time_zone(time_zone).to_s
278
+ when v.is_a?(BSON::ObjectId)
279
+ h[k] = v.to_s
280
+ end
281
+ end
282
+ h
283
+ end
284
+
285
+ end
286
+ end
287
+ end
288
+ end
@@ -0,0 +1,167 @@
1
+ # encoding: UTF-8
2
+ require 'active_support/concern'
3
+
4
+ module RocketJob
5
+ module Plugins
6
+ module Job
7
+ # Persistence support for jobs: indexes, retrieval of the next job to process, counts by state and reload handling
8
+ module Persistence
9
+ extend ActiveSupport::Concern
10
+
11
+ included do
12
+ # Store all job types in this collection
13
+ set_collection_name 'rocket_job.jobs'
14
+
15
+ # Create indexes
16
+ def self.create_indexes
17
+ # Used by find_and_modify in .rocket_job_retrieve
18
+ ensure_index({state: 1, priority: 1, _id: 1}, background: true)
19
+ # Remove outdated indexes if present
20
+ drop_index('state_1_run_at_1_priority_1_created_at_1_sub_state_1') rescue nil
21
+ drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
22
+ drop_index('state_1_priority_1_created_at_1') rescue nil
23
+ drop_index('created_at_1') rescue nil
24
+ end
25
+
26
+ # Retrieves the next job to work on in priority based order
27
+ # and assigns it to this worker
28
+ #
29
+ # Returns nil if no jobs are available for processing
30
+ #
31
+ # Parameters
32
+ # worker_name [String]
33
+ # Name of the worker that will be processing this job
34
+ #
35
+ # skip_job_ids [Array<BSON::ObjectId>]
36
+ # Job ids to exclude when looking for the next job
37
+ def self.rocket_job_retrieve(worker_name, skip_job_ids = nil)
38
+ run_at = [
39
+ {run_at: {'$exists' => false}},
40
+ {run_at: {'$lte' => Time.now}}
41
+ ]
42
+ query =
43
+ if defined?(RocketJobPro)
44
+ {
45
+ '$and' => [
46
+ {
47
+ '$or' => [
48
+ {'state' => 'queued'}, # Jobs
49
+ {'state' => 'running', 'sub_state' => :processing} # Slices
50
+ ]
51
+ },
52
+ {
53
+ '$or' => run_at
54
+ }
55
+ ]
56
+ }
57
+ else
58
+ {
59
+ 'state' => 'queued',
60
+ '$or' => run_at
61
+ }
62
+ end
63
+
64
+ query['_id'] = {'$nin' => skip_job_ids} if skip_job_ids && skip_job_ids.size > 0
65
+
66
+ if doc = find_and_modify(
67
+ query: query,
68
+ sort: {priority: 1, _id: 1},
69
+ update: {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
70
+ )
71
+ load(doc)
72
+ end
73
+ end
74
+
75
+ # Returns [Hash<Symbol:Integer>] of the number of jobs in each state
76
+ # Queued jobs are separated into :queued_now and :scheduled
77
+ # :queued_now are jobs that are awaiting processing and can be processed now.
78
+ # :scheduled are jobs scheduled to run in the future.
79
+ #
80
+ # Note: If there are no jobs in that particular state then the hash will not have a value for it
81
+ #
82
+ # Example jobs in every state:
83
+ # RocketJob::Job.counts_by_state
84
+ # # => {
85
+ # :aborted => 1,
86
+ # :completed => 37,
87
+ # :failed => 1,
88
+ # :paused => 3,
89
+ # :queued => 4,
90
+ # :running => 1,
91
+ # :queued_now => 1,
92
+ # :scheduled => 3
93
+ # }
94
+ #
95
+ # Example jobs in some states:
96
+ # RocketJob::Job.counts_by_state
97
+ # # => {
98
+ # :failed => 1,
99
+ # :running => 25,
100
+ # :completed => 1237
101
+ # }
102
+ def self.counts_by_state
103
+ counts = {}
104
+ collection.aggregate([
105
+ {
106
+ '$group' => {
107
+ _id: '$state',
108
+ count: {'$sum' => 1}
109
+ }
110
+ }
111
+ ]
112
+ ).each do |result|
113
+ counts[result['_id']] = result['count']
114
+ end
115
+
116
+ # Calculate :queued_now and :scheduled if there are queued jobs
117
+ if queued_count = counts[:queued]
118
+ scheduled_count = RocketJob::Job.where(state: :queued, run_at: {'$gt' => Time.now}).count
119
+ if scheduled_count > 0
120
+ queued_now_count = queued_count - scheduled_count
121
+ counts[:queued_now] = queued_count - scheduled_count if queued_now_count > 0
122
+ counts[:scheduled] = scheduled_count
123
+ else
124
+ counts[:queued_now] = queued_count
125
+ end
126
+ end
127
+ counts
128
+ end
129
+
130
+ end
131
+
132
+ # Set in-memory job to complete if `destroy_on_complete` and the job has been destroyed
133
+ def reload
134
+ return super unless destroy_on_complete
135
+ begin
136
+ super
137
+ rescue MongoMapper::DocumentNotFound
138
+ unless completed?
139
+ self.state = :completed
140
+ rocket_job_set_completed_at
141
+ rocket_job_mark_complete
142
+ end
143
+ self
144
+ end
145
+ end
146
+
147
+ private
148
+
149
+ # After this model is loaded, convert any hashes in the arguments list to HashWithIndifferentAccess
150
+ def load_from_database(*args)
151
+ super
152
+ if arguments.present?
153
+ self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
154
+ end
155
+ end
156
+
157
+ # Apply RocketJob defaults after initializing default values
158
+ # but before setting attributes. after_initialize is too late
159
+ def initialize_default_values(except = {})
160
+ super
161
+ rocket_job_set_defaults
162
+ end
163
+
164
+ end
165
+ end
166
+ end
167
+ end