procrastinator 0.6.1 → 1.0.0.pre.rc2

@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ require 'rake'
+
+ module Procrastinator
+    module Rake
+       # Rake task builder. Provide this in your Rakefile:
+       #
+       #    require 'procrastinator/rake/task'
+       #
+       #    # where scheduler is your Procrastinator::Scheduler instance
+       #    Procrastinator::Rake::DaemonTasks.define(scheduler: scheduler, pid_path: '/var/run')
+       #
+       class DaemonTasks
+          include ::Rake::Cloneable
+          include ::Rake::DSL
+
+          # Shorthand for DaemonTasks.new.define
+          #
+          # @param (see #define)
+          # @see DaemonTasks#define
+          def self.define(**args)
+             new.define(**args)
+          end
+
+          # Defines procrastinator:start, procrastinator:status, and procrastinator:stop Rake tasks that operate on the given scheduler.
+          # If provided a block, that block will run in the daemon process.
+          #
+          # @param scheduler [Procrastinator::Scheduler]
+          # @param pid_path [Pathname, File, String, nil]
+          def define(scheduler:, pid_path: nil, &block)
+             pid_path = Scheduler::DaemonWorking.normalize_pid(pid_path)
+
+             namespace :procrastinator do
+                task :start do
+                   scheduler.work.daemonized!(pid_path, &block)
+                end
+
+                task :status do
+                   if Scheduler::DaemonWorking.running?(pid_path)
+                      warn "Procrastinator instance running (pid #{ File.read(pid_path) })"
+                   else
+                      warn "No Procrastinator instance detected for #{ pid_path }"
+                   end
+                end
+
+                task :stop do
+                   Scheduler::DaemonWorking.halt!(pid_path)
+                end
+             end
+          end
+       end
+    end
+ end
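
For reference, a minimal Rakefile using the task builder above might look like the following sketch. The scheduler variable is assumed to be a Procrastinator::Scheduler built by your application's own setup code, and the pid path is illustrative:

   require 'procrastinator/rake/task'

   # defines rake procrastinator:start, procrastinator:status, and procrastinator:stop
   Procrastinator::Rake::DaemonTasks.define(scheduler: scheduler, pid_path: '/var/run')
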
@@ -0,0 +1,3 @@
+ # frozen_string_literal: true
+
+ require_relative 'daemon_tasks'
@@ -0,0 +1,393 @@
+ # frozen_string_literal: true
+
+ require 'stringio'
+
+ module Procrastinator
+    # A Scheduler object provides the API for client applications to manage delayed tasks.
+    #
+    # Use #delay to schedule new tasks, #reschedule to alter existing tasks, and #cancel to remove unwanted tasks.
+    #
+    # @author Robin Miller
+    class Scheduler
+       def initialize(config)
+          @config = config
+       end
+
+       # Records a new task to be executed at the given time.
+       #
+       # @param queue_name [Symbol] the symbol identifier for the queue to add a new task on
+       # @param run_at [Time, Integer] Optional time when this task should be executed. Defaults to the current time.
+       # @param data [Hash, Array, String, Integer] Optional simple data object to be provided to the task on execution.
+       # @param expire_at [Time, Integer] Optional time when the task should be abandoned
+       def delay(queue_name = nil, data: nil, run_at: Time.now, expire_at: nil)
+          raise ArgumentError, <<~ERR unless queue_name.nil? || queue_name.is_a?(Symbol)
+             must provide a queue name as the first argument. Received: #{ queue_name }
+          ERR
+
+          queue = @config.queue(name: queue_name)
+
+          queue.create(run_at: run_at, expire_at: expire_at, data: data)
+       end
+
+       # Alters an existing task to run at a new time, expire at a new time, or both.
+       #
+       # Call #to on the result and pass in the new :run_at and/or :expire_at.
+       #
+       # Example:
+       #
+       #    scheduler.reschedule(:alerts, data: {user_id: 5}).to(run_at: Time.now, expire_at: Time.now + 10)
+       #
+       # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+       #
+       # @param queue [Symbol] the symbol identifier for the queue the task is on
+       # @param identifier [Hash] Some identifying information to find the appropriate task.
+       #
+       # @see TaskMetaData
+       def reschedule(queue, identifier)
+          UpdateProxy.new(@config, identifier: identifier.merge(queue: queue.to_s))
+       end
+
+       # Removes an existing task, as located by the given identifying information.
+       #
+       # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+       #
+       # @param queue [Symbol] the symbol identifier for the queue the task is on
+       # @param identifier [Hash] Some identifying information to find the appropriate task.
+       #
+       # @see TaskMetaData
+       def cancel(queue, identifier)
+          queue = @config.queue(name: queue)
+
+          tasks = queue.read(identifier.merge(queue: queue.name.to_s))
+
+          raise "no task matches search: #{ identifier }" if tasks.empty?
+          raise "multiple tasks match search: #{ identifier }" if tasks.size > 1
+
+          queue.delete(tasks.first[:id])
+       end
+
+       # Spawns a new worker thread for each queue defined in the config
+       #
+       # @param queue_names [Array<String,Symbol>] Names of specific queues to act upon.
+       #    Omit or leave empty to act on all queues.
+       def work(*queue_names)
+          queue_names = @config.queues if queue_names.empty?
+
+          workers = queue_names.collect do |queue_name|
+             QueueWorker.new(queue: queue_name, config: @config)
+          end
+
+          WorkProxy.new(workers, @config)
+       end
+
+       # Provides a more natural syntax for rescheduling tasks
+       #
+       # @see Scheduler#reschedule
+       class UpdateProxy
+          def initialize(queue, identifier:)
+             @queue = queue
+             @identifier = identifier
+          end
+
+          def to(run_at: nil, expire_at: nil)
+             task = @queue.fetch_task(@identifier)
+
+             raise ArgumentError, 'you must provide at least :run_at or :expire_at' if run_at.nil? && expire_at.nil?
+
+             task.reschedule(expire_at: expire_at) if expire_at
+             task.reschedule(run_at: run_at) if run_at
+
+             new_data = task.to_h
+             new_data.delete(:queue)
+             new_data.delete(:data)
+             @queue.update(new_data.delete(:id), new_data)
+          end
+
+          alias at to
+       end
+
+       # Serial work style
+       #
+       # @see WorkProxy
+       module SerialWorking
+          # Work off the given number of tasks for each queue and return
+          # @param steps [integer] The number of tasks to complete.
+          def serially(steps: 1)
+             steps.times do
+                workers.each(&:work_one)
+             end
+          end
+       end
+
+       # Threaded work style
+       #
+       # @see WorkProxy
+       module ThreadedWorking
+          PROG_NAME = 'Procrastinator'
+
+          # Work off jobs per queue, each in its own thread.
+          #
+          # @param timeout Maximum number of seconds to run for. If nil, will run indefinitely.
+          def threaded(timeout: nil)
+             open_log
+             shutdown_on_interrupt
+
+             begin
+                @threads = spawn_threads
+
+                @logger.info "Procrastinator running. Process ID: #{ Process.pid }"
+                @threads.each do |thread|
+                   thread.join(timeout)
+                end
+             rescue StandardError => e
+                thread_crash(e)
+             ensure
+                @logger&.info 'Halting worker threads...'
+                shutdown!
+                @logger&.info 'Threads halted.'
+             end
+          end
+
+          private
+
+          def spawn_threads
+             @logger.info "Starting workers for queues: #{ @workers.collect(&:name).join(', ') }"
+
+             @workers.collect do |worker|
+                @logger.debug "Spawning thread: #{ worker.name }"
+                Thread.new(worker) do |w|
+                   Thread.current.abort_on_exception = true
+                   Thread.current.thread_variable_set(:name, w.name)
+
+                   begin
+                      worker.work!
+                   ensure
+                      worker.halt
+                   end
+                end
+             end
+          end
+
+          def thread_crash(error)
+             crashed_threads = (@threads || []).select { |t| t.status.nil? }.collect do |thread|
+                "Crashed thread: #{ thread.thread_variable_get(:name) }"
+             end
+
+             @logger.fatal <<~MSG
+                Crash detected in queue worker thread.
+                #{ crashed_threads.join("\n") }
+                #{ error.message }
+                #{ error.backtrace.join("\n\t") }
+             MSG
+          end
+
+          def shutdown_on_interrupt
+             Signal.trap('INT') do
+                warn "\n" # just to separate the shutdown log item
+                shutdown!
+             end
+          end
+
+          def shutdown!
+             (@threads || []).select(&:alive?).each(&:kill)
+          end
+
+          def open_log(quiet: false)
+             return if @logger
+
+             log_devs = []
+
+             log_devs << StringIO.new if quiet && !@config.log_level
+             log_devs << $stderr unless quiet
+             log_devs << log_path.open('a') if @config.log_level
+
+             multi = MultiIO.new(*log_devs)
+             multi.sync = true
+
+             @logger = Logger.new(multi,
+                                  progname: PROG_NAME.downcase,
+                                  level: @config.log_level || Logger::INFO,
+                                  formatter: Config::DEFAULT_LOG_FORMATTER)
+          end
+
+          def log_path
+             path = @config.log_dir / "#{ PROG_NAME.downcase }.log"
+             path.dirname.mkpath
+             # FileUtils.touch(log_path)
+             path
+          end
+
+          # IO Multiplexer that forwards calls to a list of IO streams.
+          class MultiIO
+             def initialize(*stream)
+                @streams = stream
+             end
+
+             (IO.methods << :path << :sync=).uniq.each do |method_name|
+                define_method(method_name) do |*args|
+                   able_streams(method_name).collect do |stream|
+                      stream.send(method_name, *args)
+                   end.last # forces consistent return result type for callers (but may lose some info)
+                end
+             end
+
+             private
+
+             def able_streams(method_name)
+                @streams.select { |stream| stream.respond_to?(method_name) }
+             end
+          end
+       end
+
+       # Daemonized work style
+       #
+       # @see WorkProxy
+       module DaemonWorking
+          PID_EXT = '.pid'
+          DEFAULT_PID_DIR = Pathname.new('/var/run/').freeze
+
+          # 15 chars is linux limit
+          MAX_PROC_LEN = 15
+
+          # Consumes the current process and turns it into a background daemon. A log will be started in the log
+          # directory defined in the configuration block.
+          #
+          # If pid_path ends with extension '.pid', the basename will be requested as process title (depending on OS
+          # support). An extensionless path is assumed to be a directory and a default filename (and proctitle) is used.
+          #
+          # @param pid_path [Pathname, File, String, nil] Path to where the process ID file is to be kept.
+          # @yield [void] Block to run after daemonization
+          def daemonized!(pid_path = nil, &block)
+             spawn_daemon(pid_path, &block)
+
+             threaded
+          end
+
+          # Normalizes the given pid path, including conversion to absolute path and defaults.
+          #
+          # @param pid_path [Pathname, String] path to normalize
+          def self.normalize_pid(pid_path)
+             pid_path = Pathname.new(pid_path || DEFAULT_PID_DIR)
+             pid_path /= "#{ PROG_NAME.downcase }#{ PID_EXT }" unless pid_path.extname == PID_EXT
+
+             pid_path.expand_path
+          end
+
+          # Stops the procrastinator process denoted by the provided pid file
+          def self.halt!(pid_path)
+             pid_path = normalize_pid pid_path
+
+             Process.kill('TERM', pid_path.read.to_i)
+          end
+
+          def self.running?(pid_path)
+             pid = normalize_pid(pid_path).read.to_i
+
+             # this raises Errno::ESRCH when no process found, therefore if found we should exit
+             Process.getpgid pid
+
+             true
+          rescue Errno::ESRCH
+             false
+          end
+
+          private
+
+          # "You, search from the spastic dentistry department down through disembowelment. You, cover children's dance
+          # recitals through holiday weekend IKEA. Go."
+          def spawn_daemon(pid_path, &block)
+             pid_path = DaemonWorking.normalize_pid pid_path
+
+             open_log quiet: true
+             @logger.info "Starting #{ PROG_NAME } daemon..."
+
+             print_debug_context
+
+             Process.daemon
+
+             manage_pid pid_path
+             rename_process pid_path
+
+             yield if block
+          rescue StandardError => e
+             @logger.fatal ([e.message] + e.backtrace).join("\n")
+             raise e
+          end
+
+          def manage_pid(pid_path)
+             ensure_unique(pid_path)
+
+             @logger.debug "Managing pid at path: #{ pid_path }"
+             pid_path.dirname.mkpath
+             pid_path.write Process.pid.to_s
+
+             at_exit do
+                if pid_path.exist?
+                   @logger.debug "Cleaning up pid file #{ pid_path }"
+                   pid_path.delete
+                end
+                @logger.info "Procrastinator (pid #{ Process.pid }) halted."
+             end
+          end
+
+          def ensure_unique(pid_path)
+             return unless pid_path.exist?
+
+             @logger.debug "Checking pid file #{ pid_path }"
+
+             if DaemonWorking.running? pid_path
+                hint = 'Either terminate that process or remove the pid file (if coincidental).'
+                msg = "Another process (pid #{ pid_path.read }) already exists for #{ pid_path }. #{ hint }"
+                @logger.fatal msg
+                raise ProcessExistsError, msg
+             else
+                @logger.warn "Replacing old pid file of defunct process (pid #{ pid_path.read }) at #{ pid_path }."
+             end
+          end
+
+          def print_debug_context
+             @logger.debug "Ruby Path: #{ ENV['RUBY_ROOT'] }"
+             @logger.debug "Bundler Path: #{ ENV['BUNDLE_BIN_PATH'] }"
+             # logname is the posix standard and is set by cron, so probably reliable.
+             @logger.debug "Runtime User: #{ ENV['LOGNAME'] || ENV['USERNAME'] }"
+          end
+
+          def rename_process(pid_path)
+             name = pid_path.basename(PID_EXT).to_s
+
+             if name.size > MAX_PROC_LEN
+                @logger.warn "Process name is longer than max length (#{ MAX_PROC_LEN }). Trimming to fit."
+                name = name[0, MAX_PROC_LEN]
+             end
+
+             if system('pidof', name, out: File::NULL)
+                @logger.warn "Another process is already named '#{ name }'. Consider the 'name:' keyword to distinguish."
+             end
+
+             @logger.debug "Renaming process to: #{ name }"
+             Process.setproctitle name
+          end
+
+          include ThreadedWorking
+       end
+
+       # DSL grammar object to enable chaining #work with the three work modes.
+       #
+       # @see Scheduler#work
+       class WorkProxy
+          include SerialWorking
+          include ThreadedWorking
+          include DaemonWorking
+
+          attr_reader :workers
+
+          def initialize(workers, config)
+             @workers = workers
+             @config = config
+          end
+       end
+    end
+
+    class ProcessExistsError < RuntimeError
+    end
+ end
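
As a rough usage sketch of the Scheduler API above: the :emails queue, the data values, and the pid path are illustrative, and `scheduler` is assumed to be a configured Procrastinator::Scheduler from your application's setup code:

   # queue a task for an hour from now, carrying simple data
   scheduler.delay(:emails, run_at: Time.now + 3600, data: { user_id: 5 })

   # move it earlier; the identifier matches on stored fields such as :data
   scheduler.reschedule(:emails, data: { user_id: 5 }).to(run_at: Time.now)

   # or remove it entirely
   scheduler.cancel(:emails, data: { user_id: 5 })

   # work off tasks in one of the three styles
   scheduler.work.serially(steps: 2)              # a fixed number of tasks per queue
   scheduler.work(:emails).threaded(timeout: 10)  # one thread per queue
   scheduler.work.daemonized!('/var/run/procrastinator.pid')  # background daemon
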
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ require 'forwardable'
+ require 'time'
+
+ module Procrastinator
+    # Wraps a task handler and task metadata
+    #
+    # @author Robin Miller
+    class Task
+       extend Forwardable
+
+       def_delegators :@metadata,
+                      :id, :run_at, :initial_run_at, :expire_at,
+                      :attempts, :last_fail_at, :last_error,
+                      :data, :to_h, :serialized_data,
+                      :queue, :reschedule
+
+       def initialize(metadata, handler)
+          @metadata = metadata
+          @handler = handler
+       end
+
+       def run
+          raise ExpiredError, "task is over its expiry time of #{ @metadata.expire_at.iso8601 }" if @metadata.expired?
+
+          @metadata.add_attempt
+          result = Timeout.timeout(queue.timeout) do
+             @handler.run
+          end
+          @metadata.clear_fails
+
+          try_hook(:success, result)
+       end
+
+       alias call run
+
+       # Records a failure in metadata and attempts to run the handler's #fail hook if present.
+       #
+       # @param error [StandardError] - the error that caused the failure
+       def fail(error)
+          hook = @metadata.failure(error)
+
+          try_hook(hook, error)
+          hook
+       end
+
+       def try_hook(method, *params)
+          @handler.send(method, *params) if @handler.respond_to? method
+       rescue StandardError => e
+          warn "#{ method.to_s.capitalize } hook error: #{ e.message }"
+       end
+
+       def to_s
+          "#{ @metadata.queue.name }##{ id } [#{ serialized_data }]"
+       end
+
+       class ExpiredError < RuntimeError
+       end
+
+       class AttemptsExhaustedError < RuntimeError
+       end
+    end
+ end
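
Task wraps a user-supplied handler object. Judging from the hooks invoked above (#run, plus the optional #success, #fail, and #final_fail reached through try_hook), a handler might be sketched roughly like this; the class name and method bodies are illustrative only:

   class SendWelcomeEmail
      def run
         # the actual work; raise to trigger the failure path
      end

      def success(result)
         # optional: called with #run's return value after a clean run
      end

      def fail(error)
         # optional: called with the error when the task will be retried
      end

      def final_fail(error)
         # optional: called when the task is expired or out of attempts
      end
   end
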
@@ -0,0 +1,172 @@
+ # frozen_string_literal: true
+
+ require 'time'
+
+ module Procrastinator
+    # TaskMetaData objects are State Patterns that record information about the work done on a particular task.
+    #
+    # It contains the specific information needed to run a task instance. Users define a task handler class, which
+    # describes the "how" of a task and TaskMetaData represents the "what" and "when".
+    #
+    # It contains task-specific data, timing information, and error records.
+    #
+    # All of its state is read-only.
+    #
+    # @author Robin Miller
+    #
+    # @!attribute [r] :id
+    #    @return [Integer] the unique identifier for this task
+    # @!attribute [r] :run_at
+    #    @return [Integer] Unix epoch timestamp of when to attempt this task next
+    # @!attribute [r] :initial_run_at
+    #    @return [Integer] Unix epoch timestamp of the original value for run_at
+    # @!attribute [r] :expire_at
+    #    @return [Integer] Unix epoch timestamp of when to consider this task obsolete
+    # @!attribute [r] :attempts
+    #    @return [Integer] The number of times this task has been attempted
+    # @!attribute [r] :last_error
+    #    @return [String] The message and stack trace of the error encountered on the most recent failed attempt
+    # @!attribute [r] :last_fail_at
+    #    @return [Integer] Unix epoch timestamp of when the last_error was recorded
+    # @!attribute [r] :data
+    #    @return [String] App-provided JSON data
+    class TaskMetaData
+       # These are the attributes expected to be in the persistence mechanism
+       EXPECTED_DATA = [:id, :run_at, :initial_run_at, :expire_at, :attempts, :last_error, :last_fail_at, :data].freeze
+
+       attr_reader(*EXPECTED_DATA, :queue)
+
+       def initialize(id: nil, queue: nil, data: nil,
+                      run_at: nil, initial_run_at: nil, expire_at: nil,
+                      attempts: 0, last_error: nil, last_fail_at: nil)
+          @id = id
+          @queue = queue || raise(ArgumentError, 'queue cannot be nil')
+          @run_at = get_time(run_at)
+          @initial_run_at = get_time(initial_run_at) || @run_at
+          @expire_at = get_time(expire_at)
+          @attempts = (attempts || 0).to_i
+          @last_error = last_error
+          @last_fail_at = get_time(last_fail_at)
+          @data = data ? JSON.parse(data, symbolize_names: true) : nil
+       end
+
+       def add_attempt
+          raise Task::AttemptsExhaustedError unless attempts_left?
+
+          @attempts += 1
+       end
+
+       # Records a failure on this task
+       #
+       # @param error [StandardError] The error to record
+       def failure(error)
+          @last_fail_at = Time.now
+          @last_error = %[Task failed: #{ error.message }\n#{ error.backtrace&.join("\n") }]
+
+          if retryable?
+             reschedule
+             :fail
+          else
+             @run_at = nil
+             :final_fail
+          end
+       end
+
+       def retryable?
+          attempts_left? && !expired?
+       end
+
+       def expired?
+          !@expire_at.nil? && @expire_at < Time.now
+       end
+
+       def attempts_left?
+          @queue.max_attempts.nil? || @attempts < @queue.max_attempts
+       end
+
+       def runnable?
+          !@run_at.nil? && @run_at <= Time.now
+       end
+
+       def successful?
+          raise 'you cannot check for success before running #work' if !expired? && @attempts <= 0
+
+          !expired? && @last_error.nil? && @last_fail_at.nil?
+       end
+
+       # Updates the run and/or expiry time. If neither is provided, will reschedule based on the rescheduling
+       # calculation algorithm.
+       #
+       # @param run_at - the new time to run this task
+       # @param expire_at - the new time to expire this task
+       def reschedule(run_at: nil, expire_at: nil)
+          validate_run_at(run_at, expire_at)
+
+          @expire_at = expire_at if expire_at
+
+          if run_at
+             @run_at = @initial_run_at = get_time(run_at)
+             clear_fails
+             @attempts = 0
+          end
+
+          return if run_at || expire_at
+
+          # (30 + n_attempts^4) seconds is chosen to rapidly expand
+          # but with the baseline of 30s to avoid hitting the disk too frequently.
+          @run_at += 30 + (@attempts ** 4) unless @run_at.nil?
+       end
+
+       def to_h
+          {id: @id,
+           queue: @queue.name.to_s,
+           run_at: @run_at,
+           initial_run_at: @initial_run_at,
+           expire_at: @expire_at,
+           attempts: @attempts,
+           last_fail_at: @last_fail_at,
+           last_error: @last_error,
+           data: serialized_data}
+       end
+
+       def serialized_data
+          JSON.dump(@data)
+       end
+
+       def clear_fails
+          @last_error = nil
+          @last_fail_at = nil
+       end
+
+       private
+
+       def get_time(data)
+          case data
+          when NilClass
+             nil
+          when Numeric
+             Time.at data
+          when String
+             Time.parse data
+          when Time
+             data
+          else
+             return data.to_time if data.respond_to? :to_time
+
+             raise ArgumentError, "Unknown data type: #{ data.class } (#{ data })"
+          end
+       end
+
+       def validate_run_at(run_at, expire_at)
+          return unless run_at
+
+          if expire_at && run_at > expire_at
+             raise ArgumentError, "new run_at (#{ run_at }) is later than new expire_at (#{ expire_at })"
+          end
+
+          return unless @expire_at && run_at > @expire_at
+
+          raise ArgumentError, "new run_at (#{ run_at }) is later than existing expire_at (#{ @expire_at })"
+       end
+    end
+ end
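
A note on the retry backoff in #reschedule above: when neither run_at nor expire_at is given, the extra delay added after a failed attempt grows as 30 + attempts^4 seconds. As a quick illustration:

   (1..5).map { |attempts| 30 + attempts ** 4 }
   # => [31, 46, 111, 286, 655]  (seconds added after attempts 1 through 5)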