procrastinator 0.6.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,116 +1,136 @@
+ # frozen_string_literal: true
+
  module Procrastinator
+ # A QueueWorker checks for tasks to run from the loader defined in the provided config and executes them,
+ # updating information in the task loader as necessary.
+ #
+ # @author Robin Miller
  class QueueWorker
- DEFAULT_TIMEOUT = 3600 # in seconds; one hour total
- DEFAULT_MAX_ATTEMPTS = 20
- DEFAULT_UPDATE_PERIOD = 10 # seconds
- DEFAULT_MAX_TASKS = 10
-
- attr_reader :name, :timeout, :max_attempts, :update_period, :max_tasks
-
- # Timeout is in seconds
- def initialize(name:,
- persister:,
- log_dir: nil,
- log_level: Logger::INFO,
- max_attempts: DEFAULT_MAX_ATTEMPTS,
- timeout: DEFAULT_TIMEOUT,
- update_period: DEFAULT_UPDATE_PERIOD,
- max_tasks: DEFAULT_MAX_TASKS)
- raise ArgumentError.new('Queue name may not be nil') unless name
- raise ArgumentError.new('Persister may not be nil') unless persister
-
- raise(MalformedTaskPersisterError.new('The supplied IO object must respond to #read_tasks')) unless persister.respond_to? :read_tasks
- raise(MalformedTaskPersisterError.new('The supplied IO object must respond to #update_task')) unless persister.respond_to? :update_task
- raise(MalformedTaskPersisterError.new('The supplied IO object must respond to #delete_task')) unless persister.respond_to? :delete_task
-
- @name = name.to_s.gsub(/\s/, '_').to_sym
- @timeout = timeout
- @max_attempts = max_attempts
- @update_period = update_period
- @max_tasks = max_tasks
- @persister = persister
- @log_dir = log_dir
- @log_level = log_level
+ extend Forwardable

- start_log
+ def_delegators :@queue, :name
+
+ # expected methods for all persistence strategies
+ PERSISTER_METHODS = [:read, :update, :delete].freeze
+
+ def initialize(queue:, config:, scheduler: nil)
+ @queue = queue
+ @config = config
+ @scheduler = scheduler
+
+ @logger = nil
  end

  def work
+ start_log
+
  begin
  loop do
- sleep(@update_period)
+ sleep(@queue.update_period)

  act
  end
  rescue StandardError => e
+ raise if @config.test_mode? || !@logger
+
  @logger.fatal(e)
- # raise e
  end
  end

  def act
- # shuffling and re-sorting to avoid worst case O(n^2) on quicksort (which is default ruby sort)
- # when receiving already sorted data. Ideally, we'd use a better algo, but this will do for now
- tasks = @persister.read_tasks(@name).reject { |t| t[:run_at].nil? }.shuffle.sort_by { |t| t[:run_at] }
+ persister = @config.loader

- tasks.first(@max_tasks).each do |task_data|
- if Time.now.to_i >= task_data[:run_at].to_i
- task_data.merge!(logger: @logger) if @logger
+ tasks = fetch_tasks(persister)

- tw = TaskWorker.new(task_data)
+ tasks.each do |metadata|
+ tw = build_worker(metadata)

- tw.work
+ tw.work

- if tw.successful?
- @persister.delete_task(task_data[:id])
- else
- @persister.update_task(tw.to_hash.merge(queue: @name))
- end
+ if tw.successful?
+ persister.delete(metadata.id)
+ else
+ persister.update(metadata.id, tw.to_h.merge(queue: @queue.name.to_s))
  end
  end
  end

  def long_name
- "#{@name}-queue-worker"
+ name = "#{ @queue.name }-queue-worker"
+
+ name = "#{ @config.prefix }-#{ name }" if @config.prefix
+
+ name
  end

  # Starts a log file and stores the logger within this queue worker.
  #
  # Separate from init because logging is context-dependent
  def start_log
- if @log_dir
- log_path = Pathname.new("#{@log_dir}/#{long_name}.log")
+ return if @logger || !@config.log_dir

- log_path.dirname.mkpath
- File.open(log_path.to_path, 'a+') do |f|
- f.write ''
- end
+ @logger = Logger.new(log_target, level: @config.log_level)

- @logger = Logger.new(log_path.to_path)
+ msg = <<~MSG
+ ======================================================================
+ Started worker process, #{ long_name }, to work off queue #{ @queue.name }.
+ Worker pid=#{ Process.pid }; parent pid=#{ Process.ppid }.
+ ======================================================================
+ MSG

- @logger.level = @log_level
+ @logger.info("\n#{ msg }")
+ end
+
+ private
+
+ def build_worker(metadata)
+ start_log

- @logger.info(['',
- '===================================',
- "Started worker process, #{long_name}, to work off queue #{@name}.",
- "Worker pid=#{Process.pid}; parent pid=#{Process.ppid}.",
- '==================================='].join("\n"))
+ TaskWorker.new(metadata: metadata,
+ queue: @queue,
+ scheduler: @scheduler,
+ context: @config.context,
+ logger: @logger)
+ end
+
+ def log_target
+ return $stdout if @config.test_mode?
+
+ log_path = @config.log_dir + "#{ long_name }.log"
+
+ write_log_file(log_path)
+
+ log_path.to_path
+ end
+
+ def write_log_file(log_path)
+ @config.log_dir.mkpath
+ File.open(log_path.to_path, 'a+') do |f|
+ f.write ''
  end
  end

- # Logs a termination due to parent process termination
- #
- # == Parameters:
- # @param ppid the parent's process id
- # @param pid the child's process id
- #
- def log_parent_exit(ppid:, pid:)
- raise RuntimeError.new('Cannot log when logger not defined. Call #start_log first.') unless @logger
+ def fetch_tasks(persister)
+ tasks = persister.read(queue: @queue.name).map(&:to_h).reject { |t| t[:run_at].nil? }
+
+ tasks = sort_tasks(tasks)
+
+ metas = tasks.collect do |t|
+ TaskMetaData.new(t.delete_if { |key| !TaskMetaData::EXPECTED_DATA.include?(key) })
+ end
+
+ metas.select(&:runnable?)
+ end

- @logger.error("Terminated worker process (pid=#{pid}) due to main process (ppid=#{ppid}) disappearing.")
+ def sort_tasks(tasks)
+ # shuffling and re-sorting to avoid worst case O(n^2) when receiving already sorted data
+ # on quicksort (which is default ruby sort). It is not unreasonable that the persister could return sorted
+ # results
+ # Ideally, we'd use a better algo than qsort for this, but this will do for now
+ tasks.shuffle.sort_by { |t| t[:run_at] }.first(@queue.max_tasks)
  end
  end

  class MalformedTaskPersisterError < StandardError
  end
- end
+ end
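
The persistence interface the worker expects has also changed: PERSISTER_METHODS is now [:read, :update, :delete] (with a #create used by the Scheduler in the next hunk), replacing the old #read_tasks/#update_task/#delete_task trio. A minimal in-memory sketch of a store that satisfies the calls visible in this diff; the class itself is illustrative and not part of the gem:

    # Hypothetical task store; written only against the calls shown in this diff.
    class InMemoryTaskStore
       def initialize
          @tasks   = {}
          @next_id = 0
       end

       # QueueWorker calls read(queue: name); Scheduler calls read(identifier_hash).
       def read(filter = {})
          @tasks.values.select do |task|
             filter.all? { |key, value| task[key].to_s == value.to_s }
          end
       end

       def create(queue:, run_at:, initial_run_at:, expire_at:, data:)
          id = (@next_id += 1)
          @tasks[id] = { id: id, queue: queue, run_at: run_at,
                         initial_run_at: initial_run_at, expire_at: expire_at,
                         data: data, attempts: 0, last_error: nil, last_fail_at: nil }
       end

       def update(id, new_data)
          @tasks[id]&.merge!(new_data)
       end

       def delete(id)
          @tasks.delete(id)
       end
    end
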
@@ -0,0 +1,171 @@
+ # frozen_string_literal: true
+
+ module Procrastinator
+ # A Scheduler object provides the API for client applications to manage delayed tasks.
+ #
+ # Use #delay to schedule new tasks, #reschedule to alter existing tasks, and #cancel to remove unwanted tasks.
+ #
+ # @author Robin Miller
+ class Scheduler
+ extend Forwardable
+
+ def_delegators :@queue_manager, :act
+
+ def initialize(config, queue_manager)
+ @config = config
+ @queue_manager = queue_manager
+ end
+
+ # Records a new task to be executed at the given time.
+ #
+ # @param queue [Symbol] the symbol identifier for the queue to add a new task on
+ # @param run_at [Time, Integer] Optional time when this task should be executed. Defaults to the current time.
+ # @param data [Hash, Array] Optional simple data object to be provided to the task upon execution.
+ # @param expire_at [Time, Integer] Optional time when the task should be abandoned
+ def delay(queue = nil, data: nil, run_at: Time.now.to_i, expire_at: nil)
+ verify_queue_arg!(queue)
+
+ queue = @config.queue.name if @config.single_queue?
+
+ verify_queue_data!(queue, data)
+
+ loader.create(queue: queue.to_s,
+ run_at: run_at.to_i,
+ initial_run_at: run_at.to_i,
+ expire_at: expire_at.nil? ? nil : expire_at.to_i,
+ data: YAML.dump(data))
+ end
+
+ # Alters an existing task to run at a new time, expire at a new time, or both.
+ #
+ # Call #to on the result and pass in the new :run_at and/or :expire_at.
+ #
+ # Example:
+ #
+ # scheduler.reschedule(:alerts, data: {user_id: 5}).to(run_at: Time.now, expire_at: Time.now + 10)
+ #
+ # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+ #
+ # @param queue [Symbol] the symbol identifier for the queue to add a new task on
+ # @param identifier [Hash] Some identifying information to find the appropriate task.
+ #
+ # @see TaskMetaData
+ def reschedule(queue, identifier)
+ UpdateProxy.new(@config, identifier: identifier.merge(queue: queue.to_s))
+ end
+
+ # Removes an existing task, as located by the givne identifying information.
+ #
+ # The identifier can include any data field stored in the task loader. Often this is the information in :data.
+ #
+ # @param queue [Symbol] the symbol identifier for the queue to add a new task on
+ # @param identifier [Hash] Some identifying information to find the appropriate task.
+ #
+ # @see TaskMetaData
+ def cancel(queue, identifier)
+ tasks = loader.read(identifier.merge(queue: queue.to_s))
+
+ raise "no task matches search: #{ identifier }" if tasks.empty?
+ raise "multiple tasks match search: #{ identifier }" if tasks.size > 1
+
+ loader.delete(tasks.first[:id])
+ end
+
+ # Provides a more natural syntax for rescheduling tasks
+ #
+ # @see Scheduler#reschedule
+ class UpdateProxy
+ def initialize(config, identifier:)
+ identifier[:data] = YAML.dump(identifier[:data]) if identifier[:data]
+
+ @config = config
+ @identifier = identifier
+ end
+
+ def to(run_at: nil, expire_at: nil)
+ task = fetch_task(@identifier)
+
+ verify_time_provided(run_at, expire_at)
+ validate_run_at(run_at, task[:expire_at], expire_at)
+
+ new_data = {
+ attempts: 0,
+ last_error: nil,
+ last_error_at: nil
+ }
+
+ new_data = new_data.merge(run_at: run_at.to_i, initial_run_at: run_at.to_i) if run_at
+ new_data = new_data.merge(expire_at: expire_at.to_i) if expire_at
+
+ @config.loader.update(task[:id], new_data)
+ end
+
+ alias at to
+
+ private
+
+ def verify_time_provided(run_at, expire_at)
+ raise ArgumentError, 'you must provide at least :run_at or :expire_at' if run_at.nil? && expire_at.nil?
+ end
+
+ def validate_run_at(run_at, saved_expire_at, expire_at)
+ return unless run_at
+
+ after_new_expire = expire_at && run_at.to_i > expire_at.to_i
+
+ raise "given run_at (#{ run_at }) is later than given expire_at (#{ expire_at })" if after_new_expire
+
+ after_old_expire = saved_expire_at && run_at.to_i > saved_expire_at
+
+ raise "given run_at (#{ run_at }) is later than saved expire_at (#{ saved_expire_at })" if after_old_expire
+ end
+
+ def fetch_task(identifier)
+ tasks = @config.loader.read(identifier)
+
+ raise "no task found matching #{ identifier }" if tasks.nil? || tasks.empty?
+ raise "too many (#{ tasks.size }) tasks match #{ identifier }. Found: #{ tasks }" if tasks.size > 1
+
+ tasks.first
+ end
+ end
+
+ private
+
+ # Scheduler must always get the loader indirectly. If it saves the loader to an instance variable,
+ # then that could hold a reference to a bad (ie. gone) connection on the previous process
+ def loader
+ @config.loader
+ end
+
+ def verify_queue_arg!(queue_name)
+ raise ArgumentError, <<~ERR if !queue_name.nil? && !queue_name.is_a?(Symbol)
+ must provide a queue name as the first argument. Received: #{ queue_name }
+ ERR
+
+ raise ArgumentError, <<~ERR if queue_name.nil? && !@config.single_queue?
+ queue must be specified when more than one is registered. Defined queues are: #{ @config.queues_string }
+ ERR
+ end
+
+ def verify_queue_data!(queue_name, data)
+ queue = @config.queue(name: queue_name)
+
+ unless queue
+ queue_list = @config.queues_string
+ raise ArgumentError, "there is no :#{ queue_name } queue registered. Defined queues are: #{ queue_list }"
+ end
+
+ if data.nil?
+ if queue.task_class.method_defined?(:data=)
+ raise ArgumentError, "task #{ queue.task_class } expects to receive :data. Provide :data to #delay."
+ end
+ elsif !queue.task_class.method_defined?(:data=)
+ raise ArgumentError, <<~ERROR
+ task #{ queue.task_class } does not import :data. Add this in your class definition:
+ task_attr :data
+ ERROR
+ end
+ end
+ end
+ end
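
Taken together with the documentation comments above, typical client usage of the new Scheduler would look roughly like the sketch below. It assumes `scheduler` is the Scheduler instance produced by Procrastinator's setup (not shown in this diff); the :welcome queue and data payloads are placeholders:

    # Schedule a task for as soon as possible, or for a specific window.
    scheduler.delay(:welcome, data: { user_id: 5 })
    scheduler.delay(:welcome,
                    run_at:    Time.now + 3600,     # start no earlier than an hour from now
                    expire_at: Time.now + 86_400,   # abandon it after a day
                    data:      { user_id: 5 })

    # Move an existing task; #reschedule returns an UpdateProxy, so finish with #to.
    scheduler.reschedule(:welcome, data: { user_id: 5 }).to(run_at: Time.now + 60)

    # Remove a task that matches the identifier exactly once.
    scheduler.cancel(:welcome, data: { user_id: 5 })
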
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ module Procrastinator
+ # Module to be included by user-defined task classes. It provides some extra error checking and a convenient way
+ # for the task class to access additional information (data, logger, etc) from Procrastinator.
+ #
+ # If you are averse to including this in your task class, you can just declare an attr_accessor for the
+ # information you want Procrastinator to feed your task.
+ #
+ # @author Robin Miller
+ module Task
+ KNOWN_ATTRIBUTES = [:logger, :context, :data, :scheduler].freeze
+
+ def self.included(base)
+ base.extend(TaskClassMethods)
+ end
+
+ def respond_to_missing?(name, include_private)
+ super
+ end
+
+ def method_missing(method_name, *args, &block)
+ if KNOWN_ATTRIBUTES.include?(method_name)
+ raise NameError, "To access Procrastinator::Task attribute :#{ method_name }, " \
+ "call task_attr(:#{ method_name }) in your class definition."
+ end
+
+ super
+ end
+
+ # Module that provides the task_attr class method for task definitions to declare their expected information.
+ module TaskClassMethods
+ def task_attr(*fields)
+ attr_list = KNOWN_ATTRIBUTES.collect { |a| ':' + a.to_s }.join(', ')
+
+ fields.each do |field|
+ err = "Unknown Procrastinator::Task attribute :#{ field }. " \
+ "Importable attributes are: #{ attr_list }"
+ raise ArgumentError, err unless KNOWN_ATTRIBUTES.include?(field)
+ end
+
+ attr_accessor(*fields)
+ end
+ end
+ end
+ end
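
A task class that includes this module declares which of the known attributes it wants with task_attr. The sketch below is illustrative: the class name, the #run hook, and its body are placeholders not shown in this diff, and only :logger, :context, :data, and :scheduler may be imported:

    # Hypothetical task definition using the module above.
    class SendWelcomeEmail
       include Procrastinator::Task

       task_attr :data, :logger   # defines accessors that Procrastinator populates

       def run
          logger.info("emailing user #{ data[:user_id] }")
          # ... actual work goes here ...
       end
    end

    # Importing anything else is rejected up front:
    #    task_attr :nonsense   # => ArgumentError: Unknown Procrastinator::Task attribute :nonsense. ...
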
@@ -0,0 +1,128 @@
+ # frozen_string_literal: true
+
+ module Procrastinator
+ # TaskMetaData objects are State Patterns that record information about the work done on a particular task.
+ #
+ # It contains the specific information needed to run a task instance. Users define a task class, which describes
+ # the "how" of a task and TaskMetaData represents the "what" and "when".
+ #
+ # It contains task-specific data, timing information, and error records.
+ #
+ # All of its state is read-only.
+ #
+ # @author Robin Miller
+ #
+ # @!attribute [r] :id
+ # @return [Integer] the unique identifier for this task
+ # @!attribute [r] :run_at
+ # @return [Integer] Linux epoch timestamp of when to attempt this task next
+ # @!attribute [r] :initial_run_at
+ # @return [Integer] Linux epoch timestamp of the original value for run_at
+ # @!attribute [r] :expire_at
+ # @return [Integer] Linux epoch timestamp of when to consider this task obsolete
+ # @!attribute [r] :attempts
+ # @return [Integer] The number of times this task has been attempted
+ # @!attribute [r] :last_error
+ # @return [String] The message and stack trace of the error encountered on the most recent failed attempt
+ # @!attribute [r] :last_fail_at
+ # @return [Integer] Linux epoch timestamp of when the last_error was recorded
+ # @!attribute [r] :data
+ # @return [String] User-provided data serialized to string using YAML
+ class TaskMetaData
+ # These are the attributes expected to be in the persistence mechanism
+ EXPECTED_DATA = [:id, :run_at, :initial_run_at, :expire_at, :attempts, :last_error, :last_fail_at, :data].freeze
+
+ attr_reader(*EXPECTED_DATA)
+
+ def initialize(id: nil,
+ run_at: nil,
+ initial_run_at: nil,
+ expire_at: nil,
+ attempts: 0,
+ last_error: nil,
+ last_fail_at: nil,
+ data: nil)
+ @id = id
+ @run_at = run_at.nil? ? nil : run_at.to_i
+ @initial_run_at = initial_run_at.to_i
+ @expire_at = expire_at.nil? ? nil : expire_at.to_i
+ @attempts = attempts || 0
+ @last_error = last_error
+ @last_fail_at = last_fail_at
+ @data = data ? YAML.safe_load(data, [Symbol, Date]) : nil
+ end
+
+ def init_task(queue)
+ @data ? queue.task_class.new(@data) : queue.task_class.new
+ end
+
+ def add_attempt
+ @attempts += 1
+ end
+
+ def clear_fails
+ @last_error = nil
+ @last_fail_at = nil
+ end
+
+ def fail(msg, final: false)
+ @last_fail_at = Time.now.to_i
+ @last_error = msg
+ @run_at = nil if final
+ end
+
+ def final_fail?(queue)
+ too_many_fails?(queue) || expired?
+ end
+
+ def expired?
+ !@expire_at.nil? && Time.now.to_i > @expire_at
+ end
+
+ def too_many_fails?(queue)
+ !queue.max_attempts.nil? && @attempts >= queue.max_attempts
+ end
+
+ def runnable?
+ !(@run_at.nil? || Time.now.to_i < @run_at)
+ end
+
+ def successful?
+ raise 'you cannot check for success before running #work' if !expired? && @attempts <= 0
+
+ !expired? && @last_error.nil? && @last_fail_at.nil?
+ end
+
+ # TODO: This cop for ** is currently incorrect. This disable can be removed once they fix it.
+ # rubocop:disable Layout/SpaceAroundOperators
+ def reschedule
+ # (30 + n_attempts^4) seconds is chosen to rapidly expand
+ # but with the baseline of 30s to avoid hitting the disk too frequently.
+ @run_at += 30 + (@attempts ** 4) unless @run_at.nil?
+ end
+
+ # rubocop:enable Layout/SpaceAroundOperators
+
+ def serialized_data
+ YAML.dump(@data)
+ end
+
+ def verify_expiry!
+ raise TaskExpiredError, "task is over its expiry time of #{ @expire_at }" if expired?
+ end
+
+ def to_h
+ {id: @id,
+ run_at: @run_at,
+ initial_run_at: @initial_run_at,
+ expire_at: @expire_at,
+ attempts: @attempts,
+ last_fail_at: @last_fail_at,
+ last_error: @last_error,
+ data: serialized_data}
+ end
+ end
+
+ class TaskExpiredError < StandardError
+ end
+ end
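
For reference, the retry backoff in TaskMetaData#reschedule pushes a failed task back by 30 + attempts**4 seconds, so the wait grows quickly while never dropping below 30 seconds. A quick check of the first few delays:

    # Delay added after each failed attempt, per #reschedule above.
    (1..6).each do |attempts|
       puts "attempt #{ attempts }: +#{ 30 + attempts**4 } seconds"
    end
    # => +31, +46, +111, +286, +655, +1326 seconds
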