procrastinator 0.6.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,116 +1,136 @@
1
+ # frozen_string_literal: true
2
+
1
3
module Procrastinator
  # A QueueWorker checks for tasks to run from the loader defined in the provided config and executes them,
  # updating information in the task loader as necessary.
  #
  # @author Robin Miller
  class QueueWorker
    extend Forwardable

    def_delegators :@queue, :name

    # expected methods for all persistence strategies
    PERSISTER_METHODS = [:read, :update, :delete].freeze

    # @param queue the queue definition; this class reads #name, #update_period and #max_tasks from it
    # @param config the configuration; this class reads #loader, #prefix, #log_dir, #log_level, #context
    #   and #test_mode? from it
    # @param scheduler optional scheduler that is handed on to each TaskWorker
    def initialize(queue:, config:, scheduler: nil)
      @queue     = queue
      @config    = config
      @scheduler = scheduler

      @logger = nil
    end

    # Starts the log, then loops forever: sleeps for the queue's update period and calls #act.
    #
    # A StandardError escaping the loop is re-raised when the config is in test mode or no logger exists;
    # otherwise it is logged at fatal level and this method returns.
    def work
      start_log

      begin
        loop do
          sleep(@queue.update_period)

          act
        end
      rescue StandardError => e
        raise if @config.test_mode? || !@logger

        @logger.fatal(e)
      end
    end

    # Performs one pass over the queue: fetches the runnable tasks and works each one, deleting it from the
    # persister on success or writing the worker's updated state back otherwise.
    def act
      persister = @config.loader

      fetch_tasks(persister).each do |metadata|
        tw = build_worker(metadata)

        tw.work

        if tw.successful?
          persister.delete(metadata.id)
        else
          persister.update(metadata.id, tw.to_h.merge(queue: @queue.name.to_s))
        end
      end
    end

    # @return [String] "<queue name>-queue-worker", prepended with "<prefix>-" when the config has a prefix
    def long_name
      name = "#{ @queue.name }-queue-worker"

      name = "#{ @config.prefix }-#{ name }" if @config.prefix

      name
    end

    # Starts a log file and stores the logger within this queue worker.
    #
    # Separate from init because logging is context-dependent.
    # Does nothing when a logger already exists or the config has no log_dir.
    def start_log
      return if @logger || !@config.log_dir

      @logger = Logger.new(log_target, level: @config.log_level)

      msg = <<~MSG
        ======================================================================
        Started worker process, #{ long_name }, to work off queue #{ @queue.name }.
        Worker pid=#{ Process.pid }; parent pid=#{ Process.ppid }.
        ======================================================================
      MSG

      @logger.info("\n#{ msg }")
    end

    private

    # Builds the TaskWorker for one task, making sure the log has been started first.
    def build_worker(metadata)
      start_log

      TaskWorker.new(metadata:  metadata,
                     queue:     @queue,
                     scheduler: @scheduler,
                     context:   @config.context,
                     logger:    @logger)
    end

    # @return $stdout in test mode, otherwise the path of this worker's log file inside the log directory
    def log_target
      return $stdout if @config.test_mode?

      # NOTE(review): log_dir is used with #+, #mkpath and #to_path, so it is presumably a Pathname - confirm
      log_path = @config.log_dir + "#{ long_name }.log"

      write_log_file(log_path)

      log_path.to_path
    end

    # Creates the log directory and makes sure the log file exists (opened in append mode, nothing written).
    def write_log_file(log_path)
      @config.log_dir.mkpath
      File.open(log_path.to_path, 'a+') do |f|
        f.write ''
      end
    end

    # Reads this queue's tasks from the persister and returns the TaskMetaData objects that are runnable.
    #
    # Rows without a :run_at are ignored, and at most max_tasks rows (earliest run_at first) are considered.
    #
    # NOTE(review): the queue name is passed to #read as-is, while #act and Scheduler write it as a String.
    def fetch_tasks(persister)
      rows = persister.read(queue: @queue.name).map(&:to_h).reject { |row| row[:run_at].nil? }

      metas = sort_tasks(rows).map do |row|
        # Non-mutating filter: Hash#to_h returns the receiver, so deleting keys in place would alter the
        # persister's own rows.
        known = row.select { |key, _value| TaskMetaData::EXPECTED_DATA.include?(key) }

        # Explicit double-splat: Ruby 3 no longer converts a positional Hash into keyword arguments.
        TaskMetaData.new(**known)
      end

      metas.select(&:runnable?)
    end

    def sort_tasks(tasks)
      # shuffling and re-sorting to avoid worst case O(n^2) when receiving already sorted data
      # on quicksort (which is default ruby sort). It is not unreasonable that the persister could return sorted
      # results
      # Ideally, we'd use a better algo than qsort for this, but this will do for now
      tasks.shuffle.sort_by { |t| t[:run_at] }.first(@queue.max_tasks)
    end
  end

  # Error class for a task persister that lacks the expected methods. Nothing in QueueWorker raises it.
  class MalformedTaskPersisterError < StandardError
  end
end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # A Scheduler object provides the API for client applications to manage delayed tasks.
  #
  # Use #delay to schedule new tasks, #reschedule to alter existing tasks, and #cancel to remove unwanted tasks.
  #
  # @author Robin Miller
  class Scheduler
    extend Forwardable

    def_delegators :@queue_manager, :act

    # @param config the configuration; this class reads #loader, #queue, #single_queue? and #queues_string
    # @param queue_manager the object that #act is delegated to
    def initialize(config, queue_manager)
      @config        = config
      @queue_manager = queue_manager
    end

    # Records a new task to be executed at the given time.
    #
    # @param queue [Symbol] the symbol identifier for the queue to add a new task on
    # @param run_at [Time, Integer] Optional time when this task should be executed. Defaults to the current time.
    # @param data [Hash, Array] Optional simple data object to be provided to the task upon execution.
    # @param expire_at [Time, Integer] Optional time when the task should be abandoned
    # @raise [ArgumentError] when the queue argument is not a Symbol, is missing while several queues are
    #   registered, names an unregistered queue, or when :data does not match what the task class expects
    def delay(queue = nil, data: nil, run_at: Time.now.to_i, expire_at: nil)
      verify_queue_arg!(queue)

      queue = @config.queue.name if @config.single_queue?

      verify_queue_data!(queue, data)

      loader.create(queue:          queue.to_s,
                    run_at:         run_at.to_i,
                    initial_run_at: run_at.to_i,
                    expire_at:      expire_at&.to_i,
                    data:           YAML.dump(data))
    end

    # Alters an existing task to run at a new time, expire at a new time, or both.
    #
    # Call #to on the result and pass in the new :run_at and/or :expire_at.
    #
    # Example:
    #
    #    scheduler.reschedule(:alerts, data: {user_id: 5}).to(run_at: Time.now, expire_at: Time.now + 10)
    #
    # The identifier can include any data field stored in the task loader. Often this is the information in :data.
    #
    # @param queue [Symbol] the symbol identifier for the queue the task is on
    # @param identifier [Hash] Some identifying information to find the appropriate task.
    # @return [UpdateProxy]
    #
    # @see TaskMetaData
    def reschedule(queue, identifier)
      UpdateProxy.new(@config, identifier: identifier.merge(queue: queue.to_s))
    end

    # Removes an existing task, as located by the given identifying information.
    #
    # The identifier can include any data field stored in the task loader. Often this is the information in :data.
    #
    # @param queue [Symbol] the symbol identifier for the queue the task is on
    # @param identifier [Hash] Some identifying information to find the appropriate task.
    # @raise [RuntimeError] when zero or more than one task matches the identifier
    #
    # @see TaskMetaData
    def cancel(queue, identifier)
      tasks = loader.read(identifier.merge(queue: queue.to_s))

      raise "no task matches search: #{ identifier }" if tasks.empty?
      raise "multiple tasks match search: #{ identifier }" if tasks.size > 1

      loader.delete(tasks.first[:id])
    end

    # Provides a more natural syntax for rescheduling tasks
    #
    # @see Scheduler#reschedule
    class UpdateProxy
      # @param config the configuration; only #loader is read from it
      # @param identifier [Hash] search fields for the task; a truthy :data value is serialized with YAML
      def initialize(config, identifier:)
        # merge rather than assign so that the caller's hash is left untouched
        identifier = identifier.merge(data: YAML.dump(identifier[:data])) if identifier[:data]

        @config     = config
        @identifier = identifier
      end

      # Applies the new schedule to the single task matching the identifier. The attempt count and the last
      # failure record are reset as part of the update.
      #
      # @param run_at [Time, Integer, nil] new time to run the task at
      # @param expire_at [Time, Integer, nil] new time to abandon the task at
      # @raise [ArgumentError] when neither :run_at nor :expire_at is given
      # @raise [RuntimeError] when zero or several tasks match, or run_at is later than the expiry in effect
      def to(run_at: nil, expire_at: nil)
        task = fetch_task(@identifier)

        verify_time_provided(run_at, expire_at)
        validate_run_at(run_at, task[:expire_at], expire_at)

        # last_fail_at is the field name listed in TaskMetaData::EXPECTED_DATA
        new_data = {
          attempts:     0,
          last_error:   nil,
          last_fail_at: nil
        }

        new_data = new_data.merge(run_at: run_at.to_i, initial_run_at: run_at.to_i) if run_at
        new_data = new_data.merge(expire_at: expire_at.to_i) if expire_at

        @config.loader.update(task[:id], new_data)
      end

      alias at to

      private

      def verify_time_provided(run_at, expire_at)
        raise ArgumentError, 'you must provide at least :run_at or :expire_at' if run_at.nil? && expire_at.nil?
      end

      # Checks run_at against the expiry that will be in effect after the update: the newly given expire_at
      # when there is one, otherwise the expire_at already saved on the task.
      def validate_run_at(run_at, saved_expire_at, expire_at)
        return unless run_at

        if expire_at
          after_new_expire = run_at.to_i > expire_at.to_i

          raise "given run_at (#{ run_at }) is later than given expire_at (#{ expire_at })" if after_new_expire
        elsif saved_expire_at
          after_old_expire = run_at.to_i > saved_expire_at.to_i

          raise "given run_at (#{ run_at }) is later than saved expire_at (#{ saved_expire_at })" if after_old_expire
        end
      end

      # Finds the one task that matches the identifier, raising when there is none or more than one.
      def fetch_task(identifier)
        tasks = @config.loader.read(identifier)

        raise "no task found matching #{ identifier }" if tasks.nil? || tasks.empty?
        raise "too many (#{ tasks.size }) tasks match #{ identifier }. Found: #{ tasks }" if tasks.size > 1

        tasks.first
      end
    end

    private

    # Scheduler must always get the loader indirectly. If it saves the loader to an instance variable,
    # then that could hold a reference to a bad (ie. gone) connection on the previous process
    def loader
      @config.loader
    end

    # Ensures the queue argument is nil or a Symbol, and that it is present when several queues exist.
    def verify_queue_arg!(queue_name)
      raise ArgumentError, <<~ERR if !queue_name.nil? && !queue_name.is_a?(Symbol)
        must provide a queue name as the first argument. Received: #{ queue_name }
      ERR

      raise ArgumentError, <<~ERR if queue_name.nil? && !@config.single_queue?
        queue must be specified when more than one is registered. Defined queues are: #{ @config.queues_string }
      ERR
    end

    # Ensures the queue is registered and that :data is given exactly when the task class defines #data=.
    def verify_queue_data!(queue_name, data)
      queue = @config.queue(name: queue_name)

      unless queue
        queue_list = @config.queues_string
        raise ArgumentError, "there is no :#{ queue_name } queue registered. Defined queues are: #{ queue_list }"
      end

      if data.nil?
        if queue.task_class.method_defined?(:data=)
          raise ArgumentError, "task #{ queue.task_class } expects to receive :data. Provide :data to #delay."
        end
      elsif !queue.task_class.method_defined?(:data=)
        raise ArgumentError, <<~ERROR
          task #{ queue.task_class } does not import :data. Add this in your class definition:
                task_attr :data
        ERROR
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # Module to be included by user-defined task classes. It provides some extra error checking and a convenient way
  # for the task class to access additional information (data, logger, etc) from Procrastinator.
  #
  # If you are averse to including this in your task class, you can just declare an attr_accessor for the
  # information you want Procrastinator to feed your task.
  #
  # @author Robin Miller
  module Task
    # The attribute names that a task class may import with task_attr
    KNOWN_ATTRIBUTES = [:logger, :context, :data, :scheduler].freeze

    # Hook run when the module is included: gives the including class the task_attr class method.
    def self.included(base)
      base.extend(TaskClassMethods)
    end

    # Companion to #method_missing. It defers entirely to the default behaviour; include_private is
    # optional, as it is in the conventional signature.
    def respond_to_missing?(name, include_private = false)
      super
    end

    # Turns a call to an attribute that was not imported into a NameError explaining how to import it.
    # Any other unknown method falls through to the default handling.
    #
    # @raise [NameError] when method_name is one of KNOWN_ATTRIBUTES
    def method_missing(method_name, *args, &block)
      if KNOWN_ATTRIBUTES.include?(method_name)
        raise NameError, "To access Procrastinator::Task attribute :#{ method_name }, " \
                         "call task_attr(:#{ method_name }) in your class definition."
      end

      super
    end

    # Module that provides the task_attr class method for task definitions to declare their expected information.
    module TaskClassMethods
      # Declares an attr_accessor for each of the given attributes.
      #
      # @param fields [Array<Symbol>] attribute names, each of which must be in KNOWN_ATTRIBUTES
      # @raise [ArgumentError] when a field is not in KNOWN_ATTRIBUTES; no accessor is defined in that case
      def task_attr(*fields)
        attr_list = KNOWN_ATTRIBUTES.map { |attr| ":#{ attr }" }.join(', ')

        fields.each do |field|
          err = "Unknown Procrastinator::Task attribute :#{ field }. " \
                "Importable attributes are: #{ attr_list }"
          raise ArgumentError, err unless KNOWN_ATTRIBUTES.include?(field)
        end

        attr_accessor(*fields)
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # TaskMetaData objects are State Patterns that record information about the work done on a particular task.
  #
  # It contains the specific information needed to run a task instance. Users define a task class, which describes
  # the "how" of a task and TaskMetaData represents the "what" and "when".
  #
  # It contains task-specific data, timing information, and error records.
  #
  # Its attributes are exposed through readers only; state changes go through the methods below.
  #
  # @author Robin Miller
  #
  # @!attribute [r] :id
  #   @return [Integer] the unique identifier for this task
  # @!attribute [r] :run_at
  #   @return [Integer] Linux epoch timestamp of when to attempt this task next
  # @!attribute [r] :initial_run_at
  #   @return [Integer] Linux epoch timestamp of the original value for run_at
  # @!attribute [r] :expire_at
  #   @return [Integer] Linux epoch timestamp of when to consider this task obsolete
  # @!attribute [r] :attempts
  #   @return [Integer] The number of times this task has been attempted
  # @!attribute [r] :last_error
  #   @return [String] The message and stack trace of the error encountered on the most recent failed attempt
  # @!attribute [r] :last_fail_at
  #   @return [Integer] Linux epoch timestamp of when the last_error was recorded
  # @!attribute [r] :data
  #   @return [Object] User-provided data, deserialized from the YAML string given to the constructor
  class TaskMetaData
    # These are the attributes expected to be in the persistence mechanism
    EXPECTED_DATA = [:id, :run_at, :initial_run_at, :expire_at, :attempts, :last_error, :last_fail_at, :data].freeze

    attr_reader(*EXPECTED_DATA)

    # @param data [String, nil] YAML-serialized task data; only Symbol and Date are permitted beyond the
    #   basic YAML types
    #
    # run_at and expire_at stay nil when nil and are otherwise converted with #to_i. initial_run_at is always
    # converted with #to_i (so nil becomes 0), and a nil attempts count becomes 0.
    def initialize(id: nil,
                   run_at: nil,
                   initial_run_at: nil,
                   expire_at: nil,
                   attempts: 0,
                   last_error: nil,
                   last_fail_at: nil,
                   data: nil)
      @id             = id
      @run_at         = run_at.nil? ? nil : run_at.to_i
      @initial_run_at = initial_run_at.to_i
      @expire_at      = expire_at.nil? ? nil : expire_at.to_i
      @attempts       = attempts || 0
      @last_error     = last_error
      @last_fail_at   = last_fail_at
      @data           = data ? load_data(data) : nil
    end

    # Instantiates the queue's task class, passing the task data to its constructor only when there is data.
    def init_task(queue)
      @data ? queue.task_class.new(@data) : queue.task_class.new
    end

    # Increments the attempt counter.
    def add_attempt
      @attempts += 1
    end

    # Forgets the most recent failure.
    def clear_fails
      @last_error   = nil
      @last_fail_at = nil
    end

    # Records a failure at the current time.
    #
    # @param msg the error description to store as last_error
    # @param final [Boolean] when true, run_at is cleared so the task is no longer runnable
    def fail(msg, final: false)
      @last_fail_at = Time.now.to_i
      @last_error   = msg
      @run_at       = nil if final
    end

    # @return [Boolean] whether the task has used up the queue's attempts or has expired
    def final_fail?(queue)
      too_many_fails?(queue) || expired?
    end

    # @return [Boolean] whether an expiry time is set and the current time is past it
    def expired?
      !@expire_at.nil? && Time.now.to_i > @expire_at
    end

    # @return [Boolean] whether the queue limits attempts and this task has reached that limit
    def too_many_fails?(queue)
      !queue.max_attempts.nil? && @attempts >= queue.max_attempts
    end

    # @return [Boolean] whether run_at is set and is not in the future
    def runnable?
      !(@run_at.nil? || Time.now.to_i < @run_at)
    end

    # @return [Boolean] whether the task is unexpired and has no recorded failure
    # @raise [RuntimeError] when the task is unexpired and has not been attempted yet
    def successful?
      raise 'you cannot check for success before running #work' if !expired? && @attempts <= 0

      !expired? && @last_error.nil? && @last_fail_at.nil?
    end

    # TODO: This cop for ** is currently incorrect. This disable can be removed once they fix it.
    # rubocop:disable Layout/SpaceAroundOperators
    def reschedule
      # (30 + n_attempts^4) seconds is chosen to rapidly expand
      # but with the baseline of 30s to avoid hitting the disk too frequently.
      @run_at += 30 + (@attempts ** 4) unless @run_at.nil?
    end

    # rubocop:enable Layout/SpaceAroundOperators

    # @return [String] the task data serialized as YAML
    def serialized_data
      YAML.dump(@data)
    end

    # @raise [TaskExpiredError] when the task has expired
    def verify_expiry!
      raise TaskExpiredError, "task is over its expiry time of #{ @expire_at }" if expired?
    end

    # @return [Hash] every attribute in EXPECTED_DATA, with :data re-serialized to YAML
    def to_h
      {id:             @id,
       run_at:         @run_at,
       initial_run_at: @initial_run_at,
       expire_at:      @expire_at,
       attempts:       @attempts,
       last_fail_at:   @last_fail_at,
       last_error:     @last_error,
       data:           serialized_data}
    end

    private

    # Deserializes the stored YAML, permitting Symbol and Date on top of the basic types.
    #
    # Psych 4 removed the positional permitted-classes argument of safe_load, so the keyword form is used
    # whenever the installed Psych offers it; the positional form remains as a fallback for older versions.
    def load_data(yaml)
      permitted = [Symbol, Date]

      if YAML.method(:safe_load).parameters.include?([:key, :permitted_classes])
        YAML.safe_load(yaml, permitted_classes: permitted)
      else
        YAML.safe_load(yaml, permitted)
      end
    end
  end

  # Raised by TaskMetaData#verify_expiry! when a task is past its expiry time.
  class TaskExpiredError < StandardError
  end
end