procrastinator 0.9.0 → 1.0.0.pre.rc2

Sign up to get free protection for your applications and to get access to all the features.
data/RELEASE_NOTES.md ADDED
@@ -0,0 +1,44 @@
1
+ # Release Notes
2
+
3
+ ## 1.0.0 ( )
4
+
5
+ ### Major Changes
6
+
7
+ * Minimum supported Ruby is now 2.4
8
+ * Added generic `Procrastinator::Config#log_with`
9
+ * Removed `Procrastinator::Config#log_inside`
10
+ * Removed `Procrastinator::Config#log_at_level`
11
+ * falsey log level is now the control for whether logging occurs, instead of falsey log directory
12
+ * Queues are managed as threads rather than sub processes
13
+ * These unnecessary methods no longer exist:
14
+ * `Procrastinator.test_mode`
15
+ * `Procrastinator::Config#enable_test_mode`
16
+ * `Procrastinator::Config#test_mode?`
17
+ * `Procrastinator::Config#test_mode`
18
+ * `Procrastinator::Config#prefix`
19
+ * `Procrastinator::Config#pid_dir`
20
+ * `Procrastinator::Config#each_process`
21
+ * `Procrastinator::Config#run_process_block`
22
+ * Removed use of envvar `PROCRASTINATOR_STOP`
23
+ * `Procrastinator::QueueManager` is merged into `Procrastinator::Scheduler`
24
+ * Removed rake task to halt queue processes
25
+ * Renamed `Procrastinator::Config#provide_context` to `provide_container`
26
+ * You must now call `Scheduler#work` on the result of `Procrastinator.config`
27
+ * Use a dedicated process monitor (like `monit`) instead in production environments
28
+ * Supply a block to `daemonized!` to run code in the spawned process.
29
+ * `max_tasks` is removed as it only added concurrency complexity
30
+ * Data is now stored as JSON instead of YAML
31
+ * Added `with_store` that applies its settings to its block
32
+ * `load_with` has been removed
33
+ * Removed `task_attr` and `Procrastinator::Task` module. Tasks are now duck-type checked for accessors instead.
34
+
35
+ ### Minor Changes
36
+
37
+ * Started release notes file
38
+ * Updated development gems
39
+ * Logs now include the queue name in log lines
40
+ * Logs can now set the shift size or age (like Ruby's Logger)
41
+
42
+ ### Bugfixes
43
+
44
+ * none
@@ -1,45 +1,59 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'time'
4
+
3
5
  module Procrastinator
4
6
  # Configuration object (State Pattern) used to coordinate settings across
5
7
  # various components within Procrastinator.
6
8
  #
7
- # All of its state is read-only, set using the methods in the DSL module below.
9
+ # It is immutable after init; use the config DSL in the configuration block to set its state.
8
10
  #
9
11
  # @author Robin Miller
10
12
  #
11
- # @!attribute [r] :test_mode?
12
- # @return [Boolean] Whether test mode is enabled
13
13
  # @!attribute [r] :queues
14
14
  # @return [Array] List of defined queues
15
- # @!attribute [r] :context
16
- # @return [Object] Provided context object that will be forwarded to tasks
17
- # @!attribute [r] :loader
18
- # @return [Object] Provided persistence strategy object to use for task I/O
15
+ # @!attribute [r] :container
16
+ # @return [Object] Container object that will be forwarded to tasks
19
17
  # @!attribute [r] :log_dir
20
18
  # @return [Pathname] Directory to write log files in
21
19
  # @!attribute [r] :log_level
22
20
  # @return [Integer] Logging level to use
23
- # @!attribute [r] :prefix
24
- # @return [String] The prefix to prepend to process names
25
- # @!attribute [r] :pid_dir
26
- # @return [Pathname] Directory to write process ID records in
21
+ # @!attribute [r] :log_shift_age
22
+ # @return [Integer] Number of previous files to keep (see Ruby Logger for details)
23
+ # @!attribute [r] :log_shift_size
24
+ # @return [Integer] Filesize before rotating to a new logfile (see Ruby Logger for details)
27
25
  class Config
28
- attr_reader :queues, :log_dir, :log_level, :prefix, :test_mode, :context, :loader, :pid_dir
29
- alias test_mode? test_mode
30
-
31
- DEFAULT_LOG_DIRECTORY = 'log/'
32
- DEFAULT_PID_DIRECTORY = 'pid/'
26
+ attr_reader :queues, :log_dir, :log_level, :log_shift_age, :log_shift_size, :container
27
+
28
+ DEFAULT_LOG_DIRECTORY = Pathname.new('log').freeze
29
+ DEFAULT_LOG_SHIFT_AGE = 0
30
+ DEFAULT_LOG_SHIFT_SIZE = 2 ** 20 # 1 MB
31
+ DEFAULT_LOG_FORMATTER = proc do |severity, datetime, progname, msg|
32
+ [datetime.iso8601(8),
33
+ severity,
34
+ "#{ progname } (#{ Process.pid }):",
35
+ msg].join("\t") << "\n"
36
+ end
33
37
 
34
38
  def initialize
35
- @test_mode = false
36
- @queues = []
37
- @loader = nil
38
- @context = nil
39
- @subprocess_block = nil
40
- @log_dir = Pathname.new(DEFAULT_LOG_DIRECTORY)
41
- @log_level = Logger::INFO
42
- @pid_dir = Pathname.new(DEFAULT_PID_DIRECTORY)
39
+ @queues = []
40
+ @container = nil
41
+ @log_dir = DEFAULT_LOG_DIRECTORY
42
+ @log_level = Logger::INFO
43
+ @log_shift_age = DEFAULT_LOG_SHIFT_AGE
44
+ @log_shift_size = DEFAULT_LOG_SHIFT_SIZE
45
+
46
+ with_store(csv: TaskStore::SimpleCommaStore::DEFAULT_FILE) do
47
+ if block_given?
48
+ yield(self)
49
+ raise SetupError, SetupError::ERR_NO_QUEUE if @queues.empty?
50
+ end
51
+ end
52
+
53
+ @log_dir = @log_dir.expand_path
54
+
55
+ @queues.freeze
56
+ freeze
43
57
  end
44
58
 
45
59
  # Collection of all of the methods intended for use within Procrastinator.setup
@@ -47,139 +61,89 @@ module Procrastinator
47
61
  # @see Procrastinator
48
62
  module DSL
49
63
  # Assigns a task loader
50
- # It should be called in an each_process block as well so that they get
51
- # distinct resources (eg. DB connections) from the parent process.
52
- def load_with(loader)
53
- if loader.is_a? Hash
54
- unless loader.key? :location
55
- raise ArgumentError, 'Must pass keyword :location if specifying a location for CSV file'
56
- end
57
-
58
- loader = Loader::CSVLoader.new(loader[:location])
59
- end
60
-
61
- raise MalformedTaskLoaderError, 'task loader cannot be nil' if loader.nil?
64
+ def with_store(store)
65
+ raise(ArgumentError, 'with_store must be provided a block') unless block_given?
62
66
 
63
- [:read, :create, :update, :delete].each do |method|
64
- unless loader.respond_to? method
65
- raise MalformedTaskLoaderError, "task loader #{ loader.class } must respond to ##{ method }"
66
- end
67
- end
68
-
69
- @loader = loader
67
+ old_store = @default_store
68
+ @default_store = interpret_store(store)
69
+ yield
70
+ @default_store = old_store
70
71
  end
71
72
 
72
- def provide_context(context)
73
- @context = context
74
- end
75
-
76
- # Accepts a block that will be executed on the queue sub-processes. Use it to control resource allocations.
77
- def each_process(prefix: nil, pid_dir: DEFAULT_PID_DIRECTORY, &block)
78
- @prefix = prefix
79
- @subprocess_block = block
80
- @pid_dir = Pathname.new(pid_dir)
73
+ def provide_container(container)
74
+ @container = container
81
75
  end
82
76
 
83
77
  def define_queue(name, task_class, properties = {})
84
78
  raise ArgumentError, 'queue name cannot be nil' if name.nil?
85
79
  raise ArgumentError, 'queue task class cannot be nil' if task_class.nil?
86
80
 
87
- verify_task_class(task_class)
88
-
89
- @queues << Queue.new(properties.merge(name: name, task_class: task_class))
90
- end
91
-
92
- def enable_test_mode
93
- @test_mode = true
94
- end
81
+ properties[:store] = interpret_store(properties[:store]) if properties.key? :store
95
82
 
96
- def log_inside(path)
97
- @log_dir = path ? Pathname.new(path) : path
83
+ @queues << Queue.new(**{name: name, task_class: task_class, store: @default_store}.merge(properties))
98
84
  end
99
85
 
100
- def log_at_level(lvl)
101
- @log_level = lvl
86
+ # Sets details of logging behaviour
87
+ #
88
+ # @param directory [Pathname,String] the directory to save logs within.
89
+ # @param level [Logger::UNKNOWN,Logger::FATAL,Logger::ERROR,Logger::WARN,Logger::INFO,Logger::DEBUG,Integer,Boolean] the Ruby Logger level to use. If falsey, no logging is performed.
90
+ # @param shift_age [Integer] number of old log files to keep (see Ruby Logger for details)
91
+ # @param shift_size [Integer] filesize before log is rotated to a fresh file (see Ruby Logger for details)
92
+ def log_with(directory: @log_dir, level: @log_level, shift_age: @log_shift_age, shift_size: @log_shift_size)
93
+ @log_dir = directory ? Pathname.new(directory) : directory
94
+ @log_level = level
95
+ @log_shift_age = shift_age
96
+ @log_shift_size = shift_size
102
97
  end
103
98
  end
104
99
 
105
100
  include DSL
106
101
 
107
- def setup(test_mode = false)
108
- yield(self)
109
-
110
- enable_test_mode if test_mode
111
-
112
- load_with(Loader::CSVLoader.new) unless @loader
113
-
114
- raise 'setup block must call #define_queue on the environment' if @queues.empty?
115
-
116
- if @context && @queues.none? { |queue| queue.task_class.method_defined?(:context=) }
117
- raise <<~ERROR
118
- setup block called #provide_context, but no queue task classes import :context.
119
-
120
- Add this to your Task classes that expect to receive the context:
121
-
122
- include Procrastinator::Task
123
-
124
- task_attr :context
125
- ERROR
126
- end
127
-
128
- self
102
+ def queue(name: nil)
103
+ queue = if name
104
+ @queues.find do |q|
105
+ q.name == name
106
+ end
107
+ else
108
+ if name.nil? && @queues.length > 1
109
+ raise ArgumentError,
110
+ "queue must be specified when more than one is defined. #{ known_queues }"
111
+ end
112
+
113
+ @queues.first
114
+ end
115
+
116
+ raise ArgumentError, "there is no :#{ name } queue registered. #{ known_queues }" unless queue
117
+
118
+ queue
129
119
  end
130
120
 
131
- def queues_string
132
- # it drops the colon if you call #to_s on a symbol, so we need to add it back
133
- @queues.map { |queue| ":#{ queue.name }" }.join(', ')
134
- end
121
+ private
135
122
 
136
- def single_queue?
137
- @queues.size == 1
123
+ def known_queues
124
+ "Known queues are: #{ @queues.map { |queue| ":#{ queue.name }" }.join(', ') }"
138
125
  end
139
126
 
140
- def run_process_block
141
- @subprocess_block&.call
142
- end
127
+ def interpret_store(store)
128
+ raise(ArgumentError, 'task store cannot be nil') if store.nil?
143
129
 
144
- def queue(name: nil)
145
- if name
146
- @queues.find do |q|
147
- q.name == name
130
+ case store
131
+ when Hash
132
+ store_strategy = :csv
133
+ unless store.key? store_strategy
134
+ raise ArgumentError, "Must pass keyword :#{ store_strategy } if specifying a location for CSV file"
148
135
  end
136
+
137
+ TaskStore::SimpleCommaStore.new(store[store_strategy])
138
+ when String, Pathname
139
+ TaskStore::SimpleCommaStore.new(store)
149
140
  else
150
- @queues.first
141
+ store
151
142
  end
152
143
  end
153
144
 
154
- private
155
-
156
- def verify_task_class(task_class)
157
- unless task_class.method_defined? :run
158
- raise MalformedTaskError, "task #{ task_class } does not support #run method"
159
- end
160
-
161
- # We're checking the interface compliance on init because it's one of those extremely rare cases where
162
- # you'd want to know early because the sub-processes would crash async, which is harder to debug.
163
- # It's a bit belt-and suspenders, but UX is important for devs, too. - robinetmiller
164
- if task_class.method_defined?(:run) && task_class.instance_method(:run).arity.positive?
165
- err = "task #{ task_class } cannot require parameters to its #run method"
166
-
167
- raise MalformedTaskError, err
168
- end
169
-
170
- expected_arity = 1
171
-
172
- [:success, :fail, :final_fail].each do |method_name|
173
- next unless task_class.method_defined?(method_name)
174
- next if task_class.instance_method(method_name).arity == expected_arity
175
-
176
- err = "task #{ task_class } must accept #{ expected_arity } parameter to its ##{ method_name } method"
177
-
178
- raise MalformedTaskError, err
179
- end
145
+ class SetupError < RuntimeError
146
+ ERR_NO_QUEUE = 'setup block must call #define_queue on the environment'
180
147
  end
181
148
  end
182
-
183
- class MalformedTaskLoaderError < StandardError
184
- end
185
149
  end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'ostruct'
5
+ require 'timeout'
6
+ # require 'forwardable'
7
+ require 'delegate'
8
+ require_relative 'task'
9
+
10
+ module Procrastinator
11
+ # Task wrapper that adds logging to each step.
12
+ #
13
+ # @author Robin Miller
14
+ #
15
+ # @see Task
16
+ class LoggedTask < DelegateClass(Task)
17
+ # extend Forwardable
18
+ #
19
+ # def_delegators :@task, :id, :to_h
20
+
21
+ attr_reader :logger
22
+
23
+ alias task __getobj__
24
+
25
+ def initialize(task, logger: Logger.new(StringIO.new))
26
+ super task
27
+ @logger = logger || raise(ArgumentError, 'Logger cannot be nil')
28
+ end
29
+
30
+ def run
31
+ task.run
32
+
33
+ begin
34
+ @logger.info("Task completed: #{ task }")
35
+ rescue StandardError => e
36
+ warn "Task logging error: #{ e.message }"
37
+ end
38
+ end
39
+
40
+ def fail(error)
41
+ hook = task.fail(error)
42
+ begin
43
+ @logger.error("Task #{ hook }ed: #{ task }")
44
+ rescue StandardError => e
45
+ warn "Task logging error: #{ e.message }"
46
+ end
47
+ hook
48
+ end
49
+ end
50
+ end
@@ -14,37 +14,193 @@ module Procrastinator
14
14
  # @!attribute [r] :max_attempts
15
15
  # @return [Object] Maximum number of attempts for tasks in this queue.
16
16
  # @!attribute [r] :update_period
17
- # @return [Pathname] Delay (seconds) between reloads of tasks from the task loader.
18
- # @!attribute [r] :max_tasks
19
- # @return [Pathname] The maximum number of tasks to run concurrently within a queue worker process.
17
+ # @return [Pathname] Delay (seconds) between reloads of tasks from the task store.
20
18
  class Queue
19
+ extend Forwardable
20
+
21
21
  DEFAULT_TIMEOUT = 3600 # in seconds; one hour total
22
22
  DEFAULT_MAX_ATTEMPTS = 20
23
23
  DEFAULT_UPDATE_PERIOD = 10 # seconds
24
- DEFAULT_MAX_TASKS = 10
25
24
 
26
- attr_reader :name, :task_class, :max_attempts, :timeout, :update_period, :max_tasks
25
+ attr_reader :name, :max_attempts, :timeout, :update_period, :task_store, :task_class
26
+
27
+ alias store task_store
28
+ alias storage task_store
29
+
30
+ def_delegators :@task_store, :read, :update, :delete
27
31
 
28
32
  # Timeout is in seconds
29
- def initialize(name:,
30
- task_class:,
33
+ def initialize(name:, task_class:,
31
34
  max_attempts: DEFAULT_MAX_ATTEMPTS,
32
35
  timeout: DEFAULT_TIMEOUT,
33
36
  update_period: DEFAULT_UPDATE_PERIOD,
34
- max_tasks: DEFAULT_MAX_TASKS)
35
- raise ArgumentError, ':name may not be nil' unless name
36
- raise ArgumentError, ':task_class may not be nil' unless task_class
37
+ store: TaskStore::SimpleCommaStore.new)
38
+ raise ArgumentError, ':name cannot be nil' unless name
37
39
 
40
+ raise ArgumentError, ':task_class cannot be nil' unless task_class
38
41
  raise ArgumentError, 'Task class must be initializable' unless task_class.respond_to? :new
39
42
 
40
- raise ArgumentError, 'timeout cannot be negative' if timeout&.negative?
43
+ raise ArgumentError, ':timeout cannot be negative' if timeout&.negative?
41
44
 
42
45
  @name = name.to_s.strip.gsub(/[^A-Za-z0-9]+/, '_').to_sym
43
46
  @task_class = task_class
47
+ @task_store = store
44
48
  @max_attempts = max_attempts
45
49
  @timeout = timeout
46
50
  @update_period = update_period
47
- @max_tasks = max_tasks
51
+
52
+ validate!
53
+
54
+ freeze
55
+ end
56
+
57
+ def next_task(logger: Logger.new(StringIO.new), container: nil, scheduler: nil)
58
+ metadata = next_metas.find(&:runnable?)
59
+
60
+ return nil unless metadata
61
+
62
+ task = Task.new(metadata, task_handler(data: metadata.data,
63
+ container: container,
64
+ logger: logger,
65
+ scheduler: scheduler))
66
+
67
+ LoggedTask.new(task, logger: logger)
68
+ end
69
+
70
+ def fetch_task(identifier)
71
+ identifier[:data] = JSON.dump(identifier[:data]) if identifier[:data]
72
+
73
+ tasks = read(**identifier)
74
+
75
+ raise "no task found matching #{ identifier }" if tasks.nil? || tasks.empty?
76
+ raise "too many (#{ tasks.size }) tasks match #{ identifier }. Found: #{ tasks }" if tasks.size > 1
77
+
78
+ TaskMetaData.new(tasks.first.merge(queue: self))
79
+ end
80
+
81
+ def create(run_at:, expire_at:, data:)
82
+ if data.nil? && expects_data?
83
+ raise ArgumentError, "task #{ @task_class } expects to receive :data. Provide :data to #delay."
84
+ end
85
+
86
+ unless data.nil? || expects_data?
87
+ raise MalformedTaskError, <<~ERROR
88
+ found unexpected :data argument. Either do not provide :data when scheduling a task,
89
+ or add this in the #{ @task_class } class definition:
90
+ attr_accessor :data
91
+ ERROR
92
+ end
93
+
94
+ # TODO: shorten to using slice once updated to Ruby 2.5+
95
+ attrs = {queue: self, run_at: run_at, initial_run_at: run_at, expire_at: expire_at, data: JSON.dump(data)}
96
+
97
+ create_data = TaskMetaData.new(**attrs).to_h
98
+ create_data.delete(:id)
99
+ create_data.delete(:attempts)
100
+ create_data.delete(:last_fail_at)
101
+ create_data.delete(:last_error)
102
+ @task_store.create(**create_data)
103
+ end
104
+
105
+ def expects_data?
106
+ @task_class.method_defined?(:data=)
107
+ end
108
+
109
+ private
110
+
111
+ def task_handler(data: nil, container: nil, logger: nil, scheduler: nil)
112
+ handler = @task_class.new
113
+ handler.data = data if handler.respond_to?(:data=)
114
+ handler.container = container
115
+ handler.logger = logger
116
+ handler.scheduler = scheduler
117
+ handler
118
+ end
119
+
120
+ def next_metas
121
+ tasks = read(queue: @name).reject { |t| t[:run_at].nil? }.collect do |t|
122
+ t.to_h.delete_if { |key| !TaskMetaData::EXPECTED_DATA.include?(key) }.merge(queue: self)
123
+ end
124
+
125
+ sort_tasks(tasks.collect { |t| TaskMetaData.new(**t) })
126
+ end
127
+
128
+ def sort_tasks(tasks)
129
+ # TODO: improve this
130
+ # shuffling and re-sorting to avoid worst case O(n^2) when receiving already sorted data
131
+ # on quicksort (which is default ruby sort). It is not unreasonable that the persister could return sorted
132
+ # results
133
+ # Ideally, we'd use a better algo than qsort for this, but this will do for now
134
+ tasks.shuffle.sort_by(&:run_at)
135
+ end
136
+
137
+ # Internal queue validator
138
+ module QueueValidation
139
+ def validate!
140
+ verify_task_class!
141
+ verify_task_store!
142
+ end
143
+
144
+ def verify_task_class!
145
+ verify_run_method!
146
+ verify_accessors!
147
+ verify_hooks!
148
+ end
149
+
150
+ # The interface compliance is checked on init because it's one of those rare cases where you want to know early;
151
+ # otherwise, you wouldn't know until task execution and that could be far in the future.
152
+ # UX is important for devs, too.
153
+ # - R
154
+ def verify_run_method!
155
+ unless @task_class.method_defined? :run
156
+ raise MalformedTaskError, "task #{ @task_class } does not support #run method"
157
+ end
158
+
159
+ return unless @task_class.instance_method(:run).arity.positive?
160
+
161
+ raise MalformedTaskError, "task #{ @task_class } cannot require parameters to its #run method"
162
+ end
163
+
164
+ def verify_accessors!
165
+ [:logger, :container, :scheduler].each do |method_name|
166
+ next if @task_class.method_defined?(method_name) && @task_class.method_defined?("#{ method_name }=")
167
+
168
+ raise MalformedTaskError, <<~ERR
169
+ Task handler is missing a #{ method_name } accessor. Add this to the #{ @task_class } class definition:
170
+ attr_accessor :logger, :container, :scheduler
171
+ ERR
172
+ end
173
+ end
174
+
175
+ def verify_hooks!
176
+ expected_arity = 1
177
+
178
+ [:success, :fail, :final_fail].each do |method_name|
179
+ next unless @task_class.method_defined?(method_name)
180
+ next if @task_class.instance_method(method_name).arity == expected_arity
181
+
182
+ err = "task #{ @task_class } must accept #{ expected_arity } parameter to its ##{ method_name } method"
183
+
184
+ raise MalformedTaskError, err
185
+ end
186
+ end
187
+
188
+ def verify_task_store!
189
+ raise ArgumentError, ':store cannot be nil' if @task_store.nil?
190
+
191
+ [:read, :create, :update, :delete].each do |method|
192
+ unless @task_store.respond_to? method
193
+ raise MalformedTaskStoreError, "task store #{ @task_store.class } must respond to ##{ method }"
194
+ end
195
+ end
196
+ end
48
197
  end
198
+ include QueueValidation
199
+ end
200
+
201
+ class MalformedTaskError < StandardError
202
+ end
203
+
204
+ class MalformedTaskStoreError < RuntimeError
49
205
  end
50
206
  end