procrastinator 0.6.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
- task :default => :spec
8
+ task default: :spec
@@ -1,27 +1,33 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'procrastinator/version'
2
- require 'procrastinator/queue_worker'
4
+ require 'procrastinator/task_meta_data'
3
5
  require 'procrastinator/task_worker'
4
- require 'procrastinator/environment'
5
- require 'logger'
6
-
6
+ require 'procrastinator/queue'
7
+ require 'procrastinator/queue_worker'
8
+ require 'procrastinator/config'
9
+ require 'procrastinator/queue_manager'
10
+ require 'procrastinator/task'
11
+ require 'procrastinator/scheduler'
12
+ require 'procrastinator/loaders/csv_loader'
7
13
 
14
+ require 'logger'
15
+ require 'pathname'
16
+
17
+ # Top-level module for the Procrastinator Gem.
18
+ #
19
+ # Call Procrastinator.setup with a block to initialize and run independent worker sub processes to complete tasks
20
+ # asynchronously from your main application.
21
+ #
22
+ # Read the README for details.
23
+ #
24
+ # @author Robin Miller
25
+ #
26
+ # @see https://github.com/TenjinInc/procrastinator
8
27
  module Procrastinator
28
+ # rubocop:disable Style/ClassVars
9
29
  @@test_mode = false
10
30
 
11
- def self.setup(&block)
12
- raise ArgumentError.new('Procrastinator.setup must be given a block') if block.nil?
13
-
14
- env = Environment.new(test_mode: @@test_mode)
15
-
16
- yield(env)
17
-
18
- raise RuntimeError.new('setup block must call #persister_factory on the environment') if env.persister.nil?
19
- raise RuntimeError.new('setup block must call #define_queue on the environment') if env.queue_definitions.empty?
20
- env.spawn_workers
21
-
22
- env
23
- end
24
-
25
31
  def self.test_mode=(value)
26
32
  @@test_mode = value
27
33
  end
@@ -29,4 +35,19 @@ module Procrastinator
29
35
  def self.test_mode
30
36
  @@test_mode
31
37
  end
38
+
39
+ # rubocop:enable Style/ClassVars
40
+
41
+ # Creates a configuration object and passes it into the given block.
42
+ #
43
+ # @yield the created configuration object
44
+ def self.setup(&block)
45
+ raise ArgumentError, 'Procrastinator.setup must be given a block' unless block_given?
46
+
47
+ config = Config.new
48
+
49
+ config.setup(@@test_mode, &block)
50
+
51
+ QueueManager.new(config).spawn_workers
52
+ end
32
53
  end
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # Configuration object (State Pattern) used to coordinate settings across
  # various components within Procrastinator.
  #
  # All of its state is read-only, set using the methods in the DSL module below.
  #
  # @author Robin Miller
  #
  # @!attribute [r] :test_mode?
  #   @return [Boolean] Whether test mode is enabled
  # @!attribute [r] :queues
  #   @return [Array] List of defined queues
  # @!attribute [r] :context
  #   @return [Object] Provided context object that will be forwarded to tasks
  # @!attribute [r] :loader
  #   @return [Object] Provided persistence strategy object to use for task I/O
  # @!attribute [r] :log_dir
  #   @return [Pathname] Directory to write log files in
  # @!attribute [r] :log_level
  #   @return [Integer] Logging level to use
  # @!attribute [r] :prefix
  #   @return [String] The prefix to prepend to process names
  # @!attribute [r] :pid_dir
  #   @return [Pathname] Directory to write process ID records in
  class Config
    attr_reader :queues, :log_dir, :log_level, :prefix, :test_mode, :context, :loader, :pid_dir
    alias test_mode? test_mode

    DEFAULT_LOG_DIRECTORY = 'log/'
    DEFAULT_PID_DIRECTORY = 'pid/'

    def initialize
      @test_mode        = false
      @queues           = []
      @loader           = nil
      @context          = nil
      @prefix           = nil # only assigned by #each_process; initialised so the reader never hits an unset ivar
      @subprocess_block = nil
      @log_dir          = Pathname.new(DEFAULT_LOG_DIRECTORY)
      @log_level        = Logger::INFO
      @pid_dir          = Pathname.new(DEFAULT_PID_DIRECTORY)
    end

    # Collection of all of the methods intended for use within Procrastinator.setup
    #
    # @see Procrastinator
    module DSL
      # Assigns a task loader
      # It should be called in an each_process block as well so that they get
      # distinct resources (eg. DB connections) from the parent process.
      #
      # @param loader [Object, Hash] an object responding to #read, #create, #update and #delete,
      #   or a Hash with a :location key that is handed to Loader::CSVLoader
      # @raise [ArgumentError] when a Hash is given without :location
      # @raise [MalformedTaskLoaderError] when the loader is nil or lacks one of the four methods
      def load_with(loader)
        if loader.is_a? Hash
          unless loader.key? :location
            raise ArgumentError, 'Must pass keyword :location if specifying a location for CSV file'
          end

          loader = Loader::CSVLoader.new(loader[:location])
        end

        raise MalformedTaskLoaderError, 'task loader cannot be nil' if loader.nil?

        [:read, :create, :update, :delete].each do |method|
          unless loader.respond_to? method
            raise MalformedTaskLoaderError, "task loader #{ loader.class } must respond to ##{ method }"
          end
        end

        @loader = loader
      end

      # Stores the object exposed through #context.
      def provide_context(context)
        @context = context
      end

      # Accepts a block that will be executed on the queue sub-processes. Use it to control resource allocations.
      #
      # @param prefix [String, nil] stored as #prefix
      # @param pid_dir [String, Pathname] wrapped in a Pathname and stored as #pid_dir
      def each_process(prefix: nil, pid_dir: DEFAULT_PID_DIRECTORY, &block)
        @prefix           = prefix
        @subprocess_block = block
        @pid_dir          = Pathname.new(pid_dir)
      end

      # Validates the task class and appends a new Queue to #queues.
      #
      # @param name [Object] queue name; must not be nil
      # @param task_class [Class] class whose instances perform the work; must define #run
      # @param properties [Hash] extra keyword options forwarded to Queue.new
      # @raise [ArgumentError] when name or task_class is nil
      # @raise [MalformedTaskError] when task_class fails the interface checks
      def define_queue(name, task_class, properties = {})
        raise ArgumentError, 'queue name cannot be nil' if name.nil?
        raise ArgumentError, 'queue task class cannot be nil' if task_class.nil?

        verify_task_class(task_class)

        # Queue#initialize only takes keywords, so the merged options must be splatted:
        # since Ruby 3.0 a positional Hash is no longer converted to keyword arguments.
        @queues << Queue.new(**properties.merge(name: name, task_class: task_class))
      end

      def enable_test_mode
        @test_mode = true
      end

      # Sets the log directory. A falsey path is stored as-is instead of being wrapped in a Pathname.
      def log_inside(path)
        @log_dir = path ? Pathname.new(path) : path
      end

      def log_at_level(lvl)
        @log_level = lvl
      end
    end

    include DSL

    # Yields this config to the block, applies defaults and checks the result is usable.
    #
    # @param test_mode [Boolean] when truthy, test mode is enabled after the block has run
    # @yield [Config] this object, to be populated through the DSL methods
    # @return [Config] self
    # @raise [RuntimeError] when no queue was defined, or a context was provided but no
    #   queue task class defines #context=
    def setup(test_mode = false)
      yield(self)

      enable_test_mode if test_mode

      load_with(Loader::CSVLoader.new) unless @loader

      raise 'setup block must call #define_queue on the environment' if @queues.empty?

      if @context && @queues.none? { |queue| queue.task_class.method_defined?(:context=) }
        raise <<~ERROR
          setup block called #provide_context, but no queue task classes import :context.

          Add this to your Task classes that expect to receive the context:

          include Procrastinator::Task

          task_attr :context
        ERROR
      end

      self
    end

    # @return [String] the queue names, each prefixed with a colon and joined by ", "
    def queues_string
      # it drops the colon if you call #to_s on a symbol, so we need to add it back
      @queues.map { |queue| ":#{ queue.name }" }.join(', ')
    end

    def single_queue?
      @queues.size == 1
    end

    # Calls the block stored by #each_process, if there is one.
    def run_process_block
      @subprocess_block&.call
    end

    # @param name [Object, nil] name to look up; when omitted the first queue is returned
    # @return [Queue, nil] the queue whose #name equals the given name, or nil when none does
    def queue(name: nil)
      if name
        @queues.find do |q|
          q.name == name
        end
      else
        @queues.first
      end
    end

    private

    # We're checking the interface compliance on init because it's one of those extremely rare cases where
    # you'd want to know early because the sub-processes would crash async, which is harder to debug.
    # It's a bit belt-and-suspenders, but UX is important for devs, too. - robinetmiller
    def verify_task_class(task_class)
      unless task_class.method_defined? :run
        raise MalformedTaskError, "task #{ task_class } does not support #run method"
      end

      if required_arg_count(task_class.instance_method(:run)).positive?
        err = "task #{ task_class } cannot require parameters to its #run method"

        raise MalformedTaskError, err
      end

      expected_arity = 1

      [:success, :fail, :final_fail].each do |method_name|
        next unless task_class.method_defined?(method_name)
        next if task_class.instance_method(method_name).arity == expected_arity

        err = "task #{ task_class } must accept #{ expected_arity } parameter to its ##{ method_name } method"

        raise MalformedTaskError, err
      end
    end

    # Number of mandatory arguments of a method. Ruby reports arity as -(n + 1) when n
    # mandatory arguments are followed by optional or splat ones, so a plain
    # `arity.positive?` test would miss signatures such as `run(target, *rest)`.
    def required_arg_count(method)
      arity = method.arity

      arity.negative? ? -arity - 1 : arity
    end
  end

  # Raised by Config#load_with when the given task loader is nil or lacks a required method.
  class MalformedTaskLoaderError < StandardError
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'pathname'
5
+
6
module Procrastinator
  module Loader
    # Simple Task I/O object that writes task information (ie. TaskMetaData attributes) to a CSV file.
    #
    # The whole file is re-read and rewritten on every #create, #update and #delete.
    #
    # @author Robin Miller
    class CSVLoader
      # ordered
      HEADERS = [:id, :queue, :run_at, :initial_run_at, :expire_at,
                 :attempts, :last_fail_at, :last_error, :data].freeze

      DEFAULT_FILE = 'procrastinator-tasks.csv'

      # Turns text into an Integer or Float when it parses as one; anything else is returned unchanged.
      def self.parse_number(text)
        Integer(text)
      rescue ArgumentError, TypeError
        begin
          Float(text)
        rescue ArgumentError, TypeError
          text
        end
      end
      private_class_method :parse_number

      # Field converter used by #read: numeric conversion for every column except :data,
      # whose serialized payload has to come back as the exact String that was written.
      FIELD_CONVERTER = lambda do |field, info|
        info.header.to_s == 'data' ? field : parse_number(field)
      end
      private_constant :FIELD_CONVERTER

      # @param file_path [String, Pathname] where to keep the CSV file. A directory (or a path ending
      #   in "/") gets DEFAULT_FILE appended; a path without an extension gets ".csv" appended.
      def initialize(file_path = DEFAULT_FILE)
        @path = Pathname.new(file_path)

        if @path.directory? || @path.to_s.end_with?('/')
          @path += DEFAULT_FILE
        elsif @path.extname.empty?
          @path = Pathname.new("#{ file_path }.csv")
        end
      end

      # Loads every task stored in the file.
      #
      # @return [Array<Hash>] one Hash per row, keyed by the symbolized column headers
      # @raise [Errno::ENOENT] when the file does not exist
      def read
        # skip_blanks tolerates a blank line after the header row
        table = CSV.read(@path.to_s,
                         headers:           true,
                         header_converters: :symbol,
                         converters:        FIELD_CONVERTER,
                         skip_blanks:       true)

        table.map do |row|
          task = row.to_h

          # The parser has already undone the CSV quote doubling, so the payload is used as parsed;
          # collapsing "" again would corrupt payloads containing an empty quoted string.
          # #to_s covers the nil produced by an unquoted empty field.
          task[:data] = task[:data].to_s

          task
        end
      end

      # Appends a task with the next free id (highest existing id plus one) and zero attempts.
      def create(queue:, run_at:, initial_run_at:, expire_at:, data: '')
        tasks = read_or_empty

        max_id = tasks.map { |task| task[:id] }.max || 0

        new_task = {
              id:             max_id + 1,
              queue:          queue,
              run_at:         run_at,
              initial_run_at: initial_run_at,
              expire_at:      expire_at,
              attempts:       0,
              data:           data
        }

        write(tasks + [new_task])
      end

      # Merges the given attributes into the task with the matching id and saves the file.
      #
      # @param id [Integer] id of the task to change
      # @param data [Hash] attributes to overwrite, keyed like HEADERS
      # @raise [ArgumentError] when no stored task has that id
      def update(id, data)
        tasks = read_or_empty

        task = tasks.find { |candidate| candidate[:id] == id }

        raise ArgumentError, "no task with id #{ id.inspect } found in #{ @path }" if task.nil?

        task.merge!(data)

        write(tasks)
      end

      # Removes the task with the matching id (if any) and saves the file.
      def delete(id)
        tasks = read_or_empty

        tasks.delete_if { |task| task[:id] == id }

        write(tasks)
      end

      # Replaces the file contents with a header row followed by one fully quoted row per task,
      # creating parent directories as needed.
      #
      # @param data [Array<Hash>] tasks keyed like HEADERS; other keys are not written
      def write(data)
        lines = data.map do |task|
          CSV.generate_line(HEADERS.map { |header| task[header] }, force_quotes: true)
        end

        @path.dirname.mkpath
        @path.open('w') do |f|
          f.puts HEADERS.join(',')
          # print, not puts: with no tasks, puts would emit a blank line after the header row
          f.print lines.join
        end
      end

      private

      # Stored tasks, or an empty list when the file has not been created yet.
      def read_or_empty
        read
      rescue Errno::ENOENT
        []
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # A Queue defines how a certain type of task will be processed.
  #
  # @author Robin Miller
  #
  # @!attribute [r] :name
  #   @return [Symbol] The queue's identifier symbol
  # @!attribute [r] :task_class
  #   @return [Class] Class that defines the work to be done for jobs in this queue.
  # @!attribute [r] :timeout
  #   @return [Object] Duration (seconds) after which tasks in this queue should fail for taking too long.
  # @!attribute [r] :max_attempts
  #   @return [Object] Maximum number of attempts for tasks in this queue.
  # @!attribute [r] :update_period
  #   @return [Integer] Delay (seconds) between reloads of tasks from the task loader.
  # @!attribute [r] :max_tasks
  #   @return [Integer] The maximum number of tasks to run concurrently within a queue worker process.
  class Queue
    DEFAULT_TIMEOUT       = 3600 # in seconds; one hour total
    DEFAULT_MAX_ATTEMPTS  = 20
    DEFAULT_UPDATE_PERIOD = 10 # seconds
    DEFAULT_MAX_TASKS     = 10

    attr_reader :name, :task_class, :max_attempts, :timeout, :update_period, :max_tasks

    # Timeout is in seconds
    #
    # The name is stringified, stripped, and has every run of characters outside
    # A-Z, a-z and 0-9 replaced by a single underscore before being stored as a Symbol.
    #
    # @raise [ArgumentError] when name or task_class is missing, task_class does not
    #   respond to .new, or timeout is negative
    def initialize(name:,
                   task_class:,
                   max_attempts: DEFAULT_MAX_ATTEMPTS,
                   timeout: DEFAULT_TIMEOUT,
                   update_period: DEFAULT_UPDATE_PERIOD,
                   max_tasks: DEFAULT_MAX_TASKS)
      problem = first_problem(name, task_class, timeout)

      raise ArgumentError, problem if problem

      @name          = name.to_s.strip.gsub(/[^A-Za-z0-9]+/, '_').to_sym
      @task_class    = task_class
      @max_attempts  = max_attempts
      @timeout       = timeout
      @update_period = update_period
      @max_tasks     = max_tasks
    end

    private

    # Message for the first failed check, or nil when every check passes.
    # The checks run in order and stop at the first failure.
    def first_problem(name, task_class, timeout)
      if !name
        ':name may not be nil'
      elsif !task_class
        ':task_class may not be nil'
      elsif !task_class.respond_to?(:new)
        'Task class must be initializable'
      elsif timeout&.negative?
        'timeout cannot be negative'
      end
    end
  end
end
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
module Procrastinator
  # Spawns and manages work queue subprocesses.
  #
  # This is where all of the multi-process logic should be kept to.
  #
  # @author Robin Miller
  #
  # @!attribute [r] :workers
  #   @return [Hash] Maps the constructed QueueWorkers to their process ID.
  class QueueManager
    attr_reader :workers

    def initialize(config)
      @workers = {}
      @config  = config
      @logger  = start_log # nil when the config has no log directory
    end

    # Shuts down any remaining old queue workers and spawns a new one for each queue defined in the config
    #
    # @return [Scheduler] a scheduler object that can be used to interact with the queues
    def spawn_workers
      scheduler = Scheduler.new(@config, self)

      kill_old_workers

      if ENV['PROCRASTINATOR_STOP']
        @logger&.warn('Cannot spawn queue workers because environment variable PROCRASTINATOR_STOP is set')
      else
        @config.queues.each do |queue|
          spawn_worker(queue, scheduler: scheduler)
        end
      end

      scheduler
    end

    # Produces a new QueueWorker for the given queue.
    #
    # If Test Mode is disabled in the config, then it will also fork a new independent process for that worker
    # to work in.
    #
    # NOTE(review): the child calls Process.daemon in #deamonize, which forks again, so the pid recorded
    # here may not be the pid of the process that ends up doing the work - confirm.
    #
    # @param queue [Queue] the queue to build a worker for
    # @param scheduler [Scheduler] an optional scheduler instance to pass to the worker
    def spawn_worker(queue, scheduler: nil)
      worker = QueueWorker.new(queue:     queue,
                               config:    @config,
                               scheduler: scheduler)
      if @config.test_mode?
        @workers[worker] = Process.pid
      else
        check_for_name(worker.long_name)

        pid = fork

        if pid
          # === PARENT PROCESS ===
          Process.detach(pid)
          @workers[worker] = pid
        else
          deamonize(worker.long_name)

          worker.work
          shutdown_worker
        end
      end
    end

    # Makes workers perform a single #act; only permitted in Test Mode.
    #
    # @param queue_names [Array] names of the workers that should act; every worker acts when empty
    # @raise [RuntimeError] when the config is not in Test Mode
    # @raise [ArgumentError] when a given name matches no known worker
    def act(*queue_names)
      unless @config.test_mode?
        raise <<~ERR
          Procrastinator.act called outside Test Mode.
          Either use Procrastinator.spawn_workers or call #enable_test_mode in Procrastinator.setup.
        ERR
      end

      workers = @workers.keys

      if queue_names.empty?
        workers.each(&:act)
      else
        queue_names.each do |name|
          worker = workers.find { |candidate| candidate.name == name }

          raise ArgumentError, "there is no queue worker named #{ name.inspect }" if worker.nil?

          worker.act
        end
      end
    end

    private

    # Creates the log directory and the queue-manager.log file inside it, then opens a Logger on that file.
    #
    # @return [Logger, nil] nil when the config has no log directory
    def start_log
      directory = @config.log_dir

      return unless directory

      log_path = directory + 'queue-manager.log'

      directory.mkpath
      # touch the file so it exists before the Logger opens it
      File.open(log_path.to_path, 'a+') { |f| f.write '' }

      logger = Logger.new(log_path.to_path)

      logger.level = @config.log_level

      logger
    end

    # Methods exclusive to the child process
    module ChildMethods
      # Detaches the current process, renames it, closes inherited IO, records its pid in a
      # pid file and finally runs the config's per-process block.
      #
      # @param name [String] process title, also used as the pid file's base name
      def deamonize(name)
        Process.daemon(true)
        Process.setsid
        srand
        Process.setproctitle(name)
        close_io

        write_pid_file(Process.pid, name)

        @config.run_process_block
      end

      # Make sure all input/output streams are closed
      def close_io
        stds = [$stdin, $stdout, $stderr]

        # Part 1: close all IO objects (except for $stdin/$stdout/$stderr)
        ObjectSpace.each_object(IO) do |io|
          next if stds.include?(io)

          begin
            io.close
          rescue IOError
            next
          end
        end

        # Part 2: redirect STD connections
        stds.each do |io|
          io.reopen '/dev/null'
        end

        # TODO: redirect OUT or ERR to logger?
      end

      # Wrapping #exit to allow for tests to easily stub out this behaviour.
      # If #exit isn't prevented, the test framework will break,
      # but #exit can't be directly stubbed either (because it's a required Kernel method)
      def shutdown_worker
        exit
      end
    end

    # Methods exclusive to the main/parent process
    module ParentMethods
      # Sends KILL to every process recorded in the pid directory and removes the pid files.
      #
      # Files that do not contain a positive integer are removed without signalling anything.
      def kill_old_workers
        @config.pid_dir.mkpath

        @config.pid_dir.each_child do |file|
          pid = read_pid(file)

          if pid
            kill_worker(pid)
          else
            @logger&.warn("Ignoring pid file #{ file } because it does not contain a process id")
          end

          file.delete
        end
      end

      # Writes the pid into "<filename>.pid" inside the config's pid directory.
      def write_pid_file(pid, filename)
        @config.pid_dir.mkpath

        pid_file = @config.pid_dir + "#{ filename }.pid"

        File.open(pid_file.to_path, 'w') do |f|
          f.print(pid)
        end
      end

      # Warns on stderr when pgrep finds a process whose command line matches the given name.
      def check_for_name(name)
        # Capture pgrep's output so we get the info without spamming the user's stdout. The array form
        # runs pgrep directly (no shell), so names containing spaces or shell metacharacters arrive as
        # one literal argument; "--" stops a leading dash being read as an option.
        matches = IO.popen(['pgrep', '-f', '--', name], &:read)

        return if matches.empty?

        warn <<~WARNING
          Warning: there is another process named "#{ name }". Use #each_process(prefix: '') in
          Procrastinator setup if you want to help yourself distinguish them.
        WARNING
      end

      private

      # Extracts the process id stored in a pid file.
      #
      # @return [Integer, nil] nil unless the file holds a single positive integer. Zero and negative
      #   ids are rejected because Process.kill treats them as process groups (0 is the caller's own).
      def read_pid(file)
        pid = file.read[/\A\s*(\d+)\s*\z/, 1].to_i

        pid if pid.positive?
      end

      # Sends KILL to one process, logging the outcome when a logger is available.
      def kill_worker(pid)
        Process.kill('KILL', pid)
        @logger&.info("Killing old worker process pid: #{ pid }")
      rescue Errno::ESRCH, RangeError
        @logger&.info("Expected old worker process pid=#{ pid }, but none was found")
      rescue Errno::EPERM
        @logger&.warn("Not permitted to kill old worker process pid=#{ pid }")
      end
    end

    include ChildMethods
    include ParentMethods
  end
end