async-background 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +48 -7
- data/async-background.gemspec +2 -1
- data/lib/async/background/job.rb +5 -3
- data/lib/async/background/metrics.rb +157 -86
- data/lib/async/background/queue/client.rb +33 -15
- data/lib/async/background/queue/options.rb +70 -0
- data/lib/async/background/queue/schema.rb +160 -0
- data/lib/async/background/queue/sql.rb +205 -0
- data/lib/async/background/queue/store.rb +270 -148
- data/lib/async/background/runner/queue_execution.rb +199 -0
- data/lib/async/background/runner/schedule.rb +127 -0
- data/lib/async/background/runner.rb +99 -231
- data/lib/async/background/version.rb +1 -1
- metadata +27 -2
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Async
|
|
4
|
+
module Background
|
|
5
|
+
class Runner
|
|
6
|
+
# Queue-only lifecycle. Keeping it separate from recurring scheduling makes
|
|
7
|
+
# the different delivery guarantees visible without adding runtime objects.
|
|
8
|
+
module QueueExecution
|
|
9
|
+
private
|
|
10
|
+
|
|
11
|
+
# Prepares queue processing for this worker.
#
# The queue is only listened to when a socket directory is given and this
# worker is not an isolated one. In that case the queue components are
# loaded lazily, the store and socket waker are created, the waker socket is
# opened and the store's recovery pass is run.
#
# @param queue_socket_dir [String, nil] directory for the worker socket
# @param queue_db_path [String, nil] store path; falls back to Queue::Store.default_path
# @param queue_mmap passed through to the store as the :mmap option
def setup_queue(queue_socket_dir, queue_db_path, queue_mmap)
  @listen_queue = queue_socket_dir ? !isolated_worker? : false
  return unless @listen_queue

  # Deliberate lazy loading: queue code is only required when it is used.
  require_relative '../queue/client'
  require_relative '../queue/socket_waker'
  require_relative '../queue/store'

  store_path = queue_db_path || Queue::Store.default_path
  @queue_store = Queue::Store.new(path: store_path, options: {mmap: queue_mmap})

  socket_path = queue_socket_path(queue_socket_dir)
  @queue_waker = Queue::SocketWaker.new(socket_path)
  @queue_waker.open!
  recover_queue_jobs
end
|
|
28
|
+
|
|
29
|
+
# Starts the waker's accept loop and a child task that alternates between
# waiting on the waker (bounded by #next_wait_timeout) and dispatching
# whatever jobs are available, for as long as the runner reports running?.
#
# @param task the parent task used to spawn the accept loop and listener
def start_queue_listener(task)
  @queue_waker.start_accept_loop(task)

  task.async do
    logger.info { "Async::Background queue: listening on worker #{worker_index}" }

    while running?
      wait_for = next_wait_timeout
      @queue_waker.wait(timeout: wait_for)
      dispatch_available_queue_jobs
    end
  end
end
|
|
41
|
+
|
|
42
|
+
# Computes how long the listener may sleep before polling the store again.
#
# @return [Numeric] QUEUE_POLL_INTERVAL when nothing is pending,
#   MIN_QUEUE_WAIT when the next pending job is already due, otherwise the
#   time until it is due, capped at QUEUE_POLL_INTERVAL
def next_wait_timeout
  due_at = @queue_store.next_pending_run_at

  if due_at
    delay = due_at - realtime_now
    delay <= 0 ? MIN_QUEUE_WAIT : [delay, QUEUE_POLL_INTERVAL].min
  else
    QUEUE_POLL_INTERVAL
  end
end
|
|
51
|
+
|
|
52
|
+
# Runs one claimed queue job end to end.
#
# Steps, in order: resolve the job class, parse the stored options, mark the
# job as started in the store (bail out if that is refused), run the job
# under its timeout, then record completion.
#
# Failure routing:
# * ConfigError           -> recorded via #record_invalid_queue_job!
# * ::Async::TimeoutError -> #handle_queue_failure with timeout: true
# * other StandardError   -> #handle_queue_failure with the backtrace
#
# metrics.job_stopped is only emitted when metrics.job_started was.
#
# @param job_task the task the job body runs in (provides #with_timeout)
# @param job the claimed job record, read via [] with symbol keys
def run_queue_job(job_task, job)
  # Declared first so the rescue clauses always see initialised locals.
  started_at = nil
  options = nil
  metrics_started = false
  # Seconds since the job body started; nil if it never started.
  elapsed = -> { started_at && (monotonic_now - started_at) }

  class_name = job[:class_name]
  claim_token = job[:claim_token]

  klass = resolve_job_class(class_name)
  options = parse_job_options(job[:options])
  return unless start_queue_job!(job, class_name, claim_token)

  metrics.job_started(nil)
  metrics_started = true
  started_at = monotonic_now
  job_task.with_timeout(options.timeout) { klass.perform_now(*job[:args]) }

  complete_queue_job!(job, class_name, claim_token, started_at)
rescue ConfigError => config_error
  record_invalid_queue_job!(job, class_name, claim_token, config_error)
rescue ::Async::TimeoutError => timeout_error
  handle_queue_failure(
    job, options, "timed out after #{options&.timeout}s",
    error: timeout_error, duration: elapsed.call, timeout: true, backtrace: nil
  )
rescue StandardError => failure
  handle_queue_failure(
    job, options, "#{failure.class} #{failure.message}",
    error: failure, duration: elapsed.call, timeout: false, backtrace: failure.backtrace
  )
ensure
  metrics.job_stopped(nil) if metrics_started
end
|
|
94
|
+
|
|
95
|
+
# Records a failed or timed-out job in the store and emits metrics and logs.
#
# The store decides the outcome via retry_or_fail. A falsy result is logged
# as a stale lease and nothing else happens. Otherwise the matching metric
# is emitted, and the outcome is logged as a retry (when the store answered
# :retried) or as a failure.
#
# @param job the job record, read via [] with symbol keys
# @param options parsed job options (may be nil), passed as fallback_options
# @param message [String] human-readable failure description
# @param error [Exception] the exception that ended the job
# @param duration [Numeric, nil] seconds the job ran, if it started
# @param timeout [Boolean] whether the failure was a timeout
# @param backtrace [Array<String>, nil] backtrace for the failure log
def handle_queue_failure(job, options, message, error:, duration:, timeout:, backtrace:)
  class_name = job[:class_name]
  # Timeouts store the descriptive message; other errors store their own.
  stored_message = timeout ? message : error.message

  outcome = @queue_store.retry_or_fail(
    job[:id],
    claim_token: job[:claim_token],
    error_class: error.class,
    error_message: stored_message,
    fallback_options: options,
    duration_ms: duration_ms(duration)
  )
  return log_stale_queue_failure(job, class_name, timeout) unless outcome

  if timeout
    metrics.job_timed_out(nil)
  else
    metrics.job_failed(nil, error)
  end

  if outcome == :retried
    log_queue_retry(job, class_name, message, options)
  else
    log_queue_failure(class_name, message, backtrace)
  end
end
|
|
111
|
+
|
|
112
|
+
# Builds Job::Options from the raw options stored with a queue job.
#
# @param raw [Hash, nil] stored options; nil means "no options"
# @return [Job::Options]
# @raise [ConfigError] when the options are rejected with an ArgumentError
#   or TypeError
def parse_job_options(raw)
  attributes = raw || {}

  begin
    Job::Options.new(**attributes)
  rescue ArgumentError, TypeError => cause
    raise ConfigError, "invalid queue options: #{cause.message}"
  end
end
|
|
117
|
+
|
|
118
|
+
# Whether this worker's index is listed in the comma-separated
# ISOLATION_FORKS environment variable (entries are compared after to_i).
#
# @return [Boolean]
def isolated_worker?
  own_index = worker_index
  listed = ENV.fetch('ISOLATION_FORKS', '').split(',')
  listed.any? { |entry| entry.to_i == own_index }
end
|
|
121
|
+
|
|
122
|
+
# Path of this worker's wake-up socket inside +directory+.
#
# @param directory [String] socket directory
# @return [String] "<directory>/async_bg_worker_<worker_index>.sock"
def queue_socket_path(directory)
  socket_name = "async_bg_worker_#{worker_index}.sock"
  File.join(directory, socket_name)
end
|
|
125
|
+
|
|
126
|
+
# Asks the store to recover jobs for this worker and logs the count when
# anything was recovered.
def recover_queue_jobs
  count = @queue_store.recover(worker_index)
  return unless count.positive?

  logger.info { "Async::Background queue: recovered #{count} stale jobs" }
end
|
|
130
|
+
|
|
131
|
+
# Fetches jobs for this worker from the store and hands each one to the
# semaphore until the store has nothing left or the runner stops running.
#
# Each job is bound to a block parameter before being handed over. A `while`
# loop does not open a new scope, so a block closing over the loop local
# would observe later reassignments (ultimately nil) if it ran after the
# loop advanced.
def dispatch_available_queue_jobs
  while running?
    job = @queue_store.fetch(worker_index)
    break unless job

    # `then` gives every iteration its own binding for the claimed job.
    job.then do |claimed|
      semaphore.async { |job_task| run_queue_job(job_task, claimed) }
    end
  end
end
|
|
139
|
+
|
|
140
|
+
# Marks the job as started in the store.
#
# @param job the job record, read via [] with symbol keys
# @param class_name [String] used only for the log message
# @param claim_token the claim token presented to the store
# @return [Boolean] true when the store accepted the start; false (after
#   logging a warning) when it did not
def start_queue_job!(job, class_name, claim_token)
  if @queue_store.mark_started!(job[:id], claim_token: claim_token)
    true
  else
    logger.warn('Async::Background') do
      "queue(#{class_name}): lost lease before start for job #{job[:id]}, ignored"
    end
    false
  end
end
|
|
148
|
+
|
|
149
|
+
# Records successful completion of a job.
#
# When the store accepts the completion, the success metric and an info log
# are emitted; otherwise only a stale-lease warning is logged.
#
# @param job the job record, read via [] with symbol keys
# @param class_name [String] used for log messages
# @param claim_token the claim token presented to the store
# @param started_at [Numeric] monotonic timestamp taken when the job started
def complete_queue_job!(job, class_name, claim_token, started_at)
  elapsed = monotonic_now - started_at
  recorded = @queue_store.complete(job[:id], claim_token: claim_token, duration_ms: duration_ms(elapsed))

  unless recorded
    return logger.warn('Async::Background') {
      "queue(#{class_name}): complete: stale lease for job #{job[:id]}, ignored"
    }
  end

  metrics.job_succeeded(nil, elapsed)
  logger.info('Async::Background') { "queue(#{class_name}): completed in #{elapsed.round(2)}s" }
end
|
|
160
|
+
|
|
161
|
+
# Marks a job that cannot be run (invalid class or options) as failed.
#
# The failure metric is only emitted when the store reports the failure as
# recorded; the error is logged either way.
#
# @param job the job record, read via [] with symbol keys
# @param class_name [String, nil] used for the log message
# @param claim_token the claim token presented to the store
# @param error [Exception] the configuration error
def record_invalid_queue_job!(job, class_name, claim_token, error)
  failure = {error_class: error.class, error_message: error.message}
  recorded = @queue_store.fail(job[:id], claim_token: claim_token, **failure)

  metrics.job_failed(nil, error) if recorded
  logger.error('Async::Background') { "queue(#{class_name}): #{error.class} #{error.message}" }
end
|
|
171
|
+
|
|
172
|
+
# Warns that a failure or timeout was reported for a job whose lease the
# store no longer honours.
#
# @param job the job record, read via [] with symbol keys
# @param class_name [String] job class name for the message
# @param timeout [Boolean] true for "timeout", false for "failure"
def log_stale_queue_failure(job, class_name, timeout)
  logger.warn('Async::Background') do
    "queue(#{class_name}): #{timeout ? 'timeout' : 'failure'} on stale lease for job #{job[:id]}, ignored"
  end
end
|
|
178
|
+
|
|
179
|
+
# Signals the queue waker (when one exists) and logs that the job will be
# retried.
#
# @param job unused here; kept for the shared call signature
# @param class_name [String] job class name for the message
# @param message [String] failure description
# @param options job options (may be nil), read for next_attempt and retry
def log_queue_retry(job, class_name, message, options)
  @queue_waker&.signal

  logger.warn('Async::Background') do
    attempt = options&.next_attempt
    limit = options&.retry
    "queue(#{class_name}): #{message}; retry #{attempt}/#{limit}"
  end
end
|
|
185
|
+
|
|
186
|
+
# Logs a job failure at error level, appending the backtrace (one frame per
# line) when one is given.
#
# @param class_name [String] job class name for the message
# @param message [String] failure description
# @param backtrace [Array<String>, nil] frames to append
def log_queue_failure(class_name, message, backtrace)
  details = ''
  details = "\n#{backtrace.join("\n")}" if backtrace

  logger.error('Async::Background') { "queue(#{class_name}): #{message}#{details}" }
end
|
|
190
|
+
|
|
191
|
+
# Converts a duration in seconds to whole milliseconds (truncated).
#
# @param duration [Numeric, nil] seconds
# @return [Integer, nil] nil when the duration is missing or negative
def duration_ms(duration)
  return if duration.nil?
  return if duration.negative?

  millis = duration * 1000
  millis.to_i
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
require 'zlib'
|
|
5
|
+
|
|
6
|
+
module Async
|
|
7
|
+
module Background
|
|
8
|
+
class Runner
|
|
9
|
+
# Pure schedule parsing and heap construction. It deliberately returns the
|
|
10
|
+
# existing Hash contract from #build_task_config because specs and callers
|
|
11
|
+
# use that shape while the Runner owns execution state.
|
|
12
|
+
module Schedule
|
|
13
|
+
private
|
|
14
|
+
|
|
15
|
+
# Loads the schedule file and builds the heap of entries for this worker,
# using the current monotonic time as the scheduling baseline.
#
# @param config_path [String] path to the schedule file
def build_heap(config_path)
  build_entries(load_schedule(config_path), monotonic_now)
end
|
|
19
|
+
|
|
20
|
+
# Validates one schedule entry and converts it to the task Hash.
#
# @param name [String] task name, used as the prefix of error messages
# @param config [Hash, nil] raw entry with 'class', 'every', 'cron' and
#   'timeout' keys
# @return [Hash] with :job_class, :interval, :cron and :timeout, in that order
# @raise [ConfigError] when the class is missing, or when a helper rejects
#   the class, interval, cron, frequency or timeout
def build_task_config(name, config)
  class_name = config&.dig('class').to_s.strip
  raise ConfigError, "[#{name}] missing class" if class_name.empty?

  # Filled step by step so validation runs in a fixed order:
  # class, interval, cron, frequency check, timeout.
  task = {job_class: resolve_scheduled_job(name, class_name)}
  task[:interval] = parse_interval(name, config['every'])
  task[:cron] = parse_cron(name, config['cron'])
  validate_schedule_frequency!(name, task[:interval], task[:cron])
  task[:timeout] = parse_timeout(name, config)
  task
end
|
|
36
|
+
|
|
37
|
+
# Resolves a job class from its fully qualified name without going through
# ancestors (each segment must be defined directly on its namespace).
#
# Surrounding whitespace and a leading "::" are tolerated. Every way a name
# can fail to resolve is reported as ConfigError, so callers that treat
# ConfigError as "invalid job" never see a stray NameError or NoMethodError.
# That covers undefined constants, malformed constant names such as "foo" or
# an empty segment, and paths that walk through a non-module constant.
#
# @param class_name [String, Symbol, nil] e.g. "Reports::DailyJob"
# @return [Class, Module] the resolved constant, which responds to perform_now
# @raise [ConfigError] when the name is empty, cannot be resolved, or the
#   resolved constant does not respond to perform_now
def resolve_job_class(class_name)
  path = class_name.to_s.strip.delete_prefix('::')
  raise ConfigError, 'empty class name in queue job' if path.empty?

  klass = path.split('::').reduce(Object) do |namespace, name|
    known = begin
      namespace.const_defined?(name, false)
    rescue NameError
      # Malformed constant name, or +namespace+ is not a Module
      # (NoMethodError is a NameError).
      false
    end
    raise ConfigError, "unknown class: #{class_name}" unless known

    namespace.const_get(name, false)
  end

  return klass if klass.respond_to?(:perform_now)

  raise ConfigError, "#{class_name} must include Async::Background::Job"
end
|
|
50
|
+
|
|
51
|
+
# Loads the schedule file with YAML.safe_load_file and checks its shape.
#
# The document must be a non-empty mapping of task name => task config. A
# scalar or list is rejected here with ConfigError, rather than surfacing
# later as a NoMethodError or a confusing per-task error.
#
# @param path [String] path to the schedule file
# @return [Hash] the parsed schedule
# @raise [ConfigError] when the file is missing, empty, or not a mapping
def load_schedule(path)
  raise ConfigError, "Schedule file not found: #{path}" unless File.exist?(path)

  schedule = YAML.safe_load_file(path)
  blank = !schedule || (schedule.respond_to?(:empty?) && schedule.empty?)
  raise ConfigError, "Empty schedule: #{path}" if blank
  raise ConfigError, "Invalid schedule (expected a mapping of task names): #{path}" unless schedule.is_a?(Hash)

  schedule
end
|
|
58
|
+
|
|
59
|
+
# Builds a MinHeap holding an entry for every scheduled task assigned to
# this worker; tasks assigned to other workers are skipped unvalidated.
#
# @param schedule [Hash] task name => raw task config
# @param now [Numeric] monotonic baseline for the first run times
# @return [MinHeap]
def build_entries(schedule, now)
  heap = MinHeap.new

  schedule.each do |name, config|
    next if assigned_worker(config, name) != worker_index

    task = build_task_config(name, config)
    heap.push(build_entry(name, task, now))
  end

  heap
end
|
|
67
|
+
|
|
68
|
+
# Determines which worker (1-based) runs a scheduled task.
#
# An explicit 'worker' setting wins (converted with to_i). Otherwise the
# task is spread over the workers by a CRC32 of its name.
#
# A nil config (a task key with no body) is treated as "no explicit worker"
# instead of raising NoMethodError here, so the malformed entry is reported
# by the later validation step. The name is stringified so non-String YAML
# keys (e.g. integers) can be hashed too.
#
# @param config [Hash, nil] raw task config
# @param name [String, #to_s] task name
# @return [Integer] worker index
def assigned_worker(config, name)
  pinned = config.nil? ? nil : config['worker']
  return pinned.to_i unless pinned.nil?

  (Zlib.crc32(name.to_s) % total_workers) + 1
end
|
|
71
|
+
|
|
72
|
+
# Wraps a validated task Hash in an Entry with its first run time.
#
# @param name [String] task name
# @param task [Hash] result of #build_task_config
# @param now [Numeric] monotonic baseline
# @return [Entry]
def build_entry(name, task, now)
  job_class, interval, cron, timeout = task.values_at(:job_class, :interval, :cron, :timeout)

  Entry.new(
    name: name,
    job_class: job_class,
    interval: interval,
    cron: cron,
    timeout: timeout,
    next_run_at: initial_next_run_at(task, now)
  )
end
|
|
82
|
+
|
|
83
|
+
# Computes the monotonic time of a task's first run.
#
# A random jitter of up to min(interval, MAX_JITTER) seconds is always
# added (up to MAX_JITTER for cron tasks). Interval tasks first run one full
# interval after +now+; cron tasks run at the cron's next wall-clock
# occurrence, but never sooner than MIN_SLEEP_TIME from +now+.
#
# @param task [Hash] task config with :interval and/or :cron
# @param now [Numeric] monotonic baseline
# @return [Numeric]
def initial_next_run_at(task, now)
  interval = task[:interval]
  jitter_cap = interval ? [interval, MAX_JITTER].min : MAX_JITTER
  jitter = rand * jitter_cap
  return now + jitter + interval if interval

  # Cron times are wall-clock; convert the distance to a monotonic offset.
  wall_now = Time.now
  until_next = task[:cron].next_time(wall_now).to_f - wall_now.to_f
  delay = [until_next, MIN_SLEEP_TIME].max
  now + jitter + delay
end
|
|
91
|
+
|
|
92
|
+
# Resolves the job class of a scheduled task, prefixing any ConfigError
# with the task name.
#
# @param name [String] task name
# @param class_name [String] job class name
# @raise [ConfigError] "[name] <original message>"
def resolve_scheduled_job(name, class_name)
  begin
    resolve_job_class(class_name)
  rescue ConfigError => cause
    raise ConfigError, "[#{name}] #{cause.message}"
  end
end
|
|
97
|
+
|
|
98
|
+
# Parses the optional 'every' setting of a task.
#
# @param name [String] task name for error messages
# @param value raw setting; nil means "not configured"
# @return [Integer, nil] the interval (value.to_i), or nil when not configured
# @raise [ConfigError] when the converted interval is not positive
def parse_interval(name, value)
  return if value.nil?

  seconds = value.to_i
  return seconds if seconds.positive?

  raise ConfigError, "[#{name}] 'every' must be > 0"
end
|
|
106
|
+
|
|
107
|
+
# Parses the optional 'cron' setting of a task with Fugit::Cron.new.
#
# @param name [String] task name for error messages
# @param value raw cron expression; nil means "not configured"
# @return the parsed cron, or nil when not configured
# @raise [ConfigError] when Fugit::Cron.new returns a falsy result
def parse_cron(name, value)
  return if value.nil?

  cron = Fugit::Cron.new(value)
  raise ConfigError, "[#{name}] invalid cron: #{value}" unless cron

  cron
end
|
|
112
|
+
|
|
113
|
+
# Reads a task's 'timeout' (DEFAULT_TIMEOUT when the key is absent) and
# validates it by passing it through Job::Options.
#
# @param name [String] task name for error messages
# @param config [Hash] raw task config
# @return the timeout as reported by Job::Options
# @raise [ConfigError] when Job::Options rejects the value with an
#   ArgumentError or TypeError
def parse_timeout(name, config)
  raw = config.fetch('timeout', DEFAULT_TIMEOUT)
  Job::Options.new(timeout: raw).timeout
rescue ArgumentError, TypeError => cause
  raise ConfigError, "[#{name}] #{cause.message}"
end
|
|
118
|
+
|
|
119
|
+
# Ensures a task declares at least one of 'every' or 'cron'.
#
# @param name [String] task name for the error message
# @param interval parsed 'every' value, or nil
# @param cron parsed 'cron' value, or nil
# @raise [ConfigError] when both are missing
def validate_schedule_frequency!(name, interval, cron)
  raise ConfigError, "[#{name}] specify 'every' or 'cron'" unless interval || cron
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|