async-background 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Async
4
+ module Background
5
+ class Runner
6
+ # Queue-only lifecycle. Keeping it separate from recurring scheduling makes
7
+ # the different delivery guarantees visible without adding runtime objects.
8
+ module QueueExecution
9
+ private
10
+
11
# Prepares queue processing for this worker.
#
# Listening is enabled only when a socket directory is given and the worker
# is not an isolated one. When enabled, the queue files are required lazily,
# the store and socket waker are created, the waker is opened and queue
# recovery is run.
#
# @param queue_socket_dir [String, nil] directory used to build the waker socket path
# @param queue_db_path [String, nil] store path; Queue::Store.default_path when nil
# @param queue_mmap [Object] forwarded to the store as the :mmap option
# @return [Object, nil] nil when listening is disabled
def setup_queue(queue_socket_dir, queue_db_path, queue_mmap)
  @listen_queue = queue_socket_dir ? !isolated_worker? : false
  return unless @listen_queue

  %w[client socket_waker store].each { |file| require_relative "../queue/#{file}" }

  store_path = queue_db_path || Queue::Store.default_path
  @queue_store = Queue::Store.new(path: store_path, options: {mmap: queue_mmap})

  socket_path = queue_socket_path(queue_socket_dir)
  @queue_waker = Queue::SocketWaker.new(socket_path)
  @queue_waker.open!
  recover_queue_jobs
end
28
+
29
# Starts the waker accept loop and a child task that drains the queue.
#
# The child task logs once, then repeatedly waits on the waker (bounded by
# #next_wait_timeout) and dispatches available jobs while the runner is
# running.
#
# @param task [#async] parent task that owns the listener
# @return [Object] whatever task.async returns
def start_queue_listener(task)
  @queue_waker.start_accept_loop(task)

  task.async do
    logger.info { "Async::Background queue: listening on worker #{worker_index}" }

    while running?
      wait_for = next_wait_timeout
      @queue_waker.wait(timeout: wait_for)
      dispatch_available_queue_jobs
    end
  end
end
41
+
42
# Computes how long the listener may wait before checking the store again.
#
# @return [Numeric] QUEUE_POLL_INTERVAL when nothing is pending,
#   MIN_QUEUE_WAIT when the next pending job is already due, otherwise the
#   time remaining until it is due, capped at QUEUE_POLL_INTERVAL
def next_wait_timeout
  due_at = @queue_store.next_pending_run_at

  if due_at
    delay = due_at - realtime_now
    delay <= 0 ? MIN_QUEUE_WAIT : [delay, QUEUE_POLL_INTERVAL].min
  else
    QUEUE_POLL_INTERVAL
  end
end
51
+
52
# Runs one claimed queue job inside the given task.
#
# The job class and options are resolved first; the job is skipped when
# #start_queue_job! reports the lease was lost. Otherwise the job runs under
# its configured timeout and is completed in the store.
#
# ConfigError is recorded via #record_invalid_queue_job!; timeouts and every
# other StandardError go through #handle_queue_failure. Metrics are stopped
# only if they were started.
#
# @param job_task [#with_timeout] task used to bound the job's runtime
# @param job [Hash] claimed job (:id, :class_name, :claim_token, :options, :args)
# @return [Object, nil]
def run_queue_job(job_task, job)
  class_name = job[:class_name]
  claim_token = job[:claim_token]
  job_options = began_at = nil
  tracking = false
  # Evaluated lazily so the duration is measured at the moment of failure.
  elapsed = -> { began_at && (monotonic_now - began_at) }

  job_class = resolve_job_class(class_name)
  job_options = parse_job_options(job[:options])
  return unless start_queue_job!(job, class_name, claim_token)

  metrics.job_started(nil)
  tracking = true
  began_at = monotonic_now
  job_task.with_timeout(job_options.timeout) do
    job_class.perform_now(*job[:args])
  end

  complete_queue_job!(job, class_name, claim_token, began_at)
rescue ConfigError => e
  record_invalid_queue_job!(job, class_name, claim_token, e)
rescue ::Async::TimeoutError => e
  handle_queue_failure(
    job, job_options, "timed out after #{job_options&.timeout}s",
    error: e, duration: elapsed.call, timeout: true, backtrace: nil
  )
rescue StandardError => e
  handle_queue_failure(
    job, job_options, "#{e.class} #{e.message}",
    error: e, duration: elapsed.call, timeout: false, backtrace: e.backtrace
  )
ensure
  metrics.job_stopped(nil) if tracking
end
94
+
95
# Records a failed or timed-out job attempt and reports the outcome.
#
# The store decides whether the job is retried or failed. A falsy store
# result is logged as a stale lease and no metrics are emitted.
#
# @param job [Hash] claimed job (:id, :class_name, :claim_token)
# @param options [Object, nil] parsed job options, passed to the store as fallback
# @param message [String] human-readable failure description
# @param error [Exception] the raised error
# @param duration [Numeric, nil] elapsed seconds, if the job had started
# @param timeout [Boolean] whether the failure was a timeout
# @param backtrace [Array<String>, nil] backtrace appended to the failure log
# @return [Object] result of the logging helper that was invoked
def handle_queue_failure(job, options, message, error:, duration:, timeout:, backtrace:)
  class_name = job[:class_name]
  # Timeouts store the descriptive message; other errors store their own.
  stored_message = timeout ? message : error.message
  outcome = @queue_store.retry_or_fail(
    job[:id],
    claim_token: job[:claim_token],
    error_class: error.class,
    error_message: stored_message,
    fallback_options: options,
    duration_ms: duration_ms(duration)
  )
  return log_stale_queue_failure(job, class_name, timeout) unless outcome

  if timeout
    metrics.job_timed_out(nil)
  else
    metrics.job_failed(nil, error)
  end

  if outcome == :retried
    log_queue_retry(job, class_name, message, options)
  else
    log_queue_failure(class_name, message, backtrace)
  end
end
111
+
112
# Builds Job::Options from the raw options stored with a queue job.
#
# @param raw [Hash, nil] keyword options; nil is treated as no options
# @return [Job::Options]
# @raise [ConfigError] when Job::Options rejects the input with
#   ArgumentError or TypeError
def parse_job_options(raw)
  settings = raw || {}
  Job::Options.new(**settings)
rescue ArgumentError, TypeError => e
  raise ConfigError, "invalid queue options: #{e.message}"
end
117
+
118
# Whether this worker's index appears in the comma-separated
# ISOLATION_FORKS environment variable (entries are converted with to_i).
#
# @return [Boolean]
def isolated_worker?
  listed = ENV.fetch('ISOLATION_FORKS', '').split(',')
  listed.any? { |entry| entry.to_i == worker_index }
end
121
+
122
# Builds the path of this worker's queue socket inside +directory+.
#
# @param directory [String] socket directory
# @return [String]
def queue_socket_path(directory)
  socket_name = "async_bg_worker_#{worker_index}.sock"
  File.join(directory, socket_name)
end
125
+
126
# Asks the store to recover jobs for this worker and logs the count when it
# is positive.
#
# @return [Object, nil] the logger's return value, or nil when nothing was recovered
def recover_queue_jobs
  count = @queue_store.recover(worker_index)
  return unless count.positive?

  logger.info { "Async::Background queue: recovered #{count} stale jobs" }
end
130
+
131
# Fetches jobs for this worker one at a time and hands each to the semaphore
# for execution, until the store has no job or the runner stops.
#
# @return [nil]
def dispatch_available_queue_jobs
  while running? && (claimed = @queue_store.fetch(worker_index))
    semaphore.async { |child| run_queue_job(child, claimed) }
  end
end
139
+
140
# Marks the job as started in the store.
#
# @param job [Hash] claimed job (:id)
# @param class_name [String] used in the log message
# @param claim_token [Object] token identifying this worker's claim
# @return [Boolean] true when the store accepted the start, false (after a
#   warning) when it did not
def start_queue_job!(job, class_name, claim_token)
  accepted = @queue_store.mark_started!(job[:id], claim_token: claim_token)
  return true if accepted

  logger.warn('Async::Background') do
    "queue(#{class_name}): lost lease before start for job #{job[:id]}, ignored"
  end
  false
end
148
+
149
# Marks the job as completed in the store and reports success.
#
# When the store rejects the completion, only a stale-lease warning is
# logged and no success metric is emitted.
#
# @param job [Hash] claimed job (:id)
# @param class_name [String] used in log messages
# @param claim_token [Object] token identifying this worker's claim
# @param started_at [Numeric] monotonic start time of the job
# @return [Object] the logger's return value
def complete_queue_job!(job, class_name, claim_token, started_at)
  elapsed = monotonic_now - started_at
  stored = @queue_store.complete(
    job[:id],
    claim_token: claim_token,
    duration_ms: duration_ms(elapsed)
  )

  unless stored
    return logger.warn('Async::Background') do
      "queue(#{class_name}): complete: stale lease for job #{job[:id]}, ignored"
    end
  end

  metrics.job_succeeded(nil, elapsed)
  logger.info('Async::Background') { "queue(#{class_name}): completed in #{elapsed.round(2)}s" }
end
160
+
161
# Fails a job that could not be set up (for example an unknown class or
# invalid options) and logs the error.
#
# The failure metric is emitted only when the store recorded the failure;
# the error is logged either way.
#
# @param job [Hash] claimed job (:id)
# @param class_name [String, nil] used in the log message
# @param claim_token [Object] token identifying this worker's claim
# @param error [Exception] the configuration error
# @return [Object] the logger's return value
def record_invalid_queue_job!(job, class_name, claim_token, error)
  failure = {
    claim_token: claim_token,
    error_class: error.class,
    error_message: error.message
  }
  stored = @queue_store.fail(job[:id], **failure)
  metrics.job_failed(nil, error) if stored

  logger.error('Async::Background') { "queue(#{class_name}): #{error.class} #{error.message}" }
end
171
+
172
# Warns that a failure or timeout was reported for a job whose lease is no
# longer held, so the result is ignored.
#
# @param job [Hash] claimed job (:id)
# @param class_name [String] used in the log message
# @param timeout [Boolean] selects "timeout" or "failure" wording
# @return [Object] the logger's return value
def log_stale_queue_failure(job, class_name, timeout)
  logger.warn('Async::Background') do
    "queue(#{class_name}): #{timeout ? 'timeout' : 'failure'} on stale lease for job #{job[:id]}, ignored"
  end
end
178
+
179
# Signals the waker (when one exists) and warns that the job will be retried.
#
# @param job [Hash] claimed job (not used in the message)
# @param class_name [String] used in the log message
# @param message [String] failure description
# @param options [Object, nil] job options providing next_attempt and retry
# @return [Object] the logger's return value
def log_queue_retry(job, class_name, message, options)
  @queue_waker.signal unless @queue_waker.nil?

  logger.warn('Async::Background') do
    "queue(#{class_name}): #{message}; retry #{options&.next_attempt}/#{options&.retry}"
  end
end
185
+
186
# Logs a terminal job failure, appending the backtrace when one is given.
#
# @param class_name [String] used in the log message
# @param message [String] failure description
# @param backtrace [Array<String>, nil] lines appended after the message
# @return [Object] the logger's return value
def log_queue_failure(class_name, message, backtrace)
  suffix = ''
  suffix = "\n#{backtrace.join("\n")}" if backtrace

  logger.error('Async::Background') { "queue(#{class_name}): #{message}#{suffix}" }
end
190
+
191
# Converts a duration in seconds to whole milliseconds.
#
# @param duration [Numeric, nil] elapsed seconds
# @return [Integer, nil] truncated milliseconds, or nil when the duration is
#   nil or negative
def duration_ms(duration)
  usable = !duration.nil? && !duration.negative?
  (duration * 1000).to_i if usable
end
196
+ end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+ require 'zlib'
5
+
6
+ module Async
7
+ module Background
8
+ class Runner
9
+ # Pure schedule parsing and heap construction. It deliberately returns the
10
+ # existing Hash contract from #build_task_config because specs and callers
11
+ # use that shape while the Runner owns execution state.
12
+ module Schedule
13
+ private
14
+
15
# Builds the heap of scheduled entries for this worker.
#
# @param config_path [String, nil] schedule file; nil yields an empty heap
# @return [MinHeap]
def build_heap(config_path)
  if config_path.nil?
    MinHeap.new
  else
    build_entries(load_schedule(config_path), monotonic_now)
  end
end
21
+
22
# Validates one schedule entry and returns its parsed settings.
#
# @param name [String] task name, used as a prefix in error messages
# @param config [Hash, nil] raw entry with 'class', 'every', 'cron', 'timeout'
# @return [Hash] with keys :job_class, :interval, :cron and :timeout
# @raise [ConfigError] when the class is missing; the helpers called here
#   raise ConfigError for their own validation failures
def build_task_config(name, config)
  raw_class = config&.dig('class')
  class_name = raw_class.to_s.strip
  raise ConfigError, "[#{name}] missing class" if class_name.empty?

  task = {job_class: resolve_scheduled_job(name, class_name)}
  task[:interval] = parse_interval(name, config['every'])
  task[:cron] = parse_cron(name, config['cron'])
  validate_schedule_frequency!(name, task[:interval], task[:cron])

  task[:timeout] = parse_timeout(name, config)
  task
end
38
+
39
# Resolves a job class from its fully qualified name.
#
# Each segment is looked up with inherit = false, so a constant is only found
# directly inside its namespace, never through ancestors or Object.
#
# @param class_name [String, Symbol, nil] e.g. "Reports::DailyJob"; a
#   leading "::" is accepted
# @return [Object] the resolved constant, which responds to +perform_now+
# @raise [ConfigError] when the name is blank, is not a valid constant path,
#   is unknown, or the constant does not respond to +perform_now+
def resolve_job_class(class_name)
  path = class_name.to_s
  raise ConfigError, 'empty class name in queue job' if path.strip.empty?

  klass = path.delete_prefix('::').split('::').reduce(Object) do |namespace, name|
    known = begin
      # Only modules can hold constants; an intermediate value constant
      # (e.g. Foo::VERSION::Job) must not blow up with NoMethodError.
      namespace.is_a?(Module) && namespace.const_defined?(name, false)
    rescue NameError
      # const_defined? raises for names that are not valid constants
      # ("my_job", "", " Foo"); report them like any other unknown class.
      false
    end
    raise ConfigError, "unknown class: #{class_name}" unless known

    namespace.const_get(name, false)
  end

  return klass if klass.respond_to?(:perform_now)

  raise ConfigError, "#{class_name} must include Async::Background::Job"
end
52
+
53
# Loads the schedule from a YAML file.
#
# @param path [String] path to the schedule file
# @return [Object] the parsed, non-empty YAML collection (normally a Hash
#   of task name => config)
# @raise [ConfigError] when the file is missing, the document is not a
#   collection, or it has no entries
def load_schedule(path)
  raise ConfigError, "Schedule file not found: #{path}" unless File.exist?(path)

  schedule = YAML.safe_load_file(path)
  # An empty file parses to nil.
  raise ConfigError, "Empty schedule: #{path}" if schedule.nil?
  # A scalar document (string, number, boolean) cannot be iterated as tasks;
  # fail with a configuration error instead of NoMethodError.
  raise ConfigError, "Invalid schedule (expected a mapping): #{path}" unless schedule.respond_to?(:any?)
  raise ConfigError, "Empty schedule: #{path}" unless schedule.any?

  schedule
end
60
+
61
# Builds a heap containing an entry for every scheduled task assigned to
# this worker.
#
# @param schedule [Enumerable] pairs of task name and raw config
# @param now [Numeric] reference time passed to #build_entry
# @return [MinHeap]
def build_entries(schedule, now)
  heap = MinHeap.new

  schedule.each do |name, config|
    next unless assigned_worker(config, name) == worker_index

    settings = build_task_config(name, config)
    heap.push(build_entry(name, settings, now))
  end

  heap
end
69
+
70
# Determines which worker runs a scheduled task.
#
# An explicit 'worker' setting wins; otherwise the task name is hashed with
# CRC32 onto the range 1..total_workers.
#
# @param config [Hash, nil] raw task config; nil is tolerated so the missing
#   'class' is reported later by #build_task_config rather than failing here
# @param name [String, #to_s] task name
# @return [Integer] worker index
def assigned_worker(config, name)
  pinned = config&.dig('worker')
  return pinned.to_i unless pinned.nil?

  # to_s keeps non-String YAML keys (e.g. integers) from raising TypeError.
  (Zlib.crc32(name.to_s) % total_workers) + 1
end
73
+
74
# Wraps parsed task settings in an Entry with its first run time.
#
# @param name [String] task name
# @param task [Hash] settings with :job_class, :interval, :cron, :timeout
# @param now [Numeric] reference time for the first run
# @return [Entry]
def build_entry(name, task, now)
  copied = %i[job_class interval cron timeout].to_h { |key| [key, task[key]] }

  Entry.new(name: name, **copied, next_run_at: initial_next_run_at(task, now))
end
84
+
85
# Computes the first run time of a task, relative to +now+.
#
# A random jitter (at most MAX_JITTER, and at most the interval for interval
# tasks) is always added. Interval tasks first run one full interval after
# +now+; cron tasks run after the wall-clock wait until the next cron time,
# but never sooner than MIN_SLEEP_TIME.
#
# @param task [Hash] settings with :interval or :cron
# @param now [Numeric] reference time
# @return [Numeric]
def initial_next_run_at(task, now)
  interval = task[:interval]
  jitter_cap = interval ? [interval, MAX_JITTER].min : MAX_JITTER
  base = now + (rand * jitter_cap)
  return base + interval if interval

  wall_now = Time.now
  until_next = task[:cron].next_time(wall_now).to_f - wall_now.to_f
  base + [until_next, MIN_SLEEP_TIME].max
end
93
+
94
# Resolves the job class of a scheduled task, prefixing any configuration
# error with the task name.
#
# @param name [String] task name
# @param class_name [String] job class name
# @return [Object] the resolved job class
# @raise [ConfigError] with the message prefixed by "[name] "
def resolve_scheduled_job(name, class_name)
  resolve_job_class(class_name)
rescue ConfigError => e
  raise ConfigError, "[#{name}] #{e.message}"
end
99
+
100
# Parses the 'every' setting of a scheduled task.
#
# @param name [String] task name, used in the error message
# @param value [Object, nil] raw value; converted with to_i
# @return [Integer, nil] nil when no value was given
# @raise [ConfigError] when the converted value is not positive
def parse_interval(name, value)
  return nil if value.nil?

  seconds = value.to_i
  raise ConfigError, "[#{name}] 'every' must be > 0" unless seconds.positive?

  seconds
end
108
+
109
# Parses the 'cron' setting of a scheduled task.
#
# @param name [String] task name, used in the error message
# @param value [String, nil] cron expression
# @return [Object, nil] the value returned by Fugit::Cron.new, or nil when
#   no expression was given
# @raise [ConfigError] when Fugit::Cron.new returns a falsy value
def parse_cron(name, value)
  return nil if value.nil?

  parsed = Fugit::Cron.new(value)
  raise ConfigError, "[#{name}] invalid cron: #{value}" unless parsed

  parsed
end
114
+
115
# Reads the task timeout, validating it through Job::Options.
#
# @param name [String] task name, used as the error prefix
# @param config [Hash] raw task config; 'timeout' defaults to DEFAULT_TIMEOUT
# @return [Object] the timeout as reported by Job::Options
# @raise [ConfigError] when Job::Options raises ArgumentError or TypeError
def parse_timeout(name, config)
  requested = config.fetch('timeout', DEFAULT_TIMEOUT)
  Job::Options.new(timeout: requested).timeout
rescue ArgumentError, TypeError => e
  raise ConfigError, "[#{name}] #{e.message}"
end
120
+
121
# Ensures a scheduled task declares at least one of 'every' or 'cron'.
#
# @param name [String] task name, used in the error message
# @param interval [Integer, nil] parsed 'every' value
# @param cron [Object, nil] parsed 'cron' value
# @return [nil]
# @raise [ConfigError] when both are missing
def validate_schedule_frequency!(name, interval, cron)
  raise ConfigError, "[#{name}] specify 'every' or 'cron'" unless interval || cron
end
126
+ end
127
+ end
128
+ end
129
+ end