async-background 0.7.1 → 1.0.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,39 +1,64 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'yaml'
4
- require 'zlib'
3
+ require 'async/barrier'
4
+
5
+ require_relative 'runner/queue_execution'
6
+ require_relative 'runner/schedule'
5
7
 
6
8
  module Async
7
9
  module Background
8
10
  class ConfigError < StandardError; end
9
11
 
10
- DEFAULT_TIMEOUT = 30
11
- MIN_SLEEP_TIME = 0.1
12
- MAX_JITTER = 5
12
+ DEFAULT_TIMEOUT = 30
13
+ MIN_SLEEP_TIME = 0.1
14
+ MAX_JITTER = 5
13
15
  QUEUE_POLL_INTERVAL = 5
16
+ MIN_QUEUE_WAIT = 0.001
14
17
 
15
18
  class Runner
16
19
  include Clock
17
-
18
- attr_reader :logger, :semaphore, :heap, :worker_index, :total_workers, :shutdown, :metrics, :queue_store
19
-
20
+ include QueueExecution
21
+ include Schedule
22
+
23
+ attr_reader :logger,
24
+ :semaphore,
25
+ :heap,
26
+ :worker_index,
27
+ :total_workers,
28
+ :shutdown,
29
+ :metrics,
30
+ :queue_store
31
+
32
+ # `config_path: nil` explicitly disables recurring jobs. This keeps the
33
+ # dynamic SQLite queue usable on its own; a supplied path remains strict
34
+ # so a typo cannot silently disable scheduled work.
20
35
  def initialize(
21
- config_path:, job_count: 2, worker_index:, total_workers:,
22
- queue_socket_dir: nil, queue_db_path: nil, queue_mmap: true
36
+ config_path: nil,
37
+ job_count: 2,
38
+ worker_index:,
39
+ total_workers:,
40
+ queue_socket_dir: nil,
41
+ queue_db_path: nil,
42
+ queue_mmap: true,
43
+ metrics_shm_path: Metrics.default_shm_path
23
44
  )
24
- @logger = Console.logger
25
- @worker_index = worker_index
45
+ @logger = Console.logger
46
+ @worker_index = worker_index
26
47
  @total_workers = total_workers
27
- @running = true
28
- @shutdown = ::Async::Condition.new
29
- @metrics = Metrics.new(worker_index: worker_index, total_workers: total_workers)
30
-
48
+ @running = true
49
+ @shutdown = ::Async::Condition.new
50
+ @metrics = Metrics.new(
51
+ worker_index: worker_index,
52
+ total_workers: total_workers,
53
+ shm_path: metrics_shm_path
54
+ )
31
55
  logger.info { "Async::Background worker_index=#{worker_index}/#{total_workers}, job_count=#{job_count}" }
32
56
 
33
- @semaphore = ::Async::Semaphore.new(job_count)
34
- @heap = build_heap(config_path)
35
-
57
+ @drain_barrier = ::Async::Barrier.new
58
+ @semaphore = ::Async::Semaphore.new(job_count, parent: @drain_barrier)
59
+ @heap = config_path.nil? ? MinHeap.new : build_heap(config_path)
36
60
  setup_queue(queue_socket_dir, queue_db_path, queue_mmap)
61
+ validate_work_source!(config_path)
37
62
  end
38
63
 
39
64
  def run
@@ -43,10 +68,7 @@ module Async
43
68
  start_queue_listener(task) if @listen_queue
44
69
 
45
70
  scheduler_loop(task)
46
-
47
- semaphore.acquire {}
48
- @queue_store&.close
49
- @queue_waker&.close
71
+ drain_and_close_queue
50
72
  end
51
73
  end
52
74
 
@@ -54,154 +76,97 @@ module Async
54
76
  return unless @running
55
77
 
56
78
  @running = false
57
- logger.info { "Async::Background: stopping gracefully" }
79
+ logger.info { 'Async::Background: stopping gracefully' }
58
80
  shutdown.signal
59
81
  @queue_waker&.signal
60
82
  end
61
83
 
62
- def running?
63
- @running
64
- end
84
+ def running? = @running
65
85
 
66
86
  private
67
87
 
68
- def setup_queue(queue_socket_dir, queue_db_path, queue_mmap)
69
- @listen_queue = false
70
- return unless queue_socket_dir
71
-
72
- isolated = ENV.fetch("ISOLATION_FORKS", "").split(",").map(&:to_i)
73
- return if isolated.include?(worker_index)
74
-
75
- require_relative 'queue/store'
76
- require_relative 'queue/socket_waker'
77
- require_relative 'queue/client'
78
-
79
- @listen_queue = true
80
- @queue_store = Queue::Store.new(
81
- path: queue_db_path || Queue::Store.default_path,
82
- options: { mmap: queue_mmap }
83
- )
84
-
85
- socket_path = File.join(queue_socket_dir, "async_bg_worker_#{worker_index}.sock")
86
- @queue_waker = Queue::SocketWaker.new(socket_path)
87
- @queue_waker.open!
88
-
89
- recovered = @queue_store.recover(worker_index)
90
- logger.info { "Async::Background queue: recovered #{recovered} stale jobs" } if recovered > 0
91
- end
92
-
93
- def start_queue_listener(task)
94
- @queue_waker.start_accept_loop(task)
95
-
96
- task.async do
97
- logger.info { "Async::Background queue: listening on worker #{worker_index}" }
88
+ def scheduler_loop(task)
89
+ # Queue-only workers have no heap entry to sleep on. Keep the runner
90
+ # alive until #stop / SIGTERM wakes this condition; the queue listener
91
+ # continues independently in its own Async task.
92
+ return shutdown.wait if heap.empty? && @listen_queue
98
93
 
99
- while running?
100
- @queue_waker.wait(timeout: QUEUE_POLL_INTERVAL)
94
+ loop do
95
+ entry = heap.peek
96
+ break unless entry
101
97
 
102
- while running?
103
- job = @queue_store.fetch(worker_index)
104
- break unless job
98
+ wait_for_next_entry(task, entry)
99
+ break unless running?
105
100
 
106
- semaphore.async { |job_task| run_queue_job(job_task, job) }
107
- end
108
- end
101
+ dispatch_due_entries
109
102
  end
110
103
  end
111
104
 
112
- def run_queue_job(job_task, job)
113
- class_name = job[:class_name]
114
- klass = resolve_job_class(class_name)
115
- options = parse_job_options(job[:options])
116
-
117
- metrics.job_started(nil)
118
- started = monotonic_now
119
- job_task.with_timeout(options.timeout) { klass.perform_now(*job[:args]) }
120
- duration = monotonic_now - started
121
-
122
- metrics.job_finished(nil, duration)
123
- @queue_store.complete(job[:id])
124
- logger.info('Async::Background') { "queue(#{class_name}): completed in #{duration.round(2)}s" }
125
- rescue ConfigError => e
126
- metrics.job_failed(nil, e) if options
127
- @queue_store.fail(job[:id])
128
- logger.error('Async::Background') { "queue(#{class_name}): #{e.class} #{e.message}" }
129
- rescue ::Async::TimeoutError
130
- metrics.job_timed_out(nil)
131
- handle_queue_failure(job, options, "timed out after #{options.timeout}s", backtrace: nil)
132
- rescue => e
133
- metrics.job_failed(nil, e)
134
- handle_queue_failure(job, options, "#{e.class} #{e.message}", backtrace: e.backtrace)
105
+ def validate_work_source!(config_path)
106
+ return unless config_path.nil? && !@listen_queue
107
+
108
+ raise ConfigError, 'Runner requires config_path or queue_socket_dir'
135
109
  end
136
110
 
137
- def parse_job_options(raw)
138
- Job::Options.new(**(raw || {}))
139
- rescue ArgumentError, TypeError => e
140
- raise ConfigError, "invalid queue options: #{e.message}"
111
+ def wait_for_next_entry(task, entry)
112
+ wait = [entry.next_run_at - monotonic_now, MIN_SLEEP_TIME].max
113
+ wait_with_shutdown(task, wait)
141
114
  end
142
115
 
143
- def handle_queue_failure(job, options, message, backtrace:)
144
- result = @queue_store.retry_or_fail(job[:id], fallback_options: options)
145
- class_name = job[:class_name]
116
+ def dispatch_due_entries
117
+ now = monotonic_now
118
+ while (entry = heap.peek) && entry.next_run_at <= now
119
+ break unless running?
146
120
 
147
- if result == :retried
148
- @queue_waker&.signal
149
- attempt = options.next_attempt
150
- logger.warn('Async::Background') do
151
- "queue(#{class_name}): #{message}; retry #{attempt}/#{options.retry}"
152
- end
153
- else
154
- tail = backtrace ? "\n#{backtrace.join("\n")}" : ''
155
- logger.error('Async::Background') { "queue(#{class_name}): #{message}#{tail}" }
121
+ dispatch_entry(entry)
156
122
  end
157
123
  end
158
124
 
159
- def resolve_job_class(class_name)
160
- raise ConfigError, "empty class name in queue job" if class_name.nil? || class_name.to_s.strip.empty?
161
-
162
- klass = class_name.split("::").reduce(Object) do |mod, name|
163
- raise ConfigError, "unknown class: #{class_name}" unless mod.const_defined?(name, false)
164
- mod.const_get(name, false)
125
+ def dispatch_entry(entry)
126
+ if entry.running
127
+ skip_entry(entry)
128
+ else
129
+ execute_entry(entry)
165
130
  end
166
131
 
167
- raise ConfigError, "#{class_name} must include Async::Background::Job" unless klass.respond_to?(:perform_now)
168
-
169
- klass
132
+ entry.reschedule(monotonic_now)
133
+ heap.replace_top(entry)
170
134
  end
171
135
 
172
- def scheduler_loop(task)
173
- loop do
174
- entry = heap.peek
175
- break unless entry
136
+ def skip_entry(entry)
137
+ logger.warn('Async::Background') { "#{entry.name}: skipped, previous run still active" }
138
+ metrics.job_skipped(entry)
139
+ end
176
140
 
177
- now = monotonic_now
178
- wait = [entry.next_run_at - now, MIN_SLEEP_TIME].max
179
- wait_with_shutdown(task, wait)
180
- break unless running?
141
+ def execute_entry(entry)
142
+ entry.running = true
143
+ semaphore.async do |job_task|
144
+ run_job(job_task, entry)
145
+ ensure
146
+ entry.running = false
147
+ end
148
+ end
181
149
 
182
- now = monotonic_now
183
- while (entry = heap.peek) && entry.next_run_at <= now
184
- break unless running?
150
+ def run_job(job_task, entry)
151
+ metrics_started = false
152
+ metrics.job_started(entry)
153
+ metrics_started = true
154
+ started_at = monotonic_now
155
+ job_task.with_timeout(entry.timeout) { entry.job_class.perform_now }
185
156
 
186
- if entry.running
187
- logger.warn('Async::Background') { "#{entry.name}: skipped, previous run still active" }
188
- metrics.job_skipped(entry)
189
- entry.reschedule(monotonic_now)
190
- heap.replace_top(entry)
191
- next
192
- end
193
-
194
- entry.running = true
195
- semaphore.async do |job_task|
196
- run_job(job_task, entry)
197
- ensure
198
- entry.running = false
199
- end
200
-
201
- entry.reschedule(monotonic_now)
202
- heap.replace_top(entry)
203
- end
204
- end
157
+ duration = monotonic_now - started_at
158
+ metrics.job_succeeded(entry, duration)
159
+ logger.info('Async::Background') { "#{entry.name}: completed in #{duration.round(2)}s" }
160
+ rescue ::Async::TimeoutError
161
+ metrics.job_timed_out(entry)
162
+ logger.error('Async::Background') { "#{entry.name}: timed out after #{entry.timeout}s" }
163
+ rescue StandardError => error
164
+ metrics.job_failed(entry, error)
165
+ logger.error('Async::Background') {
166
+ "#{entry.name}: #{error.class} #{error.message}\n#{error.backtrace.join("\n")}"
167
+ }
168
+ ensure
169
+ metrics.job_stopped(entry) if metrics_started
205
170
  end
206
171
 
207
172
  def setup_signal_handlers
@@ -232,92 +197,10 @@ module Async
232
197
  rescue ::Async::TimeoutError
233
198
  end
234
199
 
235
- def build_heap(config_path)
236
- raise ConfigError, "Schedule file not found: #{config_path}" unless File.exist?(config_path)
237
-
238
- raw = YAML.safe_load_file(config_path)
239
- raise ConfigError, "Empty schedule: #{config_path}" unless raw&.any?
240
-
241
- heap = MinHeap.new
242
- now = monotonic_now
243
-
244
- raw.each do |name, config|
245
- assigned = config['worker']&.to_i || ((Zlib.crc32(name) % total_workers) + 1)
246
- next unless assigned == worker_index
247
-
248
- task_config = build_task_config(name, config)
249
- jitter = rand * [task_config[:interval] || MAX_JITTER, MAX_JITTER].min
250
-
251
- next_run_at = if task_config[:interval]
252
- now + jitter + task_config[:interval]
253
- else
254
- now_wall = Time.now
255
- wall_wait = task_config[:cron].next_time(now_wall).to_f - now_wall.to_f
256
- now + jitter + [wall_wait, MIN_SLEEP_TIME].max
257
- end
258
-
259
- heap.push(Entry.new(
260
- name: name,
261
- job_class: task_config[:job_class],
262
- interval: task_config[:interval],
263
- cron: task_config[:cron],
264
- timeout: task_config[:timeout],
265
- next_run_at: next_run_at
266
- ))
267
- end
268
-
269
- heap
270
- end
271
-
272
- def build_task_config(name, config)
273
- class_name = config&.dig('class').to_s.strip
274
- raise ConfigError, "[#{name}] missing class" if class_name.empty?
275
-
276
- job_class = begin
277
- resolve_job_class(class_name)
278
- rescue ConfigError => e
279
- raise ConfigError, "[#{name}] #{e.message}"
280
- end
281
-
282
- interval = config['every']&.then { |v|
283
- int = v.to_i
284
- raise ConfigError, "[#{name}] 'every' must be > 0" unless int.positive?
285
- int
286
- }
287
-
288
- cron = config['cron']&.then { |c|
289
- Fugit::Cron.new(c) || raise(ConfigError, "[#{name}] invalid cron: #{c}")
290
- }
291
-
292
- raise ConfigError, "[#{name}] specify 'every' or 'cron'" unless interval || cron
293
-
294
- timeout = begin
295
- Job::Options.new(timeout: config.fetch('timeout', DEFAULT_TIMEOUT)).timeout
296
- rescue ArgumentError, TypeError => e
297
- raise ConfigError, "[#{name}] #{e.message}"
298
- end
299
-
300
- { job_class: job_class, interval: interval, cron: cron, timeout: timeout }
301
- end
302
-
303
- def run_job(job_task, entry)
304
- metrics.job_started(entry)
305
- t = monotonic_now
306
- job_task.with_timeout(entry.timeout) { entry.job_class.perform_now }
307
-
308
- duration = monotonic_now - t
309
- metrics.job_finished(entry, duration)
310
- logger.info('Async::Background') {
311
- "#{entry.name}: completed in #{duration.round(2)}s"
312
- }
313
- rescue ::Async::TimeoutError
314
- metrics.job_timed_out(entry)
315
- logger.error('Async::Background') { "#{entry.name}: timed out after #{entry.timeout}s" }
316
- rescue => e
317
- metrics.job_failed(entry, e)
318
- logger.error('Async::Background') {
319
- "#{entry.name}: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}"
320
- }
200
+ def drain_and_close_queue
201
+ @drain_barrier.wait
202
+ @queue_store&.close
203
+ @queue_waker&.close
321
204
  end
322
205
  end
323
206
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Async
4
4
  module Background
5
- VERSION = '0.7.1'
5
+ VERSION = '1.0.0'
6
6
  end
7
7
  end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Async
4
+ module Background
5
+ module Web
6
+ class App
7
+ def initialize(config)
8
+ @config = config.validate!
9
+ @auth = Auth.new(@config.auth)
10
+ @snapshot = Snapshot.new(path: @config.queue_path, counts_cache_ttl: @config.counts_cache_ttl).open!
11
+ @metrics_reader = build_metrics_reader
12
+ @serializer = Serializer.new(@config)
13
+ @event_hub = build_event_hub
14
+ @router = Router.new
15
+ end
16
+
17
+ def call(env)
18
+ return Response.unauthorized unless @auth.authorized?(env)
19
+
20
+ route = @router.match(env)
21
+ return Response.not_found unless route
22
+
23
+ dispatch(route, env)
24
+ rescue RequestError => error
25
+ Response.bad_request(error.message)
26
+ rescue UnavailableError, ClosedError
27
+ Response.unavailable
28
+ rescue StandardError
29
+ # Do not turn internal class names, paths or database errors into an
30
+ # unauthenticated information disclosure channel.
31
+ Response.internal_error
32
+ end
33
+
34
+ def close
35
+ @event_hub&.close
36
+ @snapshot.close
37
+ self
38
+ end
39
+
40
+ private
41
+
42
+ def build_metrics_reader
43
+ return unless @config.metrics_enabled?
44
+
45
+ MetricsReader.new(path: @config.metrics_path, total_workers: @config.total_workers)
46
+ end
47
+
48
+ def build_event_hub
49
+ return unless @config.transport == :sse
50
+
51
+ EventHub.new(
52
+ @snapshot,
53
+ @serializer,
54
+ metrics_reader: @metrics_reader,
55
+ poll_seconds: @config.stream_poll_seconds
56
+ )
57
+ end
58
+
59
+ def dispatch(route, env)
60
+ case route
61
+ when :index then Response.html(Assets.render_index(@config))
62
+ when :javascript then Response.javascript(Assets::JS)
63
+ when :stylesheet then Response.stylesheet(Assets::CSS)
64
+ when :overview then overview_response
65
+ when :executing then in_flight_response(:executing, env)
66
+ when :claimed then in_flight_response(:claimed, env)
67
+ when :done then terminal_response(:done, env)
68
+ when :failed then terminal_response(:failed, env)
69
+ when :pending then pending_response(env)
70
+ when :metrics then metrics_response
71
+ when :config then config_response
72
+ when :stream then stream_response
73
+ else Response.not_found
74
+ end
75
+ end
76
+
77
+ def overview_response
78
+ Response.json(@serializer.overview(@snapshot.overview, metrics_payload))
79
+ end
80
+
81
+ def in_flight_response(kind, env)
82
+ request = Request.new(env, @config)
83
+ rows = kind == :executing ? @snapshot.executing(limit: request.limit) : @snapshot.claimed(limit: request.limit)
84
+ payload = kind == :executing ? @serializer.executing(rows) : @serializer.claimed(rows)
85
+ Response.json({items: payload})
86
+ end
87
+
88
+ def terminal_response(kind, env)
89
+ request = Request.new(env, @config)
90
+ cursor = request.finished_cursor
91
+ rows = kind == :done ? @snapshot.recent_done(limit: request.limit, cursor: cursor) :
92
+ @snapshot.recent_failed(limit: request.limit, cursor: cursor)
93
+ payload = kind == :done ? @serializer.done(rows) : @serializer.failed(rows)
94
+ Response.json(payload)
95
+ end
96
+
97
+ def pending_response(env)
98
+ request = Request.new(env, @config)
99
+ rows = @snapshot.pending(limit: request.limit, cursor: request.pending_cursor)
100
+ Response.json(@serializer.pending(rows))
101
+ end
102
+
103
+ def metrics_response
104
+ Response.json(metrics_payload || {available: false, workers: [], totals: MetricsReader::EMPTY_TOTALS})
105
+ end
106
+
107
+ def metrics_payload
108
+ @metrics_reader&.aggregated
109
+ end
110
+
111
+ def config_response
112
+ Response.json(
113
+ {
114
+ title: @config.title,
115
+ poll_interval_ms: @config.poll_interval_ms,
116
+ transport: @config.transport.to_s,
117
+ expose_args: @config.expose_args,
118
+ list_limit: @config.list_limit,
119
+ mount_path: @config.mount_path
120
+ }
121
+ )
122
+ end
123
+
124
+ def stream_response
125
+ return Response.not_found unless @config.transport == :sse
126
+
127
+ Response.sse(
128
+ Stream.new(
129
+ @event_hub,
130
+ heartbeat_seconds: @config.stream_heartbeat_seconds,
131
+ retry_ms: @config.stream_retry_ms
132
+ )
133
+ )
134
+ end
135
+ end
136
+ end
137
+ end
138
+ end