zizq 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +94 -0
  4. data/bin/profile-worker +145 -0
  5. data/bin/zizq-worker +174 -0
  6. data/lib/active_job/queue_adapters/zizq_adapter.rb +109 -0
  7. data/lib/zizq/ack_processor.rb +132 -0
  8. data/lib/zizq/active_job_config.rb +122 -0
  9. data/lib/zizq/backoff.rb +50 -0
  10. data/lib/zizq/bulk_enqueue.rb +87 -0
  11. data/lib/zizq/client.rb +982 -0
  12. data/lib/zizq/configuration.rb +164 -0
  13. data/lib/zizq/enqueue_request.rb +178 -0
  14. data/lib/zizq/enqueue_with.rb +109 -0
  15. data/lib/zizq/error.rb +43 -0
  16. data/lib/zizq/job.rb +188 -0
  17. data/lib/zizq/job_config.rb +244 -0
  18. data/lib/zizq/lifecycle.rb +58 -0
  19. data/lib/zizq/middleware.rb +79 -0
  20. data/lib/zizq/query.rb +566 -0
  21. data/lib/zizq/resources/error_enumerator.rb +241 -0
  22. data/lib/zizq/resources/error_page.rb +19 -0
  23. data/lib/zizq/resources/error_record.rb +19 -0
  24. data/lib/zizq/resources/job.rb +124 -0
  25. data/lib/zizq/resources/job_page.rb +57 -0
  26. data/lib/zizq/resources/page.rb +77 -0
  27. data/lib/zizq/resources/resource.rb +45 -0
  28. data/lib/zizq/resources.rb +16 -0
  29. data/lib/zizq/version.rb +9 -0
  30. data/lib/zizq/worker.rb +467 -0
  31. data/lib/zizq.rb +269 -0
  32. data/sig/generated/zizq/ack_processor.rbs +73 -0
  33. data/sig/generated/zizq/active_job_config.rbs +74 -0
  34. data/sig/generated/zizq/backoff.rbs +34 -0
  35. data/sig/generated/zizq/bulk_enqueue.rbs +72 -0
  36. data/sig/generated/zizq/client.rbs +419 -0
  37. data/sig/generated/zizq/configuration.rbs +95 -0
  38. data/sig/generated/zizq/enqueue_request.rbs +94 -0
  39. data/sig/generated/zizq/enqueue_with.rbs +88 -0
  40. data/sig/generated/zizq/error.rbs +41 -0
  41. data/sig/generated/zizq/job.rbs +136 -0
  42. data/sig/generated/zizq/job_config.rbs +150 -0
  43. data/sig/generated/zizq/lifecycle.rbs +34 -0
  44. data/sig/generated/zizq/middleware.rbs +50 -0
  45. data/sig/generated/zizq/query.rbs +327 -0
  46. data/sig/generated/zizq/resources/error_enumerator.rbs +148 -0
  47. data/sig/generated/zizq/resources/error_page.rbs +13 -0
  48. data/sig/generated/zizq/resources/error_record.rbs +20 -0
  49. data/sig/generated/zizq/resources/job.rbs +89 -0
  50. data/sig/generated/zizq/resources/job_page.rbs +33 -0
  51. data/sig/generated/zizq/resources/page.rbs +47 -0
  52. data/sig/generated/zizq/resources/resource.rbs +26 -0
  53. data/sig/generated/zizq/version.rbs +5 -0
  54. data/sig/generated/zizq/worker.rbs +152 -0
  55. data/sig/generated/zizq.rbs +180 -0
  56. data/sig/zizq.rbs +111 -0
  57. metadata +134 -0
@@ -0,0 +1,467 @@
1
+ # Copyright (c) 2026 Chris Corbyn <chris@zizq.io>
2
+ # Licensed under the MIT License. See LICENSE file for details.
3
+
4
+ # rbs_inline: enabled
5
+ # frozen_string_literal: true
6
+
7
+ require "logger"
8
+
9
+ module Zizq
10
+ # Top-level worker process which orchestrates fetching jobs from the server
11
+ # and dispatching them to a pool of worker tasks for processing.
12
+ #
13
+ # Fiber support (when `fiber_count > 1`) creates an Async context. When
14
+ # `fiber_count == 1`, no Async context is created.
15
+ #
16
+ # Total concurrency is calculated as `thread_count * fiber_count`.
17
+ class Worker
18
# Thread pool size used when `thread_count:` is not given.
DEFAULT_THREADS = 5 #: Integer

# Fibers per worker thread used when `fiber_count:` is not given.
DEFAULT_FIBERS = 1 #: Integer

# Retry defaults handed to `Backoff` as `min_wait:`, `max_wait:` and
# `multiplier:` respectively.
# NOTE(review): the waits are presumably in seconds (the reconnect log
# prints the backoff duration with an "s" suffix) — confirm in Backoff.
DEFAULT_RETRY_MIN_WAIT = 1
DEFAULT_RETRY_MAX_WAIT = 30
DEFAULT_RETRY_MULTIPLIER = 2
23
+
24
# Shortcut for `new(...).run`: builds a worker from the given arguments
# and immediately runs it, returning whatever `#run` returns.
def self.run(...) #: (**untyped) -> void
  worker = new(...)
  worker.run
end
28
+
29
# Number of worker threads started by `#run` (default: 5).
#
# Use 1 for applications that are not threadsafe.
attr_reader :thread_count #: Integer

# Number of fibers started inside each worker thread (default: 1).
#
# With a value of 1 the worker thread runs its loop directly; any larger
# value runs the loops inside an Async context.
attr_reader :fiber_count #: Integer

# Queues to fetch jobs from.
#
# An empty list (the default) means all queues.
attr_reader :queues #: Array[String]

# How many jobs the server may send to this worker at once.
#
# When not given explicitly this is `thread_count * fiber_count * 2`, so
# the pipeline stays full while ack round-trips are in flight.
attr_reader :prefetch #: Integer

# Optional proc called with `(thread_idx, fiber_idx)` to derive a worker
# ID string for each thread/fiber.
#
# When not present, the Zizq server assigns a random worker ID.
attr_reader :worker_id_proc #: (^(Integer, Integer) -> String?)?

# Logger used for all worker output.
attr_reader :logger #: Logger

# Callable invoked with each received job.
#
# Defaults to the globally-configured `dequeue_middleware` chain. A
# dispatcher passed to `#initialize` is used as-is, bypassing that chain;
# callers who still want middleware can build their own
# `Zizq::Middleware::Chain`.
attr_reader :dispatcher #: ^(Resources::Job) -> void
68
+
69
# Builds a worker without starting it.
#
# Raises ArgumentError when `thread_count` or `fiber_count` is below 1,
# then validates the global Zizq configuration. `prefetch` falls back to
# twice the total concurrency, `logger` to the configured logger, and
# `dispatcher` to the configured `dequeue_middleware`.
#
# @rbs queues: Array[String]
# @rbs thread_count: Integer
# @rbs fiber_count: Integer
# @rbs prefetch: Integer?
# @rbs retry_min_wait: (Float | Integer)
# @rbs retry_max_wait: (Float | Integer)
# @rbs retry_multiplier: (Float | Integer)
# @rbs worker_id: (^(Integer, Integer) -> String?)?
# @rbs logger: Logger?
# @rbs dispatcher: (^(Resources::Job) -> void)?
# @rbs return: void
def initialize(
  queues: [],
  thread_count: DEFAULT_THREADS,
  fiber_count: DEFAULT_FIBERS,
  prefetch: nil,
  retry_min_wait: DEFAULT_RETRY_MIN_WAIT,
  retry_max_wait: DEFAULT_RETRY_MAX_WAIT,
  retry_multiplier: DEFAULT_RETRY_MULTIPLIER,
  worker_id: nil,
  logger: nil,
  dispatcher: nil
)
  # Checked in declaration order so the first offending option is reported.
  { thread_count: thread_count, fiber_count: fiber_count }.each do |option, value|
    raise ArgumentError, "#{option} must be at least 1 (got #{value})" if value < 1
  end

  Zizq.configuration.validate!

  @queues, @thread_count, @fiber_count = queues, thread_count, fiber_count
  @prefetch = prefetch || (thread_count * fiber_count * 2)

  @retry_min_wait, @retry_max_wait = retry_min_wait, retry_max_wait
  @retry_multiplier = retry_multiplier

  @worker_id_proc = worker_id
  @logger = logger || Zizq.configuration.logger
  @dispatcher = dispatcher || Zizq.configuration.dequeue_middleware

  reset_runtime_state
end
110
+
111
# Ask the worker to shut down gracefully.
#
# Moves the lifecycle to draining and closes the dispatch queue so that
# worker loops exit once the queue is empty. `#run` then waits for the
# workers and the ack processor before closing the streaming connection
# and returning.
#
# Intended to be callable from a signal handler.
# NOTE(review): that relies on `Lifecycle#drain!` being trap-safe, which
# is not visible from this file — confirm.
def stop #: () -> void
  @lifecycle.drain!

  begin
    @dispatch_queue.close
  rescue StandardError
    # Best effort: a failure to close must not escape the caller.
    nil
  end
end
125
+
126
# Ask the worker to shut down immediately.
#
# Does everything `#stop` does and additionally sets the kill flag, which
# makes `#run` close the streaming connection first instead of last. The
# server can then re-dispatch in-flight jobs after its visibility
# timeout. Meant for when `#stop` was given adequate time and `#run`
# still has not returned.
#
# Jobs already executing on worker threads are not interrupted; they run
# to completion, but no new jobs are taken from the dispatch queue.
#
# Intended to be callable from a signal handler.
def kill #: () -> void
  @killing = true
  @lifecycle.drain!

  begin
    @dispatch_queue.close
  rescue StandardError
    # Best effort: a failure to close must not escape the caller.
    nil
  end
end
144
+
145
# Start the worker and block until it has shut down.
#
# Resets all runtime state first, so the same instance can be run more
# than once. Then starts the ack processor, the worker threads and the
# producer thread, and waits until the lifecycle leaves the running state
# (via `#stop`, `#kill`, or the producer thread exiting).
#
# Shutdown order depends on how it was requested:
# * kill:  close the stream, join the workers, stop the ack processor.
# * stop:  join the workers, stop the ack processor, close the stream.
#          The stream stays open until the end so the server does not
#          requeue jobs that workers are still finishing.
def run #: () -> void
  reset_runtime_state

  # Best-effort close of whatever streaming response is current at call
  # time. Closing also unblocks the producer's IO read.
  close_stream = lambda do
    @streaming_response&.close
  rescue StandardError
    nil
  end

  logger.info do
    format(
      "Zizq worker starting: %d threads, %d fibers, prefetch=%d",
      thread_count, fiber_count, prefetch
    )
  end
  logger.info { "Queues: #{queues.empty? ? '(all)' : queues.join(', ')}" }

  # Everything below runs in the background.
  @ack_processor.start
  workers = start_worker_threads
  producer = start_producer_thread

  # Parks the calling thread until stop, kill, or a producer crash.
  @lifecycle.wait_while_running

  if @killing
    logger.info { "Killing. Closing stream and forcing shutdown..." }

    close_stream.call
    # Workers finish their current job, see the closed queue, and exit.
    workers.each(&:join)
    # Flushes whichever acks can still be flushed.
    @ack_processor.stop
  else
    logger.info { "Shutting down. Waiting for workers to finish..." }

    # Workers drain what is left in the closed dispatch queue.
    workers.each(&:join)
    # Pending acks/nacks go out while the connection is still open.
    @ack_processor.stop
    close_stream.call
  end

  # Tells the producer's watcher fiber that cleanup is done; it then
  # cancels the producer task from inside its own reactor.
  @lifecycle.stop!
  producer.join

  logger.info { "Zizq worker stopped" }
end
215
+
216
+ private
217
+
218
# Rebuilds every piece of per-run state: kill flag, streaming response
# reference, dispatch queue, lifecycle, backoff and ack processor.
#
# Invoked by `#initialize` and again at the top of `#run`, which is what
# makes repeated `#run` calls on one instance possible. The ack processor
# is sized at twice the prefetch and shares this worker's backoff.
def reset_runtime_state #: () -> void
  @killing = false
  @streaming_response = nil #: untyped
  @dispatch_queue = Thread::Queue.new
  @lifecycle = Lifecycle.new

  @backoff = Backoff.new(
    min_wait: @retry_min_wait,
    max_wait: @retry_max_wait,
    multiplier: @retry_multiplier,
  )

  @ack_processor = AckProcessor.new(
    client: Zizq.client,
    capacity: @prefetch * 2,
    logger: @logger,
    backoff: @backoff,
  )
end
238
+
239
# Starts the "zizq-producer" thread and returns it.
#
# The thread streams jobs from the server via `client.take_jobs` and
# pushes each one onto the dispatch queue, reconnecting with backoff
# after errors for as long as the lifecycle is running.
#
# It runs inside its own `Sync` block with a watcher fiber. The watcher
# waits on `wait_until_stopped` (not `wait_while_running`), so the
# producer stays connected while workers and acks drain. Only after
# `#run` calls `@lifecycle.stop!` does the watcher call `task.stop`,
# which cancels the producer wherever it is currently yielding.
#
# On exit the thread closes the dispatch queue and, via `ensure`, moves
# the lifecycle to draining so that `#run` wakes up even when the
# producer dies before any shutdown was requested.
def start_producer_thread #: () -> Thread
  Thread.new do
    Thread.current.name = "zizq-producer"

    logger.info { "Zizq producer thread started" }

    # Callbacks handed to `take_jobs` on every (re)connect.
    connected = lambda do
      logger.info { "Connected. Listening for jobs." }
      @backoff.reset
    end
    remember_response = ->(resp) { @streaming_response = resp }

    Sync do |task|
      # Watcher fiber: cancels this task once final shutdown is signalled.
      task.async do
        @lifecycle.wait_until_stopped
        task.stop
      end

      while @lifecycle.running?
        begin
          client = Zizq.client
          logger.info { "Connecting to #{client.url}..." }

          client.take_jobs(
            prefetch: prefetch,
            queues: queues,
            on_connect: connected,
            on_response: remember_response,
          ) do |job|
            logger.debug do
              format(
                "Received %s (%s), dispatch queue: %d",
                job.type, job.id, @dispatch_queue.size
              )
            end

            @dispatch_queue.push(job)
          rescue ClosedQueueError
            # Shutdown has begun. Stay connected so in-flight jobs are not
            # requeued while workers and acks drain; the watcher fiber
            # cancels this task once `@lifecycle.stop!` is called.
            @lifecycle.wait_until_stopped
            break
          end

          # Stream ended normally: drop the stale reference, reset backoff.
          @streaming_response = nil
          @backoff.reset
        rescue Async::Stop
          # Cancelled by the watcher fiber — shutdown is complete.
          break
        rescue Zizq::ConnectionError, Zizq::StreamError => error
          break unless @lifecycle.running?

          logger.warn do
            format(
              "%s: %s. Reconnecting in %.2fs...",
              error.class, error.message, @backoff.duration
            )
          end

          @backoff.wait
        rescue StandardError => error
          break unless @lifecycle.running?

          logger.error { "Error: #{error.class}: #{error.message}" }
          logger.debug { error.backtrace&.join("\n") }
          @backoff.wait
        end
      end
    end

    # Make sure the queue is closed so workers can drain and exit.
    begin
      @dispatch_queue.close
    rescue StandardError
      nil
    end
    logger.info { "Zizq producer thread stopped" }
  ensure
    # Wakes the main thread if the producer dies during normal operation
    # (i.e. before any shutdown signal).
    @lifecycle.drain!
  end
end
338
+
339
# Spawns `thread_count` threads named "zizq-worker-<index>" and returns
# them. Each thread runs fiber workers when `fiber_count` exceeds 1,
# otherwise a single run loop with fiber index 0.
def start_worker_threads #: () -> Array[Thread]
  Array.new(thread_count) do |thread_idx|
    # The index is passed through Thread.new so each thread gets its own copy.
    Thread.new(thread_idx) do |tidx|
      Thread.current.name = "zizq-worker-#{tidx}"

      fiber_count > 1 ? run_fiber_workers(tidx) : run_loop(tidx, 0)
    end
  end
end
352
+
353
# Body of a single worker (one thread, or one fiber within a thread).
#
# Resolves this worker's ID once, then keeps popping jobs off the
# dispatch queue and handing them to `#dispatch`. The loop ends when
# `pop` returns nil, i.e. the queue has been closed and is empty.
def run_loop(thread_idx, fiber_idx) #: (Integer, Integer) -> void
  logger.info { format("Worker %d:%d started", thread_idx, fiber_idx) }

  wid = resolve_worker_id(thread_idx, fiber_idx)

  loop do
    break if (job = @dispatch_queue.pop).nil?

    dispatch(job, wid)
  end

  logger.info { format("Worker %d:%d stopped", thread_idx, fiber_idx) }
end
377
+
378
# Runs `fiber_count` run loops concurrently on the current thread, each
# as a child task of one Async reactor. The `async` gem is loaded lazily
# here, on the multi-fiber path.
def run_fiber_workers(thread_idx) #: (Integer) -> void
  require "async"

  Async do |task|
    fiber_count.times do |fiber_idx|
      task.async { run_loop(thread_idx, fiber_idx) }
    end
  end
end
390
+
391
# Run one job through `@dispatcher` and report the outcome.
#
# A normal return queues an ack; any raised exception is logged and
# queues a nack carrying the error. The elapsed time is measured on a
# monotonic clock and is captured even when the dispatcher raises.
#
# The one exception that is not nacked is an `Async::Stop` arriving while
# the lifecycle is no longer running: it is re-raised and, like a
# `ClosedQueueError` from pushing an ack/nack, handled by the outer
# rescue as "interrupted during shutdown". Such jobs end without an
# ack/nack; the original author notes that the Zizq backend returns them
# to the queue once the client disconnects.
#
# NOTE(review): `worker_id` is accepted but not used anywhere in this
# method — confirm whether it is meant to be forwarded.
def dispatch(job, worker_id) #: (Resources::Job, String?) -> void
  job_id = job.id
  job_type = job.type

  begin
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    begin
      @dispatcher.call(job)
    ensure
      # Runs on success and failure alike, before the rescue below.
      elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    end
  rescue Exception => error
    shutting_down = !@lifecycle.running?
    raise if shutting_down && error.is_a?(Async::Stop)

    logger.error do
      format(
        "Job %s (%s) failed in %.4fs: %s: %s",
        job_type, job_id, elapsed_time, error.class, error.message
      )
    end

    push_nack(job_id, error)
    return
  end

  push_ack(job_id)

  logger.debug do
    format("Job %s (%s) completed in %.4fs", job_type, job_id, elapsed_time)
  end
rescue Async::Stop, ClosedQueueError
  logger.debug { "Job #{job_type} (#{job_id}) interrupted during shutdown" }
end
444
+
445
# Queues a success acknowledgement for the given job on the ack processor.
#
# @rbs job_id: String
# @rbs return: void
def push_ack(job_id)
  ack = AckProcessor::Ack.new(job_id: job_id)
  @ack_processor.push(ack)
end
450
+
451
# Queues a failure report for the given job on the ack processor.
#
# The nack carries "<ErrorClass>: <message>", the error class name, and
# the backtrace joined with newlines (nil when the error has none).
#
# @rbs job_id: String
# @rbs error: Exception
# @rbs return: void
def push_nack(job_id, error)
  failure_class = error.class

  nack = AckProcessor::Nack.new(
    job_id:,
    message: "#{failure_class}: #{error.message}",
    error_type: failure_class.name,
    backtrace: error.backtrace&.join("\n"),
  )

  @ack_processor.push(nack)
end
462
+
463
# Returns the ID produced by `worker_id_proc` for the given thread and
# fiber index, or nil when no proc was configured.
def resolve_worker_id(thread_idx, fiber_idx) #: (Integer, Integer) -> String?
  id_proc = worker_id_proc
  return if id_proc.nil?

  id_proc.call(thread_idx, fiber_idx)
end
466
+ end
467
+ end