pgbus 0.4.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,17 @@ module Pgbus
11
11
  attr_accessor :default_queue, :queue_prefix
12
12
 
13
13
  # Worker settings
14
- attr_accessor :workers, :polling_interval, :visibility_timeout, :prefetch_limit
14
+ attr_accessor :polling_interval, :prefetch_limit
15
+ attr_reader :workers, :visibility_timeout # rubocop:disable Style/AccessorGrouping
16
+
17
+ # Supervisor role selection.
18
+ # nil = boot all roles (default behavior).
19
+ # Array of role symbols = boot only the listed roles.
20
+ # Set via the CLI flags --workers-only / --scheduler-only / --dispatcher-only,
21
+ # or directly in an initializer for advanced cases.
22
+ attr_reader :roles
23
+
24
+ VALID_ROLES = %i[workers dispatcher scheduler consumers outbox].freeze
15
25
 
16
26
  # Worker recycling
17
27
  attr_accessor :max_jobs_per_worker, :max_memory_mb, :max_worker_lifetime
@@ -19,30 +29,37 @@ module Pgbus
19
29
  # Dispatcher settings
20
30
  attr_accessor :dispatch_interval
21
31
 
22
- # Circuit breaker
23
- attr_accessor :circuit_breaker_enabled, :circuit_breaker_threshold,
24
- :circuit_breaker_base_backoff, :circuit_breaker_max_backoff
32
+ # Circuit breaker. Only `enabled` is user-facing — the trip threshold and
33
+ # backoff curve are tuned via constants on Pgbus::CircuitBreaker because
34
+ # they are implementation details that have never been worth exposing.
35
+ attr_accessor :circuit_breaker_enabled
25
36
 
26
37
  # Dead letter queue
27
- attr_accessor :max_retries, :dead_letter_queue_suffix
38
+ attr_accessor :max_retries
28
39
 
29
40
  # Priority queues
30
41
  attr_accessor :priority_levels, :default_priority
31
42
 
32
- # Archive compaction
33
- attr_accessor :archive_retention, :archive_compaction_interval, :archive_compaction_batch_size
43
+ # Archive compaction. Only the user-facing retention window is configurable;
44
+ # the loop interval and batch size are tuned via constants on
45
+ # Pgbus::Process::Dispatcher.
46
+ attr_reader :archive_retention
34
47
 
35
48
  # Transactional outbox
36
- attr_accessor :outbox_enabled, :outbox_poll_interval, :outbox_batch_size, :outbox_retention
49
+ attr_accessor :outbox_enabled, :outbox_poll_interval, :outbox_batch_size
50
+ attr_reader :outbox_retention # rubocop:disable Style/AccessorGrouping
37
51
 
38
52
  # Event bus
39
- attr_accessor :idempotency_ttl, :allowed_global_id_models
53
+ attr_accessor :allowed_global_id_models
54
+ attr_reader :idempotency_ttl # rubocop:disable Style/AccessorGrouping
40
55
 
41
56
  # Logging
42
57
  attr_accessor :logger
43
58
 
44
- # LISTEN/NOTIFY
45
- attr_accessor :listen_notify, :notify_throttle_ms
59
+ # LISTEN/NOTIFY. Only the on/off switch is user-facing — the throttle
60
+ # interval is a Postgres-side tuning knob that lives as a constant on
61
+ # Pgbus::Client (NOTIFY_THROTTLE_MS).
62
+ attr_accessor :listen_notify
46
63
 
47
64
  # PGMQ schema installation mode (:auto, :extension, :embedded)
48
65
  attr_reader :pgmq_schema_mode
@@ -51,8 +68,8 @@ module Pgbus
51
68
  attr_accessor :event_consumers
52
69
 
53
70
  # Recurring jobs
54
- attr_accessor :recurring_tasks, :recurring_schedule_interval, :recurring_tasks_file,
55
- :skip_recurring, :recurring_execution_retention
71
+ attr_accessor :recurring_tasks, :recurring_schedule_interval, :recurring_tasks_file, :skip_recurring
72
+ attr_reader :recurring_execution_retention # rubocop:disable Style/AccessorGrouping
56
73
 
57
74
  # Multi-database support (optional separate database for pgbus tables)
58
75
  # Set to { database: { writing: :pgbus, reading: :pgbus } } to use a separate database.
@@ -60,7 +77,8 @@ module Pgbus
60
77
  attr_accessor :connects_to
61
78
 
62
79
  # Job stats
63
- attr_accessor :stats_retention, :stats_enabled
80
+ attr_accessor :stats_enabled
81
+ attr_reader :stats_retention # rubocop:disable Style/AccessorGrouping
64
82
 
65
83
  # Web dashboard
66
84
  attr_accessor :web_auth, :web_refresh_interval, :web_per_page, :web_live_updates, :web_data_source,
@@ -69,13 +87,14 @@ module Pgbus
69
87
  def initialize
70
88
  @database_url = nil
71
89
  @connection_params = nil
72
- @pool_size = 5
90
+ @pool_size = nil
73
91
  @pool_timeout = 5
74
92
 
75
93
  @default_queue = "default"
76
94
  @queue_prefix = "pgbus"
77
95
 
78
96
  @workers = [{ queues: %w[default], threads: 5 }]
97
+ @roles = nil
79
98
  @polling_interval = 0.1
80
99
  @visibility_timeout = 30
81
100
 
@@ -88,19 +107,13 @@ module Pgbus
88
107
  @dispatch_interval = 1.0
89
108
 
90
109
  @circuit_breaker_enabled = true
91
- @circuit_breaker_threshold = 5
92
- @circuit_breaker_base_backoff = 30
93
- @circuit_breaker_max_backoff = 600
94
110
 
95
111
  @max_retries = 5
96
- @dead_letter_queue_suffix = "_dlq"
97
112
 
98
113
  @priority_levels = nil
99
114
  @default_priority = 1
100
115
 
101
116
  @archive_retention = 7 * 24 * 3600 # 7 days
102
- @archive_compaction_interval = 3600
103
- @archive_compaction_batch_size = 1000
104
117
 
105
118
  @outbox_enabled = false
106
119
  @outbox_poll_interval = 1.0
@@ -113,7 +126,6 @@ module Pgbus
113
126
  @logger = (defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger) || Logger.new($stdout)
114
127
 
115
128
  @listen_notify = true
116
- @notify_throttle_ms = 250
117
129
 
118
130
  @pgmq_schema_mode = :auto
119
131
 
@@ -147,7 +159,7 @@ module Pgbus
147
159
  end
148
160
 
149
161
  def dead_letter_queue_name(name)
150
- "#{queue_name(name)}#{dead_letter_queue_suffix}"
162
+ "#{queue_name(name)}#{Pgbus::DEAD_LETTER_SUFFIX}"
151
163
  end
152
164
 
153
165
  def priority_queue_name(name, priority)
@@ -172,13 +184,16 @@ module Pgbus
172
184
  end
173
185
 
174
186
  def validate!
175
- raise ArgumentError, "pool_size must be > 0" unless pool_size.is_a?(Numeric) && pool_size.positive?
187
+ if pool_size && !(pool_size.is_a?(Numeric) && pool_size.positive?)
188
+ raise ArgumentError, "pool_size must be a positive number or nil (auto-tune)"
189
+ end
190
+
176
191
  raise ArgumentError, "pool_timeout must be > 0" unless pool_timeout.is_a?(Numeric) && pool_timeout.positive?
177
192
  raise ArgumentError, "polling_interval must be > 0" unless polling_interval.is_a?(Numeric) && polling_interval.positive?
178
193
  raise ArgumentError, "visibility_timeout must be > 0" unless visibility_timeout.is_a?(Numeric) && visibility_timeout.positive?
179
194
  raise ArgumentError, "max_retries must be >= 0" unless max_retries.is_a?(Integer) && max_retries >= 0
180
195
 
181
- workers.each do |w|
196
+ Array(workers).each do |w|
182
197
  threads = w[:threads] || w["threads"] || 5
183
198
  raise ArgumentError, "worker threads must be > 0" unless threads.is_a?(Integer) && threads.positive?
184
199
  end
@@ -196,6 +211,199 @@ module Pgbus
196
211
  self
197
212
  end
198
213
 
214
+ # Set the worker capsule list. Accepts:
215
+ #
216
+ # String — parsed via Pgbus::Configuration::CapsuleDSL into capsules
217
+ # with auto-generated names where possible (a capsule's :name is its
218
+ # first queue token when that token is unique and not the wildcard).
219
+ #
220
+ # c.workers "*: 5"
221
+ # c.workers "critical: 5; default, mailers: 10"
222
+ #
223
+ # Array — legacy explicit form. Each entry is a Hash with :queues
224
+ # and :threads (and optionally :name, :single_active_consumer,
225
+ # :consumer_priority, :prefetch_limit).
226
+ #
227
+ # c.workers [{ queues: %w[default], threads: 5 }]
228
+ #
229
+ # nil — no workers configured (used when running scheduler-only or
230
+ # dispatcher-only processes).
231
+ #
232
+ # Raises ArgumentError for any other type.
233
+ #
234
+ # NAMING SEMANTICS for the String form:
235
+ #
236
+ # The parser produces anonymous capsules (no :name). The setter then
237
+ # auto-assigns a :name to capsules whose first queue would yield a
238
+ # *unique* name across the parsed list AND is not the bare wildcard
239
+ # (`*`). Anything else stays anonymous.
240
+ #
241
+ # "critical: 5; default: 10" -> two NAMED capsules ("critical", "default")
242
+ # "*: 5" -> one anonymous capsule (wildcard never names)
243
+ # "*: 3; *: 3; *: 3" -> three anonymous capsules — legal,
244
+ # represents "3 forks all reading every
245
+ # queue", restoring the legacy YAML
246
+ # `5 × {queues: ["*"], threads: 3}` shape
247
+ # "default: 5; default: 3" -> two anonymous capsules — same logic
248
+ #
249
+ # The point of the carve-out is the legacy "I want N forks of the same
250
+ # worker pool" pattern: it must keep working since PGMQ tolerates it
251
+ # natively (multiple processes reading the same queue with FOR UPDATE
252
+ # SKIP LOCKED). The CLI's --capsule selector only matches NAMED
253
+ # capsules, so anonymous duplicates can't be ambiguously addressed.
254
+ def workers=(value)
255
+ @workers = case value
256
+ when nil
257
+ nil
258
+ when String
259
+ parsed = CapsuleDSL.parse(value)
260
+ assign_auto_names(parsed)
261
+ when Array
262
+ value
263
+ else
264
+ raise ArgumentError,
265
+ "workers must be a String (DSL), Array (legacy form), or nil — got #{value.class}"
266
+ end
267
+ end
268
+
269
+ # Define a named capsule and append it to the workers list.
270
+ #
271
+ # c.capsule :critical, queues: %w[critical], threads: 5
272
+ # c.capsule :gated, queues: %w[gated], threads: 1, single_active_consumer: true
273
+ #
274
+ # Names must be unique. Queues must not overlap with named capsules already
275
+ # defined (would cause double-processing). Composes with the string DSL —
276
+ # +c.workers "..."+ followed by +c.capsule :name, ...+ appends the
277
+ # named capsule to the list parsed from the string.
278
+ def capsule(name, queues:, threads:, **)
279
+ raise ArgumentError, "capsule queues must be a non-empty Array" unless queues.is_a?(Array) && queues.any?
280
+ raise ArgumentError, "capsule threads must be a positive Integer" unless threads.is_a?(Integer) && threads.positive?
281
+
282
+ normalized_name = name.to_s
283
+ @workers ||= []
284
+
285
+ raise ArgumentError, "capsule #{name.inspect} is already defined" if @workers.any? { |c| capsule_name(c) == normalized_name }
286
+
287
+ validate_no_queue_overlap!(queues)
288
+
289
+ @workers << { name: normalized_name, queues: queues, threads: threads, ** }
290
+ end
291
+
292
+ # Look up a capsule by its name. Accepts symbol or string. Returns the
293
+ # matching Hash, or nil. Used by the CLI's --capsule selector.
294
+ def capsule_named(name)
295
+ return nil unless @workers
296
+
297
+ key = name.to_s
298
+ @workers.find { |c| capsule_name(c) == key }
299
+ end
300
+
301
+ # Returns true if the given role should be booted by the supervisor.
302
+ # When +roles+ is nil (the default), every role is enabled — this matches
303
+ # the legacy single-process behavior. When +roles+ is set (e.g. via the
304
+ # CLI's --workers-only / --scheduler-only / --dispatcher-only flags),
305
+ # only the listed roles boot.
306
+ #
307
+ # Accepts symbol or string for case-insensitive comparison.
308
+ def role_enabled?(role)
309
+ return true if @roles.nil?
310
+
311
+ @roles.include?(role.to_s.downcase.to_sym)
312
+ end
313
+
314
+ # Set the supervisor role filter. Accepts:
315
+ #
316
+ # nil — boot all roles (default)
317
+ # Symbol/String — wraps into a single-element array
318
+ # Array — list of roles to boot
319
+ #
320
+ # Each role is normalized to a downcased symbol and validated against
321
+ # VALID_ROLES. Unknown role names raise ArgumentError immediately so
322
+ # typos like `[:workres]` fail loud at boot rather than leaving the
323
+ # supervisor idling with no children.
324
+ def roles=(value)
325
+ if value.nil?
326
+ @roles = nil
327
+ return
328
+ end
329
+
330
+ normalized = Array(value).map { |r| r.to_s.downcase.to_sym }.uniq
331
+ invalid = normalized - VALID_ROLES
332
+ if invalid.any?
333
+ raise ArgumentError,
334
+ "invalid role(s) #{invalid.inspect} — valid roles are: #{VALID_ROLES.join(", ")}"
335
+ end
336
+
337
+ @roles = normalized
338
+ end
339
+
340
+ # Duration setters: each accepts either a Numeric (seconds) or an
341
+ # ActiveSupport::Duration (e.g. 10.minutes, 7.days). Validation runs
342
+ # immediately on assignment so misconfigurations crash at boot rather
343
+ # than leaving stale state until a `validate!` call somewhere.
344
+ #
345
+ # Numeric values are stored unchanged (preserving Float for sub-second
346
+ # values). Duration values are coerced to Integer seconds via .to_i; nil is stored as nil.
347
+
348
+ def visibility_timeout=(value)
349
+ @visibility_timeout = coerce_duration!(value, :visibility_timeout)
350
+ end
351
+
352
+ def archive_retention=(value)
353
+ @archive_retention = coerce_duration!(value, :archive_retention)
354
+ end
355
+
356
+ def outbox_retention=(value)
357
+ @outbox_retention = coerce_duration!(value, :outbox_retention)
358
+ end
359
+
360
+ def idempotency_ttl=(value)
361
+ @idempotency_ttl = coerce_duration!(value, :idempotency_ttl)
362
+ end
363
+
364
+ def stats_retention=(value)
365
+ @stats_retention = coerce_duration!(value, :stats_retention)
366
+ end
367
+
368
+ def recurring_execution_retention=(value)
369
+ @recurring_execution_retention = coerce_duration!(value, :recurring_execution_retention)
370
+ end
371
+
372
+ # Returns the connection pool size to use for the PGMQ client.
373
+ #
374
+ # If +pool_size+ was explicitly set, returns that value unchanged. Otherwise
375
+ # auto-derives from the threads needed by the roles this process actually
376
+ # runs (respects +Configuration#roles+ from --workers-only / --scheduler-only
377
+ # / --dispatcher-only):
378
+ #
379
+ # workers role → sum(workers.threads)
380
+ # consumers role → sum(event_consumers.threads)
381
+ # dispatcher role → +1
382
+ # scheduler role → +1
383
+ #
384
+ # A --scheduler-only deployment that has 50 worker threads configured
385
+ # only needs 1 connection (for the scheduler), not 52.
386
+ #
387
+ # Auto-tune protects users from the common pitfall of running 15 worker
388
+ # threads with a hand-set pool_size of 5 (resulting in ConnectionPool
389
+ # timeouts under load). Setting pool_size explicitly is still supported
390
+ # for advanced cases where you need a tighter or looser pool than the
391
+ # default formula provides.
392
+ POOL_SIZE_WARN_THRESHOLD = 50
393
+
394
+ def resolved_pool_size
395
+ return pool_size if pool_size
396
+
397
+ total = 0
398
+ total += sum_thread_counts(workers, default_threads: 5, group: "worker") if role_enabled?(:workers)
399
+ total += sum_thread_counts(event_consumers, default_threads: 3, group: "event_consumer") if role_enabled?(:consumers)
400
+ total += 1 if role_enabled?(:dispatcher)
401
+ total += 1 if role_enabled?(:scheduler)
402
+
403
+ warn_if_oversized(total)
404
+ total
405
+ end
406
+
199
407
  def connection_options
200
408
  if database_url
201
409
  database_url
@@ -217,6 +425,125 @@ module Pgbus
217
425
 
218
426
  private
219
427
 
428
+ # Coerce a duration setting value to a positive Numeric.
429
+ #
430
+ # Accepts an ActiveSupport::Duration (coerced to Integer seconds via .to_i)
431
+ # or a Numeric (stored as-is, preserving Float for sub-second values).
432
+ # Returns nil for nil; raises ArgumentError for zero, negative, or non-numeric
433
+ # input — callers crash at boot rather than carrying silently-broken state.
434
+ def coerce_duration!(value, name)
435
+ # nil is a valid sentinel for "feature disabled" (e.g. archive_retention,
436
+ # idempotency_ttl, recurring_execution_retention all use nil to skip the
437
+ # corresponding maintenance task in the dispatcher).
438
+ return nil if value.nil?
439
+
440
+ # Check Duration FIRST because ActiveSupport::Duration overrides #is_a?
441
+ # to delegate to its wrapped value, so a duration would otherwise be caught
442
+ # by the Numeric branch and stored as-is (uncoerced).
443
+ duration_class_loaded = defined?(ActiveSupport::Duration)
444
+ return validate_positive_duration!(value.to_i, name) if duration_class_loaded && value.is_a?(ActiveSupport::Duration)
445
+
446
+ # Plain Numeric (Integer, Float, Rational). Use class identity rather
447
+ # than is_a? for the Duration exclusion because ActiveSupport overrides
448
+ # is_a? — see comment above.
449
+ if value.is_a?(Numeric) && (!defined?(ActiveSupport::Duration) || value.class != ActiveSupport::Duration)
450
+ return validate_positive_duration!(value, name)
451
+ end
452
+
453
+ raise ArgumentError,
454
+ "#{name} must be a Numeric (seconds), ActiveSupport::Duration, or nil to disable, got #{value.inspect}"
455
+ end
456
+
457
+ def validate_positive_duration!(numeric, name)
458
+ raise ArgumentError, "#{name} must be a positive number, got #{numeric}" unless numeric.positive?
459
+
460
+ numeric
461
+ end
462
+
463
+ # Read a capsule's name from either symbol or string key, normalized
464
+ # to a string for comparison. Returns nil for unnamed (legacy) entries.
465
+ def capsule_name(entry)
466
+ raw = entry[:name] || entry["name"]
467
+ raw&.to_s
468
+ end
469
+
470
+ # Auto-assign :name to parsed capsules where the first queue token would
471
+ # yield a unique name and is not the bare wildcard. See the long comment
472
+ # on +workers=+ for the why. Returns the same array with :name merged in
473
+ # where applicable.
474
+ def assign_auto_names(parsed_capsules)
475
+ first_queue_counts = parsed_capsules.each_with_object(Hash.new(0)) do |capsule, h|
476
+ h[capsule[:queues].first] += 1
477
+ end
478
+
479
+ parsed_capsules.map do |capsule|
480
+ first = capsule[:queues].first
481
+ nameable = first != CapsuleDSL::WILDCARD && first_queue_counts[first] == 1
482
+ nameable ? capsule.merge(name: first.to_s) : capsule
483
+ end
484
+ end
485
+
486
+ # Validates that the new capsule (added via +c.capsule :name, ...+) does
487
+ # not overlap with any existing NAMED capsule. Anonymous capsules (parsed
488
+ # from the string DSL with auto-naming skipped, e.g. wildcards or
489
+ # would-collide first-queues) are intentionally invisible here — they
490
+ # represent "N forks of the same pool" and are allowed to overlap with
491
+ # each other and with named capsules.
492
+ #
493
+ # The wildcard '*' counts as overlapping with EVERY other queue (and
494
+ # vice versa) because at runtime '*' is expanded to all known queues.
495
+ # Raises ArgumentError on overlap.
496
+ def validate_no_queue_overlap!(new_queues)
497
+ existing_named = (@workers || []).select { |c| capsule_name(c) }
498
+ return if existing_named.empty?
499
+
500
+ existing_queues = existing_named.flat_map { |c| c[:queues] || c["queues"] || [] }
501
+ return if existing_queues.empty?
502
+
503
+ if existing_queues.include?(CapsuleDSL::WILDCARD)
504
+ raise ArgumentError,
505
+ "an existing named capsule already uses '*' (matches every queue) — " \
506
+ "the new capsule's queues #{new_queues.inspect} would overlap with it"
507
+ end
508
+
509
+ if new_queues.include?(CapsuleDSL::WILDCARD)
510
+ raise ArgumentError,
511
+ "the new capsule uses '*' (matches every queue) but other named capsules " \
512
+ "are already defined with queues #{existing_queues.inspect} — " \
513
+ "the wildcard would overlap with all of them"
514
+ end
515
+
516
+ conflict = new_queues.find { |q| existing_queues.include?(q) }
517
+ return unless conflict
518
+
519
+ raise ArgumentError,
520
+ "queue #{conflict.inspect} is already assigned to another named capsule — " \
521
+ "named capsules cannot share queues"
522
+ end
523
+
524
+ def sum_thread_counts(entries, default_threads:, group:)
525
+ return 0 unless entries
526
+
527
+ entries.sum do |entry|
528
+ threads = entry[:threads] || entry["threads"] || default_threads
529
+ unless threads.is_a?(Integer) && threads.positive?
530
+ raise ArgumentError,
531
+ "#{group} threads must be a positive integer, got #{threads.inspect}"
532
+ end
533
+ threads
534
+ end
535
+ end
536
+
537
+ def warn_if_oversized(size)
538
+ return unless size > POOL_SIZE_WARN_THRESHOLD
539
+
540
+ Pgbus.logger.warn do
541
+ "[Pgbus] Auto-tuned pool_size is #{size} (over #{POOL_SIZE_WARN_THRESHOLD}). " \
542
+ "Verify your worker thread counts are intentional. " \
543
+ "Set Pgbus.configuration.pool_size explicitly to override."
544
+ end
545
+ end
546
+
220
547
  def extract_ar_connection_hash
221
548
  base = connects_to ? Pgbus::BusRecord : ActiveRecord::Base
222
549
  db_config = base.connection_db_config