wurk 0.0.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -0
  3. data/app/controllers/wurk/api/serializers.rb +48 -2
  4. data/app/controllers/wurk/api_controller.rb +216 -1
  5. data/app/controllers/wurk/dashboard_controller.rb +20 -2
  6. data/app/controllers/wurk/extensions_controller.rb +56 -0
  7. data/app/controllers/wurk/profiles_controller.rb +68 -0
  8. data/config/routes.rb +54 -1
  9. data/exe/sidekiqswarm +8 -0
  10. data/exe/wurkswarm +23 -0
  11. data/lib/active_job/queue_adapters/wurk_adapter.rb +35 -0
  12. data/lib/generators/wurk/install/templates/wurk.rb +14 -3
  13. data/lib/sidekiq/api.rb +4 -0
  14. data/lib/sidekiq/cli.rb +9 -0
  15. data/lib/sidekiq/client.rb +4 -0
  16. data/lib/sidekiq/job.rb +4 -0
  17. data/lib/sidekiq/launcher.rb +4 -0
  18. data/lib/sidekiq/middleware/chain.rb +4 -0
  19. data/lib/sidekiq/middleware/server/statsd.rb +12 -0
  20. data/lib/sidekiq/rails.rb +10 -0
  21. data/lib/sidekiq/redis_connection.rb +4 -0
  22. data/lib/sidekiq/scheduled.rb +4 -0
  23. data/lib/sidekiq/testing.rb +4 -0
  24. data/lib/sidekiq/version.rb +4 -0
  25. data/lib/sidekiq/web.rb +4 -0
  26. data/lib/sidekiq/worker.rb +4 -0
  27. data/lib/sidekiq.rb +16 -0
  28. data/lib/wurk/batch/callbacks.rb +103 -13
  29. data/lib/wurk/batch/death_handler.rb +5 -2
  30. data/lib/wurk/batch/server_middleware.rb +35 -3
  31. data/lib/wurk/batch/status.rb +9 -0
  32. data/lib/wurk/batch.rb +23 -1
  33. data/lib/wurk/capsule.rb +20 -1
  34. data/lib/wurk/cli.rb +84 -1
  35. data/lib/wurk/client.rb +20 -17
  36. data/lib/wurk/compat.rb +44 -2
  37. data/lib/wurk/component.rb +5 -4
  38. data/lib/wurk/configuration.rb +120 -3
  39. data/lib/wurk/cron.rb +51 -9
  40. data/lib/wurk/dead_set.rb +8 -3
  41. data/lib/wurk/deploy.rb +8 -4
  42. data/lib/wurk/encryption.rb +6 -1
  43. data/lib/wurk/fetcher/reaper.rb +78 -11
  44. data/lib/wurk/fetcher/reliable.rb +14 -4
  45. data/lib/wurk/heartbeat.rb +45 -0
  46. data/lib/wurk/history.rb +174 -0
  47. data/lib/wurk/iterable_job/active_record_enumerator.rb +71 -0
  48. data/lib/wurk/iterable_job/csv_enumerator.rb +51 -0
  49. data/lib/wurk/iterable_job.rb +41 -0
  50. data/lib/wurk/iterable_job_query.rb +75 -0
  51. data/lib/wurk/job.rb +8 -0
  52. data/lib/wurk/job_record.rb +16 -1
  53. data/lib/wurk/job_set.rb +4 -4
  54. data/lib/wurk/job_util.rb +15 -6
  55. data/lib/wurk/keys.rb +10 -0
  56. data/lib/wurk/launcher.rb +35 -1
  57. data/lib/wurk/leader.rb +15 -6
  58. data/lib/wurk/limiter/bucket.rb +14 -3
  59. data/lib/wurk/limiter/concurrent.rb +1 -1
  60. data/lib/wurk/limiter/window.rb +2 -1
  61. data/lib/wurk/limiter.rb +12 -0
  62. data/lib/wurk/lua/loader.rb +10 -0
  63. data/lib/wurk/lua.rb +106 -14
  64. data/lib/wurk/metrics/history.rb +5 -0
  65. data/lib/wurk/metrics/query.rb +39 -0
  66. data/lib/wurk/metrics/queue_rollup.rb +151 -0
  67. data/lib/wurk/metrics/statsd.rb +11 -0
  68. data/lib/wurk/middleware/current_attributes.rb +29 -6
  69. data/lib/wurk/middleware/interrupt_handler.rb +5 -0
  70. data/lib/wurk/middleware/poison_pill.rb +35 -5
  71. data/lib/wurk/processor.rb +17 -8
  72. data/lib/wurk/profile_set.rb +65 -0
  73. data/lib/wurk/profiler.rb +127 -0
  74. data/lib/wurk/railtie.rb +19 -5
  75. data/lib/wurk/redis_client_adapter.rb +72 -0
  76. data/lib/wurk/redis_connection.rb +30 -0
  77. data/lib/wurk/redis_pool.rb +5 -1
  78. data/lib/wurk/scheduled.rb +42 -0
  79. data/lib/wurk/sorted_entry.rb +13 -11
  80. data/lib/wurk/stats.rb +11 -4
  81. data/lib/wurk/swarm/child_boot.rb +26 -4
  82. data/lib/wurk/swarm.rb +1 -1
  83. data/lib/wurk/transaction_aware_client.rb +69 -0
  84. data/lib/wurk/unique.rb +49 -7
  85. data/lib/wurk/version.rb +1 -1
  86. data/lib/wurk/web/batch_status.rb +42 -0
  87. data/lib/wurk/web/config.rb +219 -17
  88. data/lib/wurk/web/enterprise.rb +14 -0
  89. data/lib/wurk/web/extension.rb +348 -0
  90. data/lib/wurk/web/rack_app.rb +77 -0
  91. data/lib/wurk/web.rb +2 -0
  92. data/lib/wurk/worker/setter.rb +5 -1
  93. data/lib/wurk/worker.rb +17 -6
  94. data/lib/wurk.rb +44 -0
  95. data/vendor/assets/dashboard/assets/fa-brands-400-BP5tdqmh.woff2 +0 -0
  96. data/vendor/assets/dashboard/assets/fa-regular-400-nyy7hhHF.woff2 +0 -0
  97. data/vendor/assets/dashboard/assets/fa-solid-900-DRAAbZTg.woff2 +0 -0
  98. data/vendor/assets/dashboard/assets/index-9CFRWpfG.js +77 -0
  99. data/vendor/assets/dashboard/assets/index-CW8AFQIv.css +2 -0
  100. data/vendor/assets/dashboard/assets/wurk-logo-Vy3xW4K0.png +0 -0
  101. data/vendor/assets/dashboard/favicon.png +0 -0
  102. data/vendor/assets/dashboard/index.html +10 -3
  103. data/vendor/assets/dashboard/wurk-manifest.json +2 -2
  104. metadata +42 -3
  105. data/vendor/assets/dashboard/assets/index-D2XR0iGw.js +0 -60
  106. data/vendor/assets/dashboard/assets/index-DlPr4YXw.css +0 -1
data/lib/wurk/job.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'worker'
4
+ require_relative 'worker/setter'
4
5
 
5
6
  module Wurk
6
7
  # Sidekiq 7+ alias for Wurk::Worker. `include Wurk::Job` and
@@ -20,6 +21,13 @@ module Wurk
20
21
  # Spec: docs/target/sidekiq-free.md §6.4.
21
22
  class Interrupted < RuntimeError; end
22
23
 
24
+ # Per-call option carrier returned by `set(...)`. Sidekiq 7+ documents it
25
+ # under the modern mixin name `Sidekiq::Job::Setter`; since
26
+ # `Sidekiq::Job = Wurk::Job`, this rebind is what makes that constant
27
+ # resolve (without it `Sidekiq::Job::Setter` raises NameError). Same class
28
+ # as `Sidekiq::Worker::Setter`. Spec: docs/target/sidekiq-free.md §6.3.
29
+ Setter = Wurk::Worker::Setter
30
+
23
31
  def self.included(base)
24
32
  base.include(Wurk::Worker)
25
33
  end
@@ -53,6 +53,15 @@ module Wurk
53
53
  def args = item['args']
54
54
  def jid = item['jid']
55
55
  def bid = item['bid']
56
+
57
+ # IterableJob progress for this job, or nil for a non-iterable job (no
58
+ # `it-<jid>` HASH). Spec §19.3. Reads via the IterableJobQuery data API.
59
+ def iterable_state
60
+ return nil if jid.nil? || jid.to_s.empty?
61
+
62
+ Wurk::IterableJobQuery.new([jid])[jid]
63
+ end
64
+
56
65
  def tags = item['tags'] || []
57
66
  def enqueued_at = parse_time(item['enqueued_at'])
58
67
  def created_at = parse_time(item['created_at'])
@@ -96,10 +105,16 @@ module Wurk
96
105
  @display_class = active_job_wrapper? ? unwrap_class : klass
97
106
  end
98
107
 
108
+ # UI-facing args. Encrypted jobs (§4.7) get their envelope last arg
109
+ # masked as "<encrypted>" so ciphertext never reaches the dashboard;
110
+ # redaction keys off the envelope shape, so it fires whether or not the
111
+ # stored hash carried the `encrypt` flag. Cleartext preceding args stay
112
+ # visible for triage. Display-only — the stored payload is untouched.
99
113
  def display_args
100
114
  return @display_args if defined?(@display_args)
101
115
 
102
- @display_args = active_job_wrapper? ? unwrap_args : args
116
+ base = active_job_wrapper? ? unwrap_args : args
117
+ @display_args = Wurk::Encryption.redact_args('args' => base, 'encrypt' => item['encrypt'])
103
118
  end
104
119
 
105
120
  # @api internal
data/lib/wurk/job_set.rb CHANGED
@@ -122,10 +122,10 @@ module Wurk
122
122
  count
123
123
  end
124
124
 
125
- # Moves every job in this set to the dead set. `notify_failure: false`
126
- # because this is a UI-initiated bulk action, not a retry-exhausted
127
- # event. Returns the count of jobs moved.
128
- def kill_all(notify_failure: false, ex: nil)
125
+ # Moves every job in this set to the dead set. Death handlers fire per
126
+ # entry by default — `each(&:kill)` equivalence with Sidekiq; pass
127
+ # `notify_failure: false` to suppress. Returns the count of jobs moved.
128
+ def kill_all(notify_failure: true, ex: nil)
129
129
  count = 0
130
130
  dead = DeadSet.new
131
131
  until size.zero?
data/lib/wurk/job_util.rb CHANGED
@@ -9,10 +9,15 @@ module Wurk
9
9
  #
10
10
  # Spec: docs/target/sidekiq-free.md §9 (Sidekiq::JobUtil).
11
11
  module JobUtil # rubocop:disable Metrics/ModuleLength
12
- # Top-level keys stripped from every payload before raw_push. Mutable so
13
- # Pro/Ent/extension code (e.g. TransactionAwareClient adding "client_class")
14
- # can append at load time without monkey-patching.
15
- TRANSIENT_ATTRIBUTES = [] # rubocop:disable Style/MutableConstant
12
+ # Top-level keys consumed at enqueue time but stripped from every payload
13
+ # before raw_push — they must never reach the wire (spec §2.2):
14
+ # `pool` selects the Redis pool (resolved in client_push/build_client)
15
+ # `client_class` swaps the enqueue client (Wurk.transactional_push!)
16
+ # Both carry non-JSON values (a pool / a Class). Baked into the literal rather
17
+ # than appended at load: a load-time `<<` is fragile under the parallel test
18
+ # runner (a test that add/deletes the same key clobbers it for later suites).
19
+ # Still mutable so other extensions can append without monkey-patching.
20
+ TRANSIENT_ATTRIBUTES = %w[pool client_class] # rubocop:disable Style/MutableConstant
16
21
 
17
22
  RETRY_FOR_MAX = 1_000_000_000
18
23
 
@@ -111,13 +116,17 @@ module Wurk
111
116
 
112
117
  # Pro `expires_in:` → absolute epoch-float `expiry` resolved once at push,
113
118
  # so the server middleware doesn't redo the math. Spec: sidekiq-pro.md §7.
119
+ # For scheduled jobs the clock origin is `at` (epoch seconds), not
120
+ # `created_at` (epoch millis) — otherwise any delay > expires_in makes the
121
+ # job born-expired: perform_in(2h) + expires_in: 1h must expire at 3h.
114
122
  # nil.respond_to?(:to_f) is true on modern Ruby (returns 0.0), so we must
115
123
  # gate on a non-nil duration before coercing.
116
124
  def stamp_expiry(item)
117
125
  d = item['expires_in']
118
- return if d.nil?
126
+ return if d.nil? || !d.respond_to?(:to_f)
119
127
 
120
- item['expiry'] ||= (item['created_at'].to_f / 1000.0) + d.to_f if d.respond_to?(:to_f)
128
+ origin = item['at'] ? item['at'].to_f : (item['created_at'].to_f / 1000.0)
129
+ item['expiry'] ||= origin + d.to_f
121
130
  end
122
131
 
123
132
  def report_unsafe(item, offender, mode)
data/lib/wurk/keys.rb CHANGED
@@ -29,6 +29,16 @@ module Wurk
29
29
  # Live process identities (heartbeat membership).
30
30
  PROCESSES = 'processes'
31
31
 
32
+ # Ent Historical Metrics: capped Redis stream of periodic snapshots written
33
+ # by Wurk::History (§5.3). Same key a migrated Sidekiq Ent install uses, so
34
+ # its existing data renders without rewrite. Spec: sidekiq-ent.md §5.3, §10.
35
+ HISTORY_METRICS = 'history:metrics'
36
+
37
+ # Profiles (v8.0+): ZSET of `<token>-<jid>` keys, score = expiry epoch;
38
+ # each member also has a `<token>-<jid>` HASH holding the profile blob.
39
+ # Spec: docs/target/sidekiq-free.md §1.7.
40
+ PROFILES = 'profiles'
41
+
32
42
  # Global processed counter; per-day variants append `:YYYY-MM-DD`.
33
43
  STAT_PROCESSED = 'stat:processed'
34
44
 
data/lib/wurk/launcher.rb CHANGED
@@ -10,6 +10,8 @@ require_relative 'scheduled'
10
10
  require_relative 'leader'
11
11
  require_relative 'cron'
12
12
  require_relative 'metrics/rollup'
13
+ require_relative 'metrics/queue_rollup'
14
+ require_relative 'history'
13
15
  require_relative 'fetcher/reaper'
14
16
 
15
17
  module Wurk
@@ -41,7 +43,7 @@ module Wurk
41
43
  # (Sidekiq's drop-in surface). The single source of truth is Heartbeat.
42
44
  BEAT_PAUSE = Heartbeat::BEAT_PAUSE
43
45
 
44
- attr_accessor :managers, :poller, :cron_poller, :metrics_rollup
46
+ attr_accessor :managers, :poller, :cron_poller, :metrics_rollup, :queue_rollup, :history
45
47
 
46
48
  def initialize(config, embedded: false)
47
49
  @config = config
@@ -51,6 +53,8 @@ module Wurk
51
53
  @poller = build_poller
52
54
  @cron_poller = build_cron_poller
53
55
  @metrics_rollup = build_metrics_rollup
56
+ @queue_rollup = build_queue_rollup
57
+ @history = build_history
54
58
  @leader = build_leader
55
59
  @reaper = build_reaper
56
60
  @started_at = nil
@@ -81,8 +85,11 @@ module Wurk
81
85
  @leader&.start
82
86
  @cron_poller&.start
83
87
  @metrics_rollup&.start
88
+ @queue_rollup&.start
89
+ @history&.start
84
90
  @managers.each(&:start)
85
91
  @reaper.start
92
+ boot_reclaim
86
93
  @health_server&.start
87
94
  end
88
95
 
@@ -114,6 +121,8 @@ module Wurk
114
121
  # releasing the lock so no tick races a follower's promotion.
115
122
  @cron_poller&.terminate
116
123
  @metrics_rollup&.terminate
124
+ @queue_rollup&.terminate
125
+ @history&.terminate
117
126
  @reaper&.stop
118
127
  # CAS-release the cluster lock now (planned shutdown) so a follower can
119
128
  # take over immediately instead of waiting out the TTL.
@@ -247,6 +256,20 @@ module Wurk
247
256
  Wurk::Metrics::Rollup.new(@config)
248
257
  end
249
258
 
259
+ # Leader-only per-queue gauge sampler. Like the metrics rollup, every
260
+ # process runs one but only the leader writes the `qm|…` size/latency
261
+ # buckets the Historical tab's per-queue charts read.
262
+ def build_queue_rollup
263
+ Wurk::Metrics::QueueRollup.new(@config)
264
+ end
265
+
266
+ # Ent §5 Historical Metrics snapshotter — only when the host opted in via
267
+ # `config.retain_history`. Leader-gated like the rollups, so just one
268
+ # process emits the cluster-wide snapshot per interval.
269
+ def build_history
270
+ Wurk::History.new(@config) if @config.history_enabled?
271
+ end
272
+
250
273
  # Every worker process campaigns for the single cluster lock (`dear-leader`);
251
274
  # one wins and renews it, the rest follow and promote on its death. Cadence
252
275
  # falls back to the spec defaults (TTL 30 / renew 15 / follower 60) unless
@@ -260,6 +283,17 @@ module Wurk
260
283
  )
261
284
  end
262
285
 
286
+ # Deterministic boot-time orphan sweep: a SIGKILLed sibling's in-flight jobs
287
+ # would otherwise wait a full reaper interval before recovery. One unguarded
288
+ # scoped reclaim at start (no cluster lock — every booting worker helps) gets
289
+ # them re-queued immediately. Best-effort: a Redis hiccup here must not abort
290
+ # boot. Spec: docs/target/sidekiq-pro.md §3.2.
291
+ def boot_reclaim
292
+ @reaper.reclaim!
293
+ rescue StandardError => e
294
+ handle_exception(e, context: 'launcher-boot-reclaim') if respond_to?(:handle_exception)
295
+ end
296
+
263
297
  # Reliable-fetch orphan reclamation. Every worker runs one; a cluster
264
298
  # `SET NX EX` lock ensures only one actually sweeps per interval, so this
265
299
  # is leader-independent (it keeps working if the leader dies). Tune the
data/lib/wurk/leader.rb CHANGED
@@ -20,7 +20,8 @@ module Wurk
20
20
  #
21
21
  # Cadence per spec: renew every 15s while leader, recheck every 60s as
22
22
  # follower, lock TTL 30s. Opt out a process from campaigning entirely
23
- # with `WURK_LEADER=false` (useful for hot-standby pools).
23
+ # with `WURK_LEADER=false` (or its Sidekiq alias `SIDEKIQ_LEADER=false`),
24
+ # useful for hot-standby pools.
24
25
  #
25
26
  # Spec: docs/target/sidekiq-ent.md §6.
26
27
  class Leader
@@ -29,9 +30,17 @@ module Wurk
29
30
  DEFAULT_TTL = 30
30
31
  DEFAULT_RENEW_INTERVAL = 15
31
32
  DEFAULT_FOLLOWER_INTERVAL = 60
32
- OPT_OUT_ENV = 'WURK_LEADER'
33
+ OPT_OUT_ENV = 'WURK_LEADER' # native opt-out env
34
+ SIDEKIQ_OPT_OUT_ENV = 'SIDEKIQ_LEADER' # Sidekiq Ent drop-in alias (§6.2/§7.2)
33
35
  THREAD_NAME = 'wurk-leader'
34
36
 
37
+ # True when this process has opted out of campaigning via `WURK_LEADER=false`
38
+ # or its Sidekiq alias `SIDEKIQ_LEADER=false` (hot-standby pools that must
39
+ # never lead). Either env name works.
40
+ def self.opted_out?
41
+ [OPT_OUT_ENV, SIDEKIQ_OPT_OUT_ENV].any? { |k| ENV[k].to_s.downcase == 'false' }
42
+ end
43
+
35
44
  attr_reader :key, :ttl, :owner, :token, :config
36
45
 
37
46
  def initialize(config: nil, key: DEFAULT_KEY, ttl: DEFAULT_TTL, # rubocop:disable Metrics/ParameterLists
@@ -53,11 +62,11 @@ module Wurk
53
62
  @sleeper = ::ConditionVariable.new
54
63
  end
55
64
 
56
- # `WURK_LEADER=false` makes `acquire` a no-op and `leader?` permanently
57
- # false; the renewal thread also refuses to start. Useful for hot-
58
- # standby pools that must never campaign.
65
+ # `WURK_LEADER=false` (or `SIDEKIQ_LEADER=false`) makes `acquire` a no-op and
66
+ # `leader?` permanently false; the renewal thread also refuses to start.
67
+ # Useful for hot-standby pools that must never campaign.
59
68
  def disabled?
60
- ENV[OPT_OUT_ENV].to_s.downcase == 'false'
69
+ self.class.opted_out?
61
70
  end
62
71
 
63
72
  # SET NX EX. If the key already holds *our* owner string (rare — same
@@ -71,9 +71,20 @@ module Wurk
71
71
  end
72
72
 
73
73
  def acquire(used)
74
- lua(:limiter_bucket_acquire,
75
- keys: ["lmtr-b:#{@name}"],
76
- argv: [@options[:count], interval_seconds, used, ttl])
74
+ # Resolve the epoch from the Redis clock (spec §1: timing from TIME, not
75
+ # the client clock) and pass the single fully-qualified key. One declared
76
+ # key is safe on both Redis Cluster (no CROSSSLOT) and Dragonfly (no
77
+ # undeclared-key access) — see lua/limiter_bucket_acquire.lua (#91).
78
+ Wurk::Limiter.redis do |c|
79
+ now = c.call('TIME').first.to_i
80
+ epoch = now / interval_seconds
81
+ remaining = ((epoch + 1) * interval_seconds) - now
82
+ Wurk::Lua::Loader.eval_cached(
83
+ c, :limiter_bucket_acquire,
84
+ keys: ["lmtr-b:#{@name}:#{epoch}"],
85
+ argv: [@options[:count], used, ttl, remaining]
86
+ )
87
+ end
77
88
  end
78
89
  end
79
90
  end
@@ -87,7 +87,7 @@ module Wurk
87
87
  # Lowest slot expiry epoch (the next slot to free), or nil when empty.
88
88
  def soonest_expiry
89
89
  row = Wurk::Limiter.redis { |c| c.call('ZRANGE', state_key, 0, 0, 'WITHSCORES') }
90
- row && !row.empty? ? row[1].to_f : nil
90
+ Wurk::Limiter.first_score(row)
91
91
  end
92
92
 
93
93
  def state_key
@@ -63,7 +63,8 @@ module Wurk
63
63
  # Oldest timestamp + interval = the moment it leaves the window.
64
64
  def oldest_expiry
65
65
  row = Wurk::Limiter.redis { |c| c.call('ZRANGE', state_key, 0, 0, 'WITHSCORES') }
66
- row && !row.empty? ? row[1].to_f + interval_seconds : nil
66
+ score = Wurk::Limiter.first_score(row)
67
+ score && (score + interval_seconds)
67
68
  end
68
69
 
69
70
  def interval_seconds
data/lib/wurk/limiter.rb CHANGED
@@ -140,6 +140,18 @@ module Wurk
140
140
  pool.with(&)
141
141
  end
142
142
 
143
+ # `ZRANGE key 0 0 WITHSCORES` yields a single [member, score] pair, but the
144
+ # shape depends on the protocol: RESP3 (redis-client's default vs Redis >= 7)
145
+ # nests it as [[member, score]]; RESP2 returns a flat [member, score].
146
+ # Return the score as a Float across both, or nil when the set is empty.
147
+ # (The old flat-only `row[1]` silently collapsed to 0.0 under RESP3.)
148
+ def first_score(row)
149
+ pair = row.first
150
+ return nil if pair.nil?
151
+
152
+ (pair.is_a?(Array) ? pair.last : row[1]).to_f
153
+ end
154
+
143
155
  def concurrent(name, limit, wait_timeout: DEFAULT_WAIT_TIMEOUT, lock_timeout: DEFAULT_LOCK_TIMEOUT,
144
156
  policy: :raise, backoff: nil, ttl: DEFAULT_TTL)
145
157
  Concurrent.new(name,
@@ -38,6 +38,16 @@ module Wurk
38
38
  evalsha(redis, sha, keys, argv)
39
39
  end
40
40
 
41
+ # Source-embedded EVAL — the slow but cache-independent counterpart to
42
+ # `eval_cached`. Used on retry from a pipelined NOSCRIPT recovery where
43
+ # EVALSHA can still race a freshly-loaded script under heavy CI load
44
+ # (cf. WorkerTest NOSCRIPT flake on test (3.4, 7.2)). EVAL ships the
45
+ # full source every call, so it never raises NOSCRIPT.
46
+ def eval_with_source(redis, name, keys:, argv:)
47
+ src = SCRIPTS.fetch(name) { raise ArgumentError, "unknown Lua script: #{name.inspect}" }
48
+ redis.call('EVAL', src, keys.size, *keys, *argv)
49
+ end
50
+
41
51
  private
42
52
 
43
53
  def evalsha(redis, sha, keys, argv)
data/lib/wurk/lua.rb CHANGED
@@ -12,7 +12,7 @@ module Wurk
12
12
  #
13
13
  # `:zpopbyscore` is reproduced verbatim from sidekiq-free.md §1.8 and
14
14
  # MUST NOT diverge — parity tests will fail on a single byte change.
15
- module Lua
15
+ module Lua # rubocop:disable Metrics/ModuleLength
16
16
  ZPOPBYSCORE = <<~LUA
17
17
  local key, now = KEYS[1], ARGV[1]
18
18
  local jobs = redis.call("zrange", key, "-inf", now, "byscore", "limit", 0, 1)
@@ -58,13 +58,31 @@ module Wurk
58
58
 
59
59
  # Pro Batch: register a job into a batch and push it to its queue
60
60
  # atomically. Keeps total/pending in sync with the jids set.
61
- # KEYS = [b-<bid>, b-<bid>-jids, queue_list, queues_set]
62
- # ARGV = [queue_name, jid, job_json]
61
+ #
62
+ # A jid found in `b-<bid>-died` is a manual retry of a dead job (morgue
63
+ # "retry" / "add to queue") — it rejoins the live set without recounting:
64
+ # total and pending already include it, because a death never decrements
65
+ # pending. When that drains the died set the batch is no longer dead, so
66
+ # the durable `death` success-suppression flag clears and the bid leaves
67
+ # `dead-batches` — a later full drain can then fire `:success` (spec §2.4:
68
+ # success after the dead job is manually retried to success). The
69
+ # `b-<bid>-death` notify dedup key is untouched, so `:death` cannot
70
+ # re-fire.
71
+ # KEYS = [b-<bid>, b-<bid>-jids, queue_list, queues_set, b-<bid>-died, dead-batches]
72
+ # ARGV = [queue_name, jid, job_json, bid]
63
73
  # Returns 1.
64
74
  BATCH_PUSH = <<~LUA
65
- redis.call("hincrby", KEYS[1], "total", 1)
66
- redis.call("hincrby", KEYS[1], "pending", 1)
67
- redis.call("sadd", KEYS[2], ARGV[2])
75
+ if redis.call("srem", KEYS[5], ARGV[2]) == 1 then
76
+ redis.call("sadd", KEYS[2], ARGV[2])
77
+ if redis.call("scard", KEYS[5]) == 0 then
78
+ redis.call("hdel", KEYS[1], "death")
79
+ redis.call("zrem", KEYS[6], ARGV[4])
80
+ end
81
+ else
82
+ redis.call("hincrby", KEYS[1], "total", 1)
83
+ redis.call("hincrby", KEYS[1], "pending", 1)
84
+ redis.call("sadd", KEYS[2], ARGV[2])
85
+ end
68
86
  redis.call("sadd", KEYS[4], ARGV[1])
69
87
  redis.call("lpush", KEYS[3], ARGV[3])
70
88
  return 1
@@ -72,12 +90,21 @@ module Wurk
72
90
 
73
91
  # Pro Batch: ACK a job that completed successfully. SREM from the live
74
92
  # jids set and decrement pending iff the jid was a member (idempotent
75
- # against double-success on a flaky retry).
76
- # KEYS = [b-<bid>, b-<bid>-jids]
93
+ # against double-success on a flaky retry). A success also clears any
94
+ # outstanding "currently failing" record for the jid (a retry that finally
95
+ # passed), decrementing `failures` so it converges to the count of jobs
96
+ # *still* failing — Sidekiq Pro semantics, spec §2.5. The failed-set clear
97
+ # runs *before* the live-jids check so an invalidated batch (BATCH_INVALIDATE
98
+ # deletes the jids set) still converges failures to 0 on its short-circuited
99
+ # success ack, instead of stranding the jid in failed forever.
100
+ # KEYS = [b-<bid>, b-<bid>-jids, b-<bid>-failed]
77
101
  # ARGV = [jid]
78
102
  # Returns [new_pending, live_jids_remaining], or [-1, -1] when the jid
79
103
  # was not a member (treat as already acked).
80
104
  BATCH_ACK_SUCCESS = <<~LUA
105
+ if redis.call("srem", KEYS[3], ARGV[1]) == 1 then
106
+ redis.call("hincrby", KEYS[1], "failures", -1)
107
+ end
81
108
  local removed = redis.call("srem", KEYS[2], ARGV[1])
82
109
  if removed == 1 then
83
110
  local pending = redis.call("hincrby", KEYS[1], "pending", -1)
@@ -86,9 +113,28 @@ module Wurk
86
113
  return { -1, -1 }
87
114
  LUA
88
115
 
89
- # Pro Batch: ACK a job that exhausted retries and died. Records death,
90
- # bumps failures, and SREMs from live jids so the batch can fire
91
- # `:complete` even with terminally failed jobs.
116
+ # Pro Batch: record a job that failed and will retry (transient failure).
117
+ # SADDs the jid to the `failed` set and bumps `failures` only on the first
118
+ # add, so `failures` == SCARD(b-<bid>-failed) == the number of jobs
119
+ # currently in a failing/retrying state. Re-failures of the same jid are
120
+ # idempotent. Cleared by BATCH_ACK_SUCCESS (retry passed) or
121
+ # BATCH_ACK_COMPLETE (job died). Spec §2.5, §2.8.
122
+ # KEYS = [b-<bid>, b-<bid>-failed]
123
+ # ARGV = [jid]
124
+ # Returns 1.
125
+ BATCH_ACK_FAILED = <<~LUA
126
+ if redis.call("sadd", KEYS[2], ARGV[1]) == 1 then
127
+ redis.call("hincrby", KEYS[1], "failures", 1)
128
+ end
129
+ return 1
130
+ LUA
131
+
132
+ # Pro Batch: ACK a job that exhausted retries and died. Moves the jid from
133
+ # "currently failing" to "died": SREMs from the failed set (decrementing
134
+ # `failures` if it was recorded as failing), SADDs to died, and SREMs from
135
+ # live jids so the batch can fire `:complete` even with terminally failed
136
+ # jobs. `b-<bid>-failed` holds only currently-retrying jids; `b-<bid>-died`
137
+ # holds terminally-dead ones (spec §2.8 — the two sets are distinct).
92
138
  # KEYS = [b-<bid>, b-<bid>-jids, b-<bid>-died, b-<bid>-failed]
93
139
  # ARGV = [jid]
94
140
  # Returns [live_jids_remaining, died_count, first_death]. `first_death`
@@ -97,9 +143,10 @@ module Wurk
97
143
  BATCH_ACK_COMPLETE = <<~LUA
98
144
  local was_pre_existing_death = redis.call("scard", KEYS[3])
99
145
  redis.call("srem", KEYS[2], ARGV[1])
100
- redis.call("sadd", KEYS[4], ARGV[1])
146
+ if redis.call("srem", KEYS[4], ARGV[1]) == 1 then
147
+ redis.call("hincrby", KEYS[1], "failures", -1)
148
+ end
101
149
  local died_added = redis.call("sadd", KEYS[3], ARGV[1])
102
- redis.call("hincrby", KEYS[1], "failures", 1)
103
150
  local first_death = 0
104
151
  if was_pre_existing_death == 0 and died_added == 1 then
105
152
  first_death = 1
@@ -120,6 +167,48 @@ module Wurk
120
167
  return 1
121
168
  LUA
122
169
 
170
+ # Pro Batch (§2.4): atomically append one callback triple to the
171
+ # `callbacks` JSON array on the batch hash. Server-side append (vs a
172
+ # Ruby read-modify-write) so two processes registering callbacks on the
173
+ # same reopened batch cannot lose each other's writes. Refuses to write
174
+ # when the batch hash is gone — resurrecting a bare hash would create a
175
+ # batch that can never fire anything.
176
+ # KEYS = [b-<bid>]
177
+ # ARGV = [callback triple JSON, event name]
178
+ # Returns -1 when the batch hash does not exist; otherwise the event's
179
+ # fired flag ("1", or nil when it has not fired yet).
180
+ BATCH_APPEND_CALLBACK = <<~LUA
181
+ if redis.call("exists", KEYS[1]) == 0 then
182
+ return -1
183
+ end
184
+ local raw = redis.call("hget", KEYS[1], "callbacks")
185
+ local list
186
+ if raw and raw ~= "" then
187
+ list = cjson.decode(raw)
188
+ else
189
+ list = {}
190
+ end
191
+ list[#list + 1] = cjson.decode(ARGV[1])
192
+ redis.call("hset", KEYS[1], "callbacks", cjson.encode(list))
193
+ return redis.call("hget", KEYS[1], ARGV[2])
194
+ LUA
195
+
196
+ # Ent Unique (§3): atomic compare-and-delete of a lock key. Replaces the
197
+ # two-command GET-then-DEL — between those calls the key can expire and a
198
+ # fresh enqueue can grab it, and the bare DEL would then drop the new
199
+ # owner's lock. Shared by `Unique::ServerMiddleware#release` (normal
200
+ # success/start release) and `Unique::DEATH_HANDLER` (automatic-death
201
+ # release) so the two paths cannot drift.
202
+ # KEYS = [unique:<sha256>]
203
+ # ARGV = [owning jid]
204
+ # Returns 1 when the key was deleted, 0 otherwise.
205
+ RELEASE_IF_OWNER = <<~LUA
206
+ if redis.call("get", KEYS[1]) == ARGV[1] then
207
+ return redis.call("del", KEYS[1])
208
+ end
209
+ return 0
210
+ LUA
211
+
123
212
  # Pro Fast API (§11): server-side LRANGE+LREM to delete a single job by
124
213
  # jid from a queue list. Pure-Ruby Queue#find_job + JobRecord#delete is
125
214
  # O(N) round-trips; this is O(1) round-trip with O(N) Lua work.
@@ -172,10 +261,13 @@ module Wurk
172
261
  reliable_schedule_promote: RELIABLE_SCHEDULE_PROMOTE,
173
262
  batch_push: BATCH_PUSH,
174
263
  batch_ack_success: BATCH_ACK_SUCCESS,
264
+ batch_ack_failed: BATCH_ACK_FAILED,
175
265
  batch_ack_complete: BATCH_ACK_COMPLETE,
176
266
  batch_invalidate: BATCH_INVALIDATE,
267
+ batch_append_callback: BATCH_APPEND_CALLBACK,
177
268
  fast_delete_job: FAST_DELETE_JOB,
178
- fast_delete_by_class: FAST_DELETE_BY_CLASS
269
+ fast_delete_by_class: FAST_DELETE_BY_CLASS,
270
+ release_if_owner: RELEASE_IF_OWNER
179
271
  }.merge(FILE_SCRIPTS).freeze
180
272
 
181
273
  # SHA1 of each script source — matches what `SCRIPT LOAD` returns.
@@ -147,5 +147,10 @@ module Wurk
147
147
  end
148
148
  private_class_method :with_pool
149
149
  end
150
+
151
+ # Sidekiq exposes this as `Sidekiq::Metrics::Middleware` (via
152
+ # `Sidekiq::Metrics`, aliased to `Wurk::Metrics` in compat). Mirror that
153
+ # name so the drop-in constant resolves. Spec: docs/target/sidekiq-free.md §10.3.
154
+ Middleware = History
150
155
  end
151
156
  end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../keys'
3
4
  require_relative 'history'
4
5
  require_relative 'rollup'
6
+ require_relative 'queue_rollup'
5
7
 
6
8
  module Wurk
7
9
  module Metrics
@@ -59,6 +61,43 @@ module Wurk
59
61
  starts.zip(rows).map { |at, (p, f, ms)| { at: at, p: p.to_i, f: f.to_i, ms: ms.to_i } }
60
62
  end
61
63
 
64
+ # Per-queue size/latency gauge time-series written by
65
+ # Metrics::QueueRollup. `bucket` is '1m'/'5m'/'1h'; `window_seconds` is
66
+ # clamped to the bucket's retention. Returns one entry per live queue
67
+ # (or the explicit `queues:` list) — `[{name:, points: [{at:, size:,
68
+ # latency:}, …]}, …]` — oldest→newest, gap-filled with zeros so a chart
69
+ # has a continuous x-axis. Capped at MAX_QUEUE_SERIES queues to bound the
70
+ # payload; the cap is applied without logging because queue cardinality is small.
71
+ def queue_history(bucket, window_seconds, queues: nil, now: ::Time.now)
72
+ step, ttl = bucket_spec!(bucket)
73
+ starts = bucket_starts(now, step, clamp_history_window!(window_seconds, ttl))
74
+ names = queue_names(queues)
75
+ return [] if names.empty?
76
+
77
+ hashes = queue_bucket_hashes(bucket, starts)
78
+ names.map { |name| { name: name, points: queue_points(name, starts, hashes) } }
79
+ end
80
+
81
+ def queue_bucket_hashes(bucket, starts)
82
+ pipeline_hgetall(starts.map { |s| Wurk::Metrics::QueueRollup.bucket_key(bucket, s) })
83
+ .map { |h| h.is_a?(::Array) ? h.each_slice(2).to_h : (h || {}) }
84
+ end
85
+
86
+ MAX_QUEUE_SERIES = 25
87
+
88
+ def queue_names(queues)
89
+ names = queues || Wurk.redis { |c| c.call('SMEMBERS', Wurk::Keys::QUEUES_SET) }
90
+ names.sort.first(MAX_QUEUE_SERIES)
91
+ end
92
+
93
+ def queue_points(name, starts, hashes)
94
+ size_field = "#{name}|#{Wurk::Metrics::QueueRollup::SIZE_KIND}"
95
+ lat_field = "#{name}|#{Wurk::Metrics::QueueRollup::LAT_KIND}"
96
+ starts.zip(hashes).map do |at, hash|
97
+ { at: at, size: hash[size_field].to_i, latency: (hash[lat_field] || 0).to_f }
98
+ end
99
+ end
100
+
62
101
  def bucket_spec!(bucket)
63
102
  Wurk::Metrics::Rollup::BUCKETS.fetch(bucket) do
64
103
  raise ArgumentError, "bucket must be one of #{Wurk::Metrics::Rollup::BUCKETS.keys.inspect}"