wurk 0.0.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/app/controllers/wurk/api/serializers.rb +48 -2
- data/app/controllers/wurk/api_controller.rb +216 -1
- data/app/controllers/wurk/dashboard_controller.rb +20 -2
- data/app/controllers/wurk/extensions_controller.rb +56 -0
- data/app/controllers/wurk/profiles_controller.rb +68 -0
- data/config/routes.rb +54 -1
- data/exe/sidekiqswarm +8 -0
- data/exe/wurkswarm +23 -0
- data/lib/active_job/queue_adapters/wurk_adapter.rb +35 -0
- data/lib/generators/wurk/install/templates/wurk.rb +14 -3
- data/lib/sidekiq/api.rb +4 -0
- data/lib/sidekiq/cli.rb +9 -0
- data/lib/sidekiq/client.rb +4 -0
- data/lib/sidekiq/job.rb +4 -0
- data/lib/sidekiq/launcher.rb +4 -0
- data/lib/sidekiq/middleware/chain.rb +4 -0
- data/lib/sidekiq/middleware/server/statsd.rb +12 -0
- data/lib/sidekiq/rails.rb +10 -0
- data/lib/sidekiq/redis_connection.rb +4 -0
- data/lib/sidekiq/scheduled.rb +4 -0
- data/lib/sidekiq/testing.rb +4 -0
- data/lib/sidekiq/version.rb +4 -0
- data/lib/sidekiq/web.rb +4 -0
- data/lib/sidekiq/worker.rb +4 -0
- data/lib/sidekiq.rb +16 -0
- data/lib/wurk/batch/callbacks.rb +103 -13
- data/lib/wurk/batch/death_handler.rb +5 -2
- data/lib/wurk/batch/server_middleware.rb +35 -3
- data/lib/wurk/batch/status.rb +9 -0
- data/lib/wurk/batch.rb +23 -1
- data/lib/wurk/capsule.rb +20 -1
- data/lib/wurk/cli.rb +84 -1
- data/lib/wurk/client.rb +20 -17
- data/lib/wurk/compat.rb +44 -2
- data/lib/wurk/component.rb +5 -4
- data/lib/wurk/configuration.rb +120 -3
- data/lib/wurk/cron.rb +51 -9
- data/lib/wurk/dead_set.rb +8 -3
- data/lib/wurk/deploy.rb +8 -4
- data/lib/wurk/encryption.rb +6 -1
- data/lib/wurk/fetcher/reaper.rb +78 -11
- data/lib/wurk/fetcher/reliable.rb +14 -4
- data/lib/wurk/heartbeat.rb +45 -0
- data/lib/wurk/history.rb +174 -0
- data/lib/wurk/iterable_job/active_record_enumerator.rb +71 -0
- data/lib/wurk/iterable_job/csv_enumerator.rb +51 -0
- data/lib/wurk/iterable_job.rb +41 -0
- data/lib/wurk/iterable_job_query.rb +75 -0
- data/lib/wurk/job.rb +8 -0
- data/lib/wurk/job_record.rb +16 -1
- data/lib/wurk/job_set.rb +4 -4
- data/lib/wurk/job_util.rb +15 -6
- data/lib/wurk/keys.rb +10 -0
- data/lib/wurk/launcher.rb +35 -1
- data/lib/wurk/leader.rb +15 -6
- data/lib/wurk/limiter/bucket.rb +14 -3
- data/lib/wurk/limiter/concurrent.rb +1 -1
- data/lib/wurk/limiter/window.rb +2 -1
- data/lib/wurk/limiter.rb +12 -0
- data/lib/wurk/lua/loader.rb +10 -0
- data/lib/wurk/lua.rb +106 -14
- data/lib/wurk/metrics/history.rb +5 -0
- data/lib/wurk/metrics/query.rb +39 -0
- data/lib/wurk/metrics/queue_rollup.rb +151 -0
- data/lib/wurk/metrics/statsd.rb +11 -0
- data/lib/wurk/middleware/current_attributes.rb +29 -6
- data/lib/wurk/middleware/interrupt_handler.rb +5 -0
- data/lib/wurk/middleware/poison_pill.rb +35 -5
- data/lib/wurk/processor.rb +17 -8
- data/lib/wurk/profile_set.rb +65 -0
- data/lib/wurk/profiler.rb +127 -0
- data/lib/wurk/railtie.rb +19 -5
- data/lib/wurk/redis_client_adapter.rb +72 -0
- data/lib/wurk/redis_connection.rb +30 -0
- data/lib/wurk/redis_pool.rb +5 -1
- data/lib/wurk/scheduled.rb +42 -0
- data/lib/wurk/sorted_entry.rb +13 -11
- data/lib/wurk/stats.rb +11 -4
- data/lib/wurk/swarm/child_boot.rb +26 -4
- data/lib/wurk/swarm.rb +1 -1
- data/lib/wurk/transaction_aware_client.rb +69 -0
- data/lib/wurk/unique.rb +49 -7
- data/lib/wurk/version.rb +1 -1
- data/lib/wurk/web/batch_status.rb +42 -0
- data/lib/wurk/web/config.rb +219 -17
- data/lib/wurk/web/enterprise.rb +14 -0
- data/lib/wurk/web/extension.rb +348 -0
- data/lib/wurk/web/rack_app.rb +77 -0
- data/lib/wurk/web.rb +2 -0
- data/lib/wurk/worker/setter.rb +5 -1
- data/lib/wurk/worker.rb +17 -6
- data/lib/wurk.rb +44 -0
- data/vendor/assets/dashboard/assets/fa-brands-400-BP5tdqmh.woff2 +0 -0
- data/vendor/assets/dashboard/assets/fa-regular-400-nyy7hhHF.woff2 +0 -0
- data/vendor/assets/dashboard/assets/fa-solid-900-DRAAbZTg.woff2 +0 -0
- data/vendor/assets/dashboard/assets/index-9CFRWpfG.js +77 -0
- data/vendor/assets/dashboard/assets/index-CW8AFQIv.css +2 -0
- data/vendor/assets/dashboard/assets/wurk-logo-Vy3xW4K0.png +0 -0
- data/vendor/assets/dashboard/favicon.png +0 -0
- data/vendor/assets/dashboard/index.html +10 -3
- data/vendor/assets/dashboard/wurk-manifest.json +2 -2
- metadata +42 -3
- data/vendor/assets/dashboard/assets/index-D2XR0iGw.js +0 -60
- data/vendor/assets/dashboard/assets/index-DlPr4YXw.css +0 -1
data/lib/wurk/job.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'worker'
|
|
4
|
+
require_relative 'worker/setter'
|
|
4
5
|
|
|
5
6
|
module Wurk
|
|
6
7
|
# Sidekiq 7+ alias for Wurk::Worker. `include Wurk::Job` and
|
|
@@ -20,6 +21,13 @@ module Wurk
|
|
|
20
21
|
# Spec: docs/target/sidekiq-free.md §6.4.
|
|
21
22
|
class Interrupted < RuntimeError; end
|
|
22
23
|
|
|
24
|
+
# Per-call option carrier returned by `set(...)`. Sidekiq 7+ documents it
|
|
25
|
+
# under the modern mixin name `Sidekiq::Job::Setter`; since
|
|
26
|
+
# `Sidekiq::Job = Wurk::Job`, this rebind is what makes that constant
|
|
27
|
+
# resolve (without it `Sidekiq::Job::Setter` raises NameError). Same class
|
|
28
|
+
# as `Sidekiq::Worker::Setter`. Spec: docs/target/sidekiq-free.md §6.3.
|
|
29
|
+
Setter = Wurk::Worker::Setter
|
|
30
|
+
|
|
23
31
|
def self.included(base)
|
|
24
32
|
base.include(Wurk::Worker)
|
|
25
33
|
end
|
data/lib/wurk/job_record.rb
CHANGED
|
@@ -53,6 +53,15 @@ module Wurk
|
|
|
53
53
|
def args = item['args']
|
|
54
54
|
def jid = item['jid']
|
|
55
55
|
def bid = item['bid']
|
|
56
|
+
|
|
57
|
+
# IterableJob progress for this job, or nil for a non-iterable job (no
|
|
58
|
+
# `it-<jid>` HASH). Spec §19.3. Reads via the IterableJobQuery data API.
|
|
59
|
+
def iterable_state
|
|
60
|
+
return nil if jid.nil? || jid.to_s.empty?
|
|
61
|
+
|
|
62
|
+
Wurk::IterableJobQuery.new([jid])[jid]
|
|
63
|
+
end
|
|
64
|
+
|
|
56
65
|
def tags = item['tags'] || []
|
|
57
66
|
def enqueued_at = parse_time(item['enqueued_at'])
|
|
58
67
|
def created_at = parse_time(item['created_at'])
|
|
@@ -96,10 +105,16 @@ module Wurk
|
|
|
96
105
|
@display_class = active_job_wrapper? ? unwrap_class : klass
|
|
97
106
|
end
|
|
98
107
|
|
|
108
|
+
# UI-facing args. Encrypted jobs (§4.7) get their envelope last arg
|
|
109
|
+
# masked as "<encrypted>" so ciphertext never reaches the dashboard;
|
|
110
|
+
# redaction keys off the envelope shape, so it fires whether or not the
|
|
111
|
+
# stored hash carried the `encrypt` flag. Cleartext preceding args stay
|
|
112
|
+
# visible for triage. Display-only — the stored payload is untouched.
|
|
99
113
|
def display_args
|
|
100
114
|
return @display_args if defined?(@display_args)
|
|
101
115
|
|
|
102
|
-
|
|
116
|
+
base = active_job_wrapper? ? unwrap_args : args
|
|
117
|
+
@display_args = Wurk::Encryption.redact_args('args' => base, 'encrypt' => item['encrypt'])
|
|
103
118
|
end
|
|
104
119
|
|
|
105
120
|
# @api internal
|
data/lib/wurk/job_set.rb
CHANGED
|
@@ -122,10 +122,10 @@ module Wurk
|
|
|
122
122
|
count
|
|
123
123
|
end
|
|
124
124
|
|
|
125
|
-
# Moves every job in this set to the dead set.
|
|
126
|
-
#
|
|
127
|
-
#
|
|
128
|
-
def kill_all(notify_failure:
|
|
125
|
+
# Moves every job in this set to the dead set. Death handlers fire per
|
|
126
|
+
# entry by default — `each(&:kill)` equivalence with Sidekiq; pass
|
|
127
|
+
# `notify_failure: false` to suppress. Returns the count of jobs moved.
|
|
128
|
+
def kill_all(notify_failure: true, ex: nil)
|
|
129
129
|
count = 0
|
|
130
130
|
dead = DeadSet.new
|
|
131
131
|
until size.zero?
|
data/lib/wurk/job_util.rb
CHANGED
|
@@ -9,10 +9,15 @@ module Wurk
|
|
|
9
9
|
#
|
|
10
10
|
# Spec: docs/target/sidekiq-free.md §9 (Sidekiq::JobUtil).
|
|
11
11
|
module JobUtil # rubocop:disable Metrics/ModuleLength
|
|
12
|
-
# Top-level keys stripped from every payload
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
|
|
12
|
+
# Top-level keys consumed at enqueue time but stripped from every payload
|
|
13
|
+
# before raw_push — they must never reach the wire (spec §2.2):
|
|
14
|
+
# `pool` selects the Redis pool (resolved in client_push/build_client)
|
|
15
|
+
# `client_class` swaps the enqueue client (Wurk.transactional_push!)
|
|
16
|
+
# Both carry non-JSON values (a pool / a Class). Baked into the literal rather
|
|
17
|
+
# than appended at load: a load-time `<<` is fragile under the parallel test
|
|
18
|
+
# runner (a test that adds/deletes the same key clobbers it for later suites).
|
|
19
|
+
# Still mutable so other extensions can append without monkey-patching.
|
|
20
|
+
TRANSIENT_ATTRIBUTES = %w[pool client_class] # rubocop:disable Style/MutableConstant
|
|
16
21
|
|
|
17
22
|
RETRY_FOR_MAX = 1_000_000_000
|
|
18
23
|
|
|
@@ -111,13 +116,17 @@ module Wurk
|
|
|
111
116
|
|
|
112
117
|
# Pro `expires_in:` → absolute epoch-float `expiry` resolved once at push,
|
|
113
118
|
# so the server middleware doesn't redo the math. Spec: sidekiq-pro.md §7.
|
|
119
|
+
# For scheduled jobs the clock origin is `at` (epoch seconds), not
|
|
120
|
+
# `created_at` (epoch millis) — otherwise any delay > expires_in makes the
|
|
121
|
+
# job born-expired: perform_in(2h) + expires_in: 1h must expire at 3h.
|
|
114
122
|
# nil.respond_to?(:to_f) is true on modern Ruby (returns 0.0), so we must
|
|
115
123
|
# gate on a non-nil duration before coercing.
|
|
116
124
|
def stamp_expiry(item)
|
|
117
125
|
d = item['expires_in']
|
|
118
|
-
return if d.nil?
|
|
126
|
+
return if d.nil? || !d.respond_to?(:to_f)
|
|
119
127
|
|
|
120
|
-
item['
|
|
128
|
+
origin = item['at'] ? item['at'].to_f : (item['created_at'].to_f / 1000.0)
|
|
129
|
+
item['expiry'] ||= origin + d.to_f
|
|
121
130
|
end
|
|
122
131
|
|
|
123
132
|
def report_unsafe(item, offender, mode)
|
data/lib/wurk/keys.rb
CHANGED
|
@@ -29,6 +29,16 @@ module Wurk
|
|
|
29
29
|
# Live process identities (heartbeat membership).
|
|
30
30
|
PROCESSES = 'processes'
|
|
31
31
|
|
|
32
|
+
# Ent Historical Metrics: capped Redis stream of periodic snapshots written
|
|
33
|
+
# by Wurk::History (§5.3). Same key a migrated Sidekiq Ent install uses, so
|
|
34
|
+
# its existing data renders without rewrite. Spec: sidekiq-ent.md §5.3, §10.
|
|
35
|
+
HISTORY_METRICS = 'history:metrics'
|
|
36
|
+
|
|
37
|
+
# Profiles (v8.0+): ZSET of `<token>-<jid>` keys, score = expiry epoch;
|
|
38
|
+
# each member also has a `<token>-<jid>` HASH holding the profile blob.
|
|
39
|
+
# Spec: docs/target/sidekiq-free.md §1.7.
|
|
40
|
+
PROFILES = 'profiles'
|
|
41
|
+
|
|
32
42
|
# Global processed counter; per-day variants append `:YYYY-MM-DD`.
|
|
33
43
|
STAT_PROCESSED = 'stat:processed'
|
|
34
44
|
|
data/lib/wurk/launcher.rb
CHANGED
|
@@ -10,6 +10,8 @@ require_relative 'scheduled'
|
|
|
10
10
|
require_relative 'leader'
|
|
11
11
|
require_relative 'cron'
|
|
12
12
|
require_relative 'metrics/rollup'
|
|
13
|
+
require_relative 'metrics/queue_rollup'
|
|
14
|
+
require_relative 'history'
|
|
13
15
|
require_relative 'fetcher/reaper'
|
|
14
16
|
|
|
15
17
|
module Wurk
|
|
@@ -41,7 +43,7 @@ module Wurk
|
|
|
41
43
|
# (Sidekiq's drop-in surface). The single source of truth is Heartbeat.
|
|
42
44
|
BEAT_PAUSE = Heartbeat::BEAT_PAUSE
|
|
43
45
|
|
|
44
|
-
attr_accessor :managers, :poller, :cron_poller, :metrics_rollup
|
|
46
|
+
attr_accessor :managers, :poller, :cron_poller, :metrics_rollup, :queue_rollup, :history
|
|
45
47
|
|
|
46
48
|
def initialize(config, embedded: false)
|
|
47
49
|
@config = config
|
|
@@ -51,6 +53,8 @@ module Wurk
|
|
|
51
53
|
@poller = build_poller
|
|
52
54
|
@cron_poller = build_cron_poller
|
|
53
55
|
@metrics_rollup = build_metrics_rollup
|
|
56
|
+
@queue_rollup = build_queue_rollup
|
|
57
|
+
@history = build_history
|
|
54
58
|
@leader = build_leader
|
|
55
59
|
@reaper = build_reaper
|
|
56
60
|
@started_at = nil
|
|
@@ -81,8 +85,11 @@ module Wurk
|
|
|
81
85
|
@leader&.start
|
|
82
86
|
@cron_poller&.start
|
|
83
87
|
@metrics_rollup&.start
|
|
88
|
+
@queue_rollup&.start
|
|
89
|
+
@history&.start
|
|
84
90
|
@managers.each(&:start)
|
|
85
91
|
@reaper.start
|
|
92
|
+
boot_reclaim
|
|
86
93
|
@health_server&.start
|
|
87
94
|
end
|
|
88
95
|
|
|
@@ -114,6 +121,8 @@ module Wurk
|
|
|
114
121
|
# releasing the lock so no tick races a follower's promotion.
|
|
115
122
|
@cron_poller&.terminate
|
|
116
123
|
@metrics_rollup&.terminate
|
|
124
|
+
@queue_rollup&.terminate
|
|
125
|
+
@history&.terminate
|
|
117
126
|
@reaper&.stop
|
|
118
127
|
# CAS-release the cluster lock now (planned shutdown) so a follower can
|
|
119
128
|
# take over immediately instead of waiting out the TTL.
|
|
@@ -247,6 +256,20 @@ module Wurk
|
|
|
247
256
|
Wurk::Metrics::Rollup.new(@config)
|
|
248
257
|
end
|
|
249
258
|
|
|
259
|
+
# Leader-only per-queue gauge sampler. Like the metrics rollup, every
|
|
260
|
+
# process runs one but only the leader writes the `qm|…` size/latency
|
|
261
|
+
# buckets the Historical tab's per-queue charts read.
|
|
262
|
+
def build_queue_rollup
|
|
263
|
+
Wurk::Metrics::QueueRollup.new(@config)
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
# Ent §5 Historical Metrics snapshotter — only when the host opted in via
|
|
267
|
+
# `config.retain_history`. Leader-gated like the rollups, so just one
|
|
268
|
+
# process emits the cluster-wide snapshot per interval.
|
|
269
|
+
def build_history
|
|
270
|
+
Wurk::History.new(@config) if @config.history_enabled?
|
|
271
|
+
end
|
|
272
|
+
|
|
250
273
|
# Every worker process campaigns for the single cluster lock (`dear-leader`);
|
|
251
274
|
# one wins and renews it, the rest follow and promote on its death. Cadence
|
|
252
275
|
# falls back to the spec defaults (TTL 30 / renew 15 / follower 60) unless
|
|
@@ -260,6 +283,17 @@ module Wurk
|
|
|
260
283
|
)
|
|
261
284
|
end
|
|
262
285
|
|
|
286
|
+
# Deterministic boot-time orphan sweep: a SIGKILLed sibling's in-flight jobs
|
|
287
|
+
# would otherwise wait a full reaper interval before recovery. One unguarded
|
|
288
|
+
# scoped reclaim at start (no cluster lock — every booting worker helps) gets
|
|
289
|
+
# them re-queued immediately. Best-effort: a Redis hiccup here must not abort
|
|
290
|
+
# boot. Spec: docs/target/sidekiq-pro.md §3.2.
|
|
291
|
+
def boot_reclaim
|
|
292
|
+
@reaper.reclaim!
|
|
293
|
+
rescue StandardError => e
|
|
294
|
+
handle_exception(e, context: 'launcher-boot-reclaim') if respond_to?(:handle_exception)
|
|
295
|
+
end
|
|
296
|
+
|
|
263
297
|
# Reliable-fetch orphan reclamation. Every worker runs one; a cluster
|
|
264
298
|
# `SET NX EX` lock ensures only one actually sweeps per interval, so this
|
|
265
299
|
# is leader-independent (it keeps working if the leader dies). Tune the
|
data/lib/wurk/leader.rb
CHANGED
|
@@ -20,7 +20,8 @@ module Wurk
|
|
|
20
20
|
#
|
|
21
21
|
# Cadence per spec: renew every 15s while leader, recheck every 60s as
|
|
22
22
|
# follower, lock TTL 30s. Opt out a process from campaigning entirely
|
|
23
|
-
# with `WURK_LEADER=false` (
|
|
23
|
+
# with `WURK_LEADER=false` (or its Sidekiq alias `SIDEKIQ_LEADER=false`),
|
|
24
|
+
# useful for hot-standby pools.
|
|
24
25
|
#
|
|
25
26
|
# Spec: docs/target/sidekiq-ent.md §6.
|
|
26
27
|
class Leader
|
|
@@ -29,9 +30,17 @@ module Wurk
|
|
|
29
30
|
DEFAULT_TTL = 30
|
|
30
31
|
DEFAULT_RENEW_INTERVAL = 15
|
|
31
32
|
DEFAULT_FOLLOWER_INTERVAL = 60
|
|
32
|
-
OPT_OUT_ENV = 'WURK_LEADER'
|
|
33
|
+
OPT_OUT_ENV = 'WURK_LEADER' # native opt-out env
|
|
34
|
+
SIDEKIQ_OPT_OUT_ENV = 'SIDEKIQ_LEADER' # Sidekiq Ent drop-in alias (§6.2/§7.2)
|
|
33
35
|
THREAD_NAME = 'wurk-leader'
|
|
34
36
|
|
|
37
|
+
# True when this process has opted out of campaigning via `WURK_LEADER=false`
|
|
38
|
+
# or its Sidekiq alias `SIDEKIQ_LEADER=false` (hot-standby pools that must
|
|
39
|
+
# never lead). Either env name works.
|
|
40
|
+
def self.opted_out?
|
|
41
|
+
[OPT_OUT_ENV, SIDEKIQ_OPT_OUT_ENV].any? { |k| ENV[k].to_s.downcase == 'false' }
|
|
42
|
+
end
|
|
43
|
+
|
|
35
44
|
attr_reader :key, :ttl, :owner, :token, :config
|
|
36
45
|
|
|
37
46
|
def initialize(config: nil, key: DEFAULT_KEY, ttl: DEFAULT_TTL, # rubocop:disable Metrics/ParameterLists
|
|
@@ -53,11 +62,11 @@ module Wurk
|
|
|
53
62
|
@sleeper = ::ConditionVariable.new
|
|
54
63
|
end
|
|
55
64
|
|
|
56
|
-
# `WURK_LEADER=false` makes `acquire` a no-op and
|
|
57
|
-
# false; the renewal thread also refuses to start.
|
|
58
|
-
# standby pools that must never campaign.
|
|
65
|
+
# `WURK_LEADER=false` (or `SIDEKIQ_LEADER=false`) makes `acquire` a no-op and
|
|
66
|
+
# `leader?` permanently false; the renewal thread also refuses to start.
|
|
67
|
+
# Useful for hot-standby pools that must never campaign.
|
|
59
68
|
def disabled?
|
|
60
|
-
|
|
69
|
+
self.class.opted_out?
|
|
61
70
|
end
|
|
62
71
|
|
|
63
72
|
# SET NX EX. If the key already holds *our* owner string (rare — same
|
data/lib/wurk/limiter/bucket.rb
CHANGED
|
@@ -71,9 +71,20 @@ module Wurk
|
|
|
71
71
|
end
|
|
72
72
|
|
|
73
73
|
def acquire(used)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
# Resolve the epoch from the Redis clock (spec §1: timing from TIME, not
|
|
75
|
+
# the client clock) and pass the single fully-qualified key. One declared
|
|
76
|
+
# key is safe on both Redis Cluster (no CROSSSLOT) and Dragonfly (no
|
|
77
|
+
# undeclared-key access) — see lua/limiter_bucket_acquire.lua (#91).
|
|
78
|
+
Wurk::Limiter.redis do |c|
|
|
79
|
+
now = c.call('TIME').first.to_i
|
|
80
|
+
epoch = now / interval_seconds
|
|
81
|
+
remaining = ((epoch + 1) * interval_seconds) - now
|
|
82
|
+
Wurk::Lua::Loader.eval_cached(
|
|
83
|
+
c, :limiter_bucket_acquire,
|
|
84
|
+
keys: ["lmtr-b:#{@name}:#{epoch}"],
|
|
85
|
+
argv: [@options[:count], used, ttl, remaining]
|
|
86
|
+
)
|
|
87
|
+
end
|
|
77
88
|
end
|
|
78
89
|
end
|
|
79
90
|
end
|
|
@@ -87,7 +87,7 @@ module Wurk
|
|
|
87
87
|
# Lowest slot expiry epoch (the next slot to free), or nil when empty.
|
|
88
88
|
def soonest_expiry
|
|
89
89
|
row = Wurk::Limiter.redis { |c| c.call('ZRANGE', state_key, 0, 0, 'WITHSCORES') }
|
|
90
|
-
|
|
90
|
+
Wurk::Limiter.first_score(row)
|
|
91
91
|
end
|
|
92
92
|
|
|
93
93
|
def state_key
|
data/lib/wurk/limiter/window.rb
CHANGED
|
@@ -63,7 +63,8 @@ module Wurk
|
|
|
63
63
|
# Oldest timestamp + interval = the moment it leaves the window.
|
|
64
64
|
def oldest_expiry
|
|
65
65
|
row = Wurk::Limiter.redis { |c| c.call('ZRANGE', state_key, 0, 0, 'WITHSCORES') }
|
|
66
|
-
|
|
66
|
+
score = Wurk::Limiter.first_score(row)
|
|
67
|
+
score && (score + interval_seconds)
|
|
67
68
|
end
|
|
68
69
|
|
|
69
70
|
def interval_seconds
|
data/lib/wurk/limiter.rb
CHANGED
|
@@ -140,6 +140,18 @@ module Wurk
|
|
|
140
140
|
pool.with(&)
|
|
141
141
|
end
|
|
142
142
|
|
|
143
|
+
# `ZRANGE key 0 0 WITHSCORES` yields a single [member, score] pair, but the
|
|
144
|
+
# shape depends on the protocol: RESP3 (redis-client's default when talking to Redis >= 7)
|
|
145
|
+
# nests it as [[member, score]]; RESP2 returns a flat [member, score].
|
|
146
|
+
# Return the score as a Float across both, or nil when the set is empty.
|
|
147
|
+
# (The old flat-only `row[1]` silently collapsed to 0.0 under RESP3.)
|
|
148
|
+
def first_score(row)
|
|
149
|
+
pair = row.first
|
|
150
|
+
return nil if pair.nil?
|
|
151
|
+
|
|
152
|
+
(pair.is_a?(Array) ? pair.last : row[1]).to_f
|
|
153
|
+
end
|
|
154
|
+
|
|
143
155
|
def concurrent(name, limit, wait_timeout: DEFAULT_WAIT_TIMEOUT, lock_timeout: DEFAULT_LOCK_TIMEOUT,
|
|
144
156
|
policy: :raise, backoff: nil, ttl: DEFAULT_TTL)
|
|
145
157
|
Concurrent.new(name,
|
data/lib/wurk/lua/loader.rb
CHANGED
|
@@ -38,6 +38,16 @@ module Wurk
|
|
|
38
38
|
evalsha(redis, sha, keys, argv)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
# Source-embedded EVAL — the slow but cache-independent counterpart to
|
|
42
|
+
# `eval_cached`. Used on retry from a pipelined NOSCRIPT recovery where
|
|
43
|
+
# EVALSHA can still race a freshly-loaded script under heavy CI load
|
|
44
|
+
# (cf. WorkerTest NOSCRIPT flake on test (3.4, 7.2)). EVAL ships the
|
|
45
|
+
# full source every call, so it never raises NOSCRIPT.
|
|
46
|
+
def eval_with_source(redis, name, keys:, argv:)
|
|
47
|
+
src = SCRIPTS.fetch(name) { raise ArgumentError, "unknown Lua script: #{name.inspect}" }
|
|
48
|
+
redis.call('EVAL', src, keys.size, *keys, *argv)
|
|
49
|
+
end
|
|
50
|
+
|
|
41
51
|
private
|
|
42
52
|
|
|
43
53
|
def evalsha(redis, sha, keys, argv)
|
data/lib/wurk/lua.rb
CHANGED
|
@@ -12,7 +12,7 @@ module Wurk
|
|
|
12
12
|
#
|
|
13
13
|
# `:zpopbyscore` is reproduced verbatim from sidekiq-free.md §1.8 and
|
|
14
14
|
# MUST NOT diverge — parity tests will fail on a single byte change.
|
|
15
|
-
module Lua
|
|
15
|
+
module Lua # rubocop:disable Metrics/ModuleLength
|
|
16
16
|
ZPOPBYSCORE = <<~LUA
|
|
17
17
|
local key, now = KEYS[1], ARGV[1]
|
|
18
18
|
local jobs = redis.call("zrange", key, "-inf", now, "byscore", "limit", 0, 1)
|
|
@@ -58,13 +58,31 @@ module Wurk
|
|
|
58
58
|
|
|
59
59
|
# Pro Batch: register a job into a batch and push it to its queue
|
|
60
60
|
# atomically. Keeps total/pending in sync with the jids set.
|
|
61
|
-
#
|
|
62
|
-
#
|
|
61
|
+
#
|
|
62
|
+
# A jid found in `b-<bid>-died` is a manual retry of a dead job (morgue
|
|
63
|
+
# "retry" / "add to queue") — it rejoins the live set without recounting:
|
|
64
|
+
# total and pending already include it, because a death never decrements
|
|
65
|
+
# pending. When that drains the died set the batch is no longer dead, so
|
|
66
|
+
# the durable `death` success-suppression flag clears and the bid leaves
|
|
67
|
+
# `dead-batches` — a later full drain can then fire `:success` (spec §2.4:
|
|
68
|
+
# success after the dead job is manually retried to success). The
|
|
69
|
+
# `b-<bid>-death` notify dedup key is untouched, so `:death` cannot
|
|
70
|
+
# re-fire.
|
|
71
|
+
# KEYS = [b-<bid>, b-<bid>-jids, queue_list, queues_set, b-<bid>-died, dead-batches]
|
|
72
|
+
# ARGV = [queue_name, jid, job_json, bid]
|
|
63
73
|
# Returns 1.
|
|
64
74
|
BATCH_PUSH = <<~LUA
|
|
65
|
-
redis.call("
|
|
66
|
-
|
|
67
|
-
|
|
75
|
+
if redis.call("srem", KEYS[5], ARGV[2]) == 1 then
|
|
76
|
+
redis.call("sadd", KEYS[2], ARGV[2])
|
|
77
|
+
if redis.call("scard", KEYS[5]) == 0 then
|
|
78
|
+
redis.call("hdel", KEYS[1], "death")
|
|
79
|
+
redis.call("zrem", KEYS[6], ARGV[4])
|
|
80
|
+
end
|
|
81
|
+
else
|
|
82
|
+
redis.call("hincrby", KEYS[1], "total", 1)
|
|
83
|
+
redis.call("hincrby", KEYS[1], "pending", 1)
|
|
84
|
+
redis.call("sadd", KEYS[2], ARGV[2])
|
|
85
|
+
end
|
|
68
86
|
redis.call("sadd", KEYS[4], ARGV[1])
|
|
69
87
|
redis.call("lpush", KEYS[3], ARGV[3])
|
|
70
88
|
return 1
|
|
@@ -72,12 +90,21 @@ module Wurk
|
|
|
72
90
|
|
|
73
91
|
# Pro Batch: ACK a job that completed successfully. SREM from the live
|
|
74
92
|
# jids set and decrement pending iff the jid was a member (idempotent
|
|
75
|
-
# against double-success on a flaky retry).
|
|
76
|
-
#
|
|
93
|
+
# against double-success on a flaky retry). A success also clears any
|
|
94
|
+
# outstanding "currently failing" record for the jid (a retry that finally
|
|
95
|
+
# passed), decrementing `failures` so it converges to the count of jobs
|
|
96
|
+
# *still* failing — Sidekiq Pro semantics, spec §2.5. The failed-set clear
|
|
97
|
+
# runs *before* the live-jids check so an invalidated batch (BATCH_INVALIDATE
|
|
98
|
+
# deletes the jids set) still converges failures to 0 on its short-circuited
|
|
99
|
+
# success ack, instead of stranding the jid in failed forever.
|
|
100
|
+
# KEYS = [b-<bid>, b-<bid>-jids, b-<bid>-failed]
|
|
77
101
|
# ARGV = [jid]
|
|
78
102
|
# Returns [new_pending, live_jids_remaining], or [-1, -1] when the jid
|
|
79
103
|
# was not a member (treat as already acked).
|
|
80
104
|
BATCH_ACK_SUCCESS = <<~LUA
|
|
105
|
+
if redis.call("srem", KEYS[3], ARGV[1]) == 1 then
|
|
106
|
+
redis.call("hincrby", KEYS[1], "failures", -1)
|
|
107
|
+
end
|
|
81
108
|
local removed = redis.call("srem", KEYS[2], ARGV[1])
|
|
82
109
|
if removed == 1 then
|
|
83
110
|
local pending = redis.call("hincrby", KEYS[1], "pending", -1)
|
|
@@ -86,9 +113,28 @@ module Wurk
|
|
|
86
113
|
return { -1, -1 }
|
|
87
114
|
LUA
|
|
88
115
|
|
|
89
|
-
# Pro Batch:
|
|
90
|
-
#
|
|
91
|
-
#
|
|
116
|
+
# Pro Batch: record a job that failed and will retry (transient failure).
|
|
117
|
+
# SADDs the jid to the `failed` set and bumps `failures` only on the first
|
|
118
|
+
# add, so `failures` == SCARD(b-<bid>-failed) == the number of jobs
|
|
119
|
+
# currently in a failing/retrying state. Re-failures of the same jid are
|
|
120
|
+
# idempotent. Cleared by BATCH_ACK_SUCCESS (retry passed) or
|
|
121
|
+
# BATCH_ACK_COMPLETE (job died). Spec §2.5, §2.8.
|
|
122
|
+
# KEYS = [b-<bid>, b-<bid>-failed]
|
|
123
|
+
# ARGV = [jid]
|
|
124
|
+
# Returns 1.
|
|
125
|
+
BATCH_ACK_FAILED = <<~LUA
|
|
126
|
+
if redis.call("sadd", KEYS[2], ARGV[1]) == 1 then
|
|
127
|
+
redis.call("hincrby", KEYS[1], "failures", 1)
|
|
128
|
+
end
|
|
129
|
+
return 1
|
|
130
|
+
LUA
|
|
131
|
+
|
|
132
|
+
# Pro Batch: ACK a job that exhausted retries and died. Moves the jid from
|
|
133
|
+
# "currently failing" to "died": SREMs from the failed set (decrementing
|
|
134
|
+
# `failures` if it was recorded as failing), SADDs to died, and SREMs from
|
|
135
|
+
# live jids so the batch can fire `:complete` even with terminally failed
|
|
136
|
+
# jobs. `b-<bid>-failed` holds only currently-retrying jids; `b-<bid>-died`
|
|
137
|
+
# holds terminally-dead ones (spec §2.8 — the two sets are distinct).
|
|
92
138
|
# KEYS = [b-<bid>, b-<bid>-jids, b-<bid>-died, b-<bid>-failed]
|
|
93
139
|
# ARGV = [jid]
|
|
94
140
|
# Returns [live_jids_remaining, died_count, first_death]. `first_death`
|
|
@@ -97,9 +143,10 @@ module Wurk
|
|
|
97
143
|
BATCH_ACK_COMPLETE = <<~LUA
|
|
98
144
|
local was_pre_existing_death = redis.call("scard", KEYS[3])
|
|
99
145
|
redis.call("srem", KEYS[2], ARGV[1])
|
|
100
|
-
redis.call("
|
|
146
|
+
if redis.call("srem", KEYS[4], ARGV[1]) == 1 then
|
|
147
|
+
redis.call("hincrby", KEYS[1], "failures", -1)
|
|
148
|
+
end
|
|
101
149
|
local died_added = redis.call("sadd", KEYS[3], ARGV[1])
|
|
102
|
-
redis.call("hincrby", KEYS[1], "failures", 1)
|
|
103
150
|
local first_death = 0
|
|
104
151
|
if was_pre_existing_death == 0 and died_added == 1 then
|
|
105
152
|
first_death = 1
|
|
@@ -120,6 +167,48 @@ module Wurk
|
|
|
120
167
|
return 1
|
|
121
168
|
LUA
|
|
122
169
|
|
|
170
|
+
# Pro Batch (§2.4): atomically append one callback triple to the
|
|
171
|
+
# `callbacks` JSON array on the batch hash. Server-side append (vs a
|
|
172
|
+
# Ruby read-modify-write) so two processes registering callbacks on the
|
|
173
|
+
# same reopened batch cannot lose each other's writes. Refuses to write
|
|
174
|
+
# when the batch hash is gone — resurrecting a bare hash would create a
|
|
175
|
+
# batch that can never fire anything.
|
|
176
|
+
# KEYS = [b-<bid>]
|
|
177
|
+
# ARGV = [callback triple JSON, event name]
|
|
178
|
+
# Returns -1 when the batch hash does not exist; otherwise the event's
|
|
179
|
+
# fired flag ("1", or nil when it has not fired yet).
|
|
180
|
+
BATCH_APPEND_CALLBACK = <<~LUA
|
|
181
|
+
if redis.call("exists", KEYS[1]) == 0 then
|
|
182
|
+
return -1
|
|
183
|
+
end
|
|
184
|
+
local raw = redis.call("hget", KEYS[1], "callbacks")
|
|
185
|
+
local list
|
|
186
|
+
if raw and raw ~= "" then
|
|
187
|
+
list = cjson.decode(raw)
|
|
188
|
+
else
|
|
189
|
+
list = {}
|
|
190
|
+
end
|
|
191
|
+
list[#list + 1] = cjson.decode(ARGV[1])
|
|
192
|
+
redis.call("hset", KEYS[1], "callbacks", cjson.encode(list))
|
|
193
|
+
return redis.call("hget", KEYS[1], ARGV[2])
|
|
194
|
+
LUA
|
|
195
|
+
|
|
196
|
+
# Ent Unique (§3): atomic compare-and-delete of a lock key. Replaces the
|
|
197
|
+
# two-command GET-then-DEL — between those calls the key can expire and a
|
|
198
|
+
# fresh enqueue can grab it, and the bare DEL would then drop the new
|
|
199
|
+
# owner's lock. Shared by `Unique::ServerMiddleware#release` (normal
|
|
200
|
+
# success/start release) and `Unique::DEATH_HANDLER` (automatic-death
|
|
201
|
+
# release) so the two paths cannot drift.
|
|
202
|
+
# KEYS = [unique:<sha256>]
|
|
203
|
+
# ARGV = [owning jid]
|
|
204
|
+
# Returns 1 when the key was deleted, 0 otherwise.
|
|
205
|
+
RELEASE_IF_OWNER = <<~LUA
|
|
206
|
+
if redis.call("get", KEYS[1]) == ARGV[1] then
|
|
207
|
+
return redis.call("del", KEYS[1])
|
|
208
|
+
end
|
|
209
|
+
return 0
|
|
210
|
+
LUA
|
|
211
|
+
|
|
123
212
|
# Pro Fast API (§11): server-side LRANGE+LREM to delete a single job by
|
|
124
213
|
# jid from a queue list. Pure-Ruby Queue#find_job + JobRecord#delete is
|
|
125
214
|
# O(N) round-trips; this is O(1) round-trip with O(N) Lua work.
|
|
@@ -172,10 +261,13 @@ module Wurk
|
|
|
172
261
|
reliable_schedule_promote: RELIABLE_SCHEDULE_PROMOTE,
|
|
173
262
|
batch_push: BATCH_PUSH,
|
|
174
263
|
batch_ack_success: BATCH_ACK_SUCCESS,
|
|
264
|
+
batch_ack_failed: BATCH_ACK_FAILED,
|
|
175
265
|
batch_ack_complete: BATCH_ACK_COMPLETE,
|
|
176
266
|
batch_invalidate: BATCH_INVALIDATE,
|
|
267
|
+
batch_append_callback: BATCH_APPEND_CALLBACK,
|
|
177
268
|
fast_delete_job: FAST_DELETE_JOB,
|
|
178
|
-
fast_delete_by_class: FAST_DELETE_BY_CLASS
|
|
269
|
+
fast_delete_by_class: FAST_DELETE_BY_CLASS,
|
|
270
|
+
release_if_owner: RELEASE_IF_OWNER
|
|
179
271
|
}.merge(FILE_SCRIPTS).freeze
|
|
180
272
|
|
|
181
273
|
# SHA1 of each script source — matches what `SCRIPT LOAD` returns.
|
data/lib/wurk/metrics/history.rb
CHANGED
|
@@ -147,5 +147,10 @@ module Wurk
|
|
|
147
147
|
end
|
|
148
148
|
private_class_method :with_pool
|
|
149
149
|
end
|
|
150
|
+
|
|
151
|
+
# Sidekiq exposes this as `Sidekiq::Metrics::Middleware` (via
|
|
152
|
+
# `Sidekiq::Metrics`, aliased to `Wurk::Metrics` in compat). Mirror that
|
|
153
|
+
# name so the drop-in constant resolves. Spec: docs/target/sidekiq-free.md §10.3.
|
|
154
|
+
Middleware = History
|
|
150
155
|
end
|
|
151
156
|
end
|
data/lib/wurk/metrics/query.rb
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative '../keys'
|
|
3
4
|
require_relative 'history'
|
|
4
5
|
require_relative 'rollup'
|
|
6
|
+
require_relative 'queue_rollup'
|
|
5
7
|
|
|
6
8
|
module Wurk
|
|
7
9
|
module Metrics
|
|
@@ -59,6 +61,43 @@ module Wurk
|
|
|
59
61
|
starts.zip(rows).map { |at, (p, f, ms)| { at: at, p: p.to_i, f: f.to_i, ms: ms.to_i } }
|
|
60
62
|
end
|
|
61
63
|
|
|
64
|
+
# Per-queue size/latency gauge time-series written by
|
|
65
|
+
# Metrics::QueueRollup. `bucket` is '1m'/'5m'/'1h'; `window_seconds` is
|
|
66
|
+
# clamped to the bucket's retention. Returns one entry per live queue
|
|
67
|
+
# (or the explicit `queues:` list) — `[{name:, points: [{at:, size:,
|
|
68
|
+
# latency:}, …]}, …]` — oldest→newest, gap-filled with zeros so a chart
|
|
69
|
+
# has a continuous x-axis. Capped at MAX_QUEUE_SERIES queues to bound the
|
|
70
|
+
# payload; the cap is applied silently (nothing is logged) because queue cardinality is small.
|
|
71
|
+
def queue_history(bucket, window_seconds, queues: nil, now: ::Time.now)
|
|
72
|
+
step, ttl = bucket_spec!(bucket)
|
|
73
|
+
starts = bucket_starts(now, step, clamp_history_window!(window_seconds, ttl))
|
|
74
|
+
names = queue_names(queues)
|
|
75
|
+
return [] if names.empty?
|
|
76
|
+
|
|
77
|
+
hashes = queue_bucket_hashes(bucket, starts)
|
|
78
|
+
names.map { |name| { name: name, points: queue_points(name, starts, hashes) } }
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def queue_bucket_hashes(bucket, starts)
|
|
82
|
+
pipeline_hgetall(starts.map { |s| Wurk::Metrics::QueueRollup.bucket_key(bucket, s) })
|
|
83
|
+
.map { |h| h.is_a?(::Array) ? h.each_slice(2).to_h : (h || {}) }
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
MAX_QUEUE_SERIES = 25
|
|
87
|
+
|
|
88
|
+
def queue_names(queues)
|
|
89
|
+
names = queues || Wurk.redis { |c| c.call('SMEMBERS', Wurk::Keys::QUEUES_SET) }
|
|
90
|
+
names.sort.first(MAX_QUEUE_SERIES)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def queue_points(name, starts, hashes)
|
|
94
|
+
size_field = "#{name}|#{Wurk::Metrics::QueueRollup::SIZE_KIND}"
|
|
95
|
+
lat_field = "#{name}|#{Wurk::Metrics::QueueRollup::LAT_KIND}"
|
|
96
|
+
starts.zip(hashes).map do |at, hash|
|
|
97
|
+
{ at: at, size: hash[size_field].to_i, latency: (hash[lat_field] || 0).to_f }
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
62
101
|
def bucket_spec!(bucket)
|
|
63
102
|
Wurk::Metrics::Rollup::BUCKETS.fetch(bucket) do
|
|
64
103
|
raise ArgumentError, "bucket must be one of #{Wurk::Metrics::Rollup::BUCKETS.keys.inspect}"
|