wurk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +43 -0
- data/CONTRIBUTING.md +73 -0
- data/LICENSE +21 -0
- data/README.md +137 -0
- data/SECURITY.md +39 -0
- data/app/controllers/wurk/api/pagination.rb +67 -0
- data/app/controllers/wurk/api/serializers.rb +131 -0
- data/app/controllers/wurk/api_controller.rb +248 -0
- data/app/controllers/wurk/application_controller.rb +7 -0
- data/app/controllers/wurk/dashboard_controller.rb +48 -0
- data/config/locales/en.yml +15 -0
- data/config/routes.rb +34 -0
- data/exe/wurk +22 -0
- data/lib/active_job/queue_adapters/wurk_adapter.rb +96 -0
- data/lib/generators/wurk/install/install_generator.rb +22 -0
- data/lib/generators/wurk/install/templates/wurk.rb +16 -0
- data/lib/wurk/active_job/wrapper.rb +32 -0
- data/lib/wurk/api/fast.rb +78 -0
- data/lib/wurk/batch/buffer.rb +26 -0
- data/lib/wurk/batch/callback_job.rb +37 -0
- data/lib/wurk/batch/callbacks.rb +176 -0
- data/lib/wurk/batch/client_middleware.rb +27 -0
- data/lib/wurk/batch/death_handler.rb +39 -0
- data/lib/wurk/batch/empty.rb +21 -0
- data/lib/wurk/batch/server_middleware.rb +62 -0
- data/lib/wurk/batch/status.rb +140 -0
- data/lib/wurk/batch.rb +351 -0
- data/lib/wurk/batch_set.rb +67 -0
- data/lib/wurk/capsule.rb +176 -0
- data/lib/wurk/cli.rb +349 -0
- data/lib/wurk/client/buffered.rb +372 -0
- data/lib/wurk/client.rb +330 -0
- data/lib/wurk/compat.rb +136 -0
- data/lib/wurk/component.rb +136 -0
- data/lib/wurk/configuration.rb +373 -0
- data/lib/wurk/context.rb +35 -0
- data/lib/wurk/cron.rb +636 -0
- data/lib/wurk/dashboard_manifest.rb +39 -0
- data/lib/wurk/dead_set.rb +78 -0
- data/lib/wurk/deploy.rb +91 -0
- data/lib/wurk/embedded.rb +94 -0
- data/lib/wurk/encryption.rb +276 -0
- data/lib/wurk/engine.rb +81 -0
- data/lib/wurk/fetcher/reaper.rb +264 -0
- data/lib/wurk/fetcher/reliable.rb +138 -0
- data/lib/wurk/fetcher.rb +11 -0
- data/lib/wurk/health.rb +193 -0
- data/lib/wurk/heartbeat.rb +211 -0
- data/lib/wurk/iterable_job.rb +292 -0
- data/lib/wurk/job/options.rb +70 -0
- data/lib/wurk/job.rb +33 -0
- data/lib/wurk/job_logger.rb +68 -0
- data/lib/wurk/job_record.rb +156 -0
- data/lib/wurk/job_retry.rb +320 -0
- data/lib/wurk/job_set.rb +212 -0
- data/lib/wurk/job_util.rb +162 -0
- data/lib/wurk/keys.rb +52 -0
- data/lib/wurk/launcher.rb +289 -0
- data/lib/wurk/leader.rb +221 -0
- data/lib/wurk/limiter/base.rb +138 -0
- data/lib/wurk/limiter/bucket.rb +80 -0
- data/lib/wurk/limiter/concurrent.rb +132 -0
- data/lib/wurk/limiter/leaky.rb +91 -0
- data/lib/wurk/limiter/points.rb +89 -0
- data/lib/wurk/limiter/server_middleware.rb +77 -0
- data/lib/wurk/limiter/unlimited.rb +48 -0
- data/lib/wurk/limiter/window.rb +80 -0
- data/lib/wurk/limiter.rb +255 -0
- data/lib/wurk/logger.rb +81 -0
- data/lib/wurk/lua/loader.rb +53 -0
- data/lib/wurk/lua.rb +187 -0
- data/lib/wurk/manager.rb +132 -0
- data/lib/wurk/metrics/history.rb +151 -0
- data/lib/wurk/metrics/query.rb +173 -0
- data/lib/wurk/metrics/rollup.rb +169 -0
- data/lib/wurk/metrics/statsd.rb +197 -0
- data/lib/wurk/metrics.rb +7 -0
- data/lib/wurk/middleware/chain.rb +128 -0
- data/lib/wurk/middleware/current_attributes.rb +87 -0
- data/lib/wurk/middleware/expiry.rb +50 -0
- data/lib/wurk/middleware/i18n.rb +63 -0
- data/lib/wurk/middleware/interrupt_handler.rb +45 -0
- data/lib/wurk/middleware/poison_pill.rb +149 -0
- data/lib/wurk/middleware.rb +34 -0
- data/lib/wurk/process_set.rb +243 -0
- data/lib/wurk/processor.rb +247 -0
- data/lib/wurk/queue.rb +108 -0
- data/lib/wurk/queues.rb +80 -0
- data/lib/wurk/rails.rb +9 -0
- data/lib/wurk/railtie.rb +28 -0
- data/lib/wurk/redis_pool.rb +79 -0
- data/lib/wurk/retry_set.rb +17 -0
- data/lib/wurk/scheduled.rb +189 -0
- data/lib/wurk/scheduled_set.rb +18 -0
- data/lib/wurk/sorted_entry.rb +95 -0
- data/lib/wurk/stats.rb +190 -0
- data/lib/wurk/swarm/child_boot.rb +105 -0
- data/lib/wurk/swarm.rb +260 -0
- data/lib/wurk/testing.rb +102 -0
- data/lib/wurk/topology.rb +74 -0
- data/lib/wurk/unique.rb +240 -0
- data/lib/wurk/version.rb +5 -0
- data/lib/wurk/web/config.rb +180 -0
- data/lib/wurk/web/enterprise.rb +138 -0
- data/lib/wurk/web/search.rb +139 -0
- data/lib/wurk/web.rb +25 -0
- data/lib/wurk/work_set.rb +116 -0
- data/lib/wurk/worker/setter.rb +93 -0
- data/lib/wurk/worker.rb +216 -0
- data/lib/wurk.rb +238 -0
- data/vendor/assets/dashboard/assets/index-8P3N_m1X.js +152 -0
- data/vendor/assets/dashboard/assets/index-Bqz4_SOQ.css +1 -0
- data/vendor/assets/dashboard/index.html +13 -0
- data/vendor/assets/dashboard/wurk-manifest.json +4 -0
- metadata +232 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'component'
|
|
4
|
+
require_relative 'keys'
|
|
5
|
+
require_relative 'lua'
|
|
6
|
+
require_relative 'lua/loader'
|
|
7
|
+
require_relative 'client'
|
|
8
|
+
require_relative 'process_set'
|
|
9
|
+
|
|
10
|
+
module Wurk
|
|
11
|
+
# Promotes due jobs from the `retry` and `schedule` sorted sets back onto
|
|
12
|
+
# their target queues. One Poller thread per process; collectively they
|
|
13
|
+
# drain both SETS via an atomic Lua pop-by-score (loaded via the EVALSHA
|
|
14
|
+
# cache, retried once on NOSCRIPT). Polling cadence scales with cluster
|
|
15
|
+
# size so total scheduler traffic stays constant as processes are added.
|
|
16
|
+
#
|
|
17
|
+
# Spec: docs/target/sidekiq-free.md §16. Pluggable via `config[:scheduled_enq]`.
|
|
18
|
+
module Scheduled
|
|
19
|
+
SETS = %w[retry schedule].freeze
|
|
20
|
+
|
|
21
|
+
# Atomic pop-by-score for retry/schedule. Source must match
|
|
22
|
+
# Wurk::Lua::ZPOPBYSCORE byte-for-byte — they share the same SHA.
|
|
23
|
+
LUA_ZPOPBYSCORE = Wurk::Lua::ZPOPBYSCORE
|
|
24
|
+
|
|
25
|
+
# Drains both SETS each call. Iterates per-set inside a single pooled
|
|
26
|
+
# checkout so the EVALSHA + LPUSH loop avoids re-checkout per job.
|
|
27
|
+
class Enq
|
|
28
|
+
include Component
|
|
29
|
+
|
|
30
|
+
LUA_ZPOPBYSCORE = Wurk::Lua::ZPOPBYSCORE
|
|
31
|
+
|
|
32
|
+
# Builds an enqueuer bound to one configuration container.
#
# @param container the config object; supplies the Redis checkout used by
#   #enqueue_jobs and is handed to the Client that re-pushes popped jobs.
def initialize(container)
  @done = false
  @config = container
  @client = Client.new(config: @config)
end
|
|
37
|
+
|
|
38
|
+
# Drains every due job out of each named sorted set, re-pushing each one
# through the client. All sets are processed on one Redis checkout.
#
# @param sorted_sets [Array<String>] ZSET names to drain (defaults to SETS).
def enqueue_jobs(sorted_sets = SETS)
  @config.redis do |redis|
    sorted_sets.each do |zset_name|
      drain_set(redis, zset_name)
    end
  end
end
|
|
46
|
+
|
|
47
|
+
# Flags the enqueuer as stopped; the drain loop checks this flag before
# every pop and bails out once it is set.
def terminate
  @done = true
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
# Pops due members from one sorted set until it has none left (or the
# enqueuer is terminated) and pushes each decoded job via the client.
#
# @param conn the checked-out Redis connection.
# @param sset [String] name of the sorted set to drain.
def drain_set(conn, sset)
  # Cutoff is fixed once per set so the window does not move while we loop.
  cutoff = real_time.to_s
  until @done
    payload = Wurk::Lua::Loader.eval_cached(conn, :zpopbyscore, keys: [sset], argv: [cutoff])
    break unless payload

    @client.push(Wurk.load_json(payload))
  end
end
|
|
64
|
+
|
|
65
|
+
# Wall-clock time as Float seconds since the epoch (CLOCK_REALTIME).
def real_time
  ::Process.clock_gettime(::Process::CLOCK_REALTIME, :float_second)
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Single thread that wakes on a randomized interval, drains both ZSETs,
|
|
71
|
+
# then sleeps again. Random spread prevents the cluster from dogpiling
|
|
72
|
+
# Redis at the top of each cadence.
|
|
73
|
+
class Poller
|
|
74
|
+
include Component
|
|
75
|
+
|
|
76
|
+
INITIAL_WAIT = 10
|
|
77
|
+
|
|
78
|
+
attr_accessor :rnd
|
|
79
|
+
|
|
80
|
+
# Prepares (but does not start) the poller.
#
# @param config the configuration object; `config[:scheduled_enq]` may name
#   a replacement enqueuer class, otherwise Enq is used.
def initialize(config)
  @config = config
  enq_class = config[:scheduled_enq] || Enq
  @enq = enq_class.new(config)
  @thread = nil
  @done = false
  # Mutex + condition variable implement the interruptible sleep.
  @mutex = ::Mutex.new
  @sleeper = ::ConditionVariable.new
  @rnd = ::Random.new
  # Zero makes the first #cleanup call take the full-prune branch.
  @last_cleanup_ms = 0
end
|
|
90
|
+
|
|
91
|
+
# Starts the scheduler thread once; later calls return the existing thread.
# The thread performs the initial wait, then alternates sweep and sleep
# until the poller is terminated, and logs on exit.
def start
  return @thread if @thread

  @thread = safe_thread('scheduler') do
    initial_wait
    loop do
      break if @done

      enqueue
      wait
    end
    logger.info('Scheduler exiting...')
  end
end
|
|
104
|
+
|
|
105
|
+
# Requests shutdown. Under the mutex it sets the stop flag, forwards the
# stop to the enqueuer so a drain in progress breaks out, and signals the
# condition variable so a sleeping scheduler thread wakes up.
def terminate
  @mutex.synchronize do
    @done = true
    @enq.terminate
    @sleeper.signal
  end
end
|
|
115
|
+
|
|
116
|
+
# Runs one sweep through the enqueuer. Any StandardError is passed to
# handle_exception with a 'scheduler' context instead of propagating, so
# the polling loop keeps running.
def enqueue
  @enq.enqueue_jobs
rescue StandardError => error
  handle_exception(error, { context: 'scheduler' })
end
|
|
123
|
+
|
|
124
|
+
private
|
|
125
|
+
|
|
126
|
+
# Sleeps before the first sweep. The delay is
# `config[:scheduler_initial_wait]` when set, INITIAL_WAIT otherwise.
# Skipped entirely if the poller was already terminated; #terminate's
# signal also cuts the sleep short.
def initial_wait
  delay = @config[:scheduler_initial_wait] || INITIAL_WAIT
  @mutex.synchronize do
    next if @done

    @sleeper.wait(@mutex, delay)
  end
end
|
|
135
|
+
|
|
136
|
+
# Sleeps until the next sweep is due or until #terminate signals the
# condition variable.
#
# The interval is computed before taking the mutex because
# random_poll_interval reaches Redis (via process_count/cleanup): holding
# the mutex across that call would make #terminate wait on Redis. A failure
# while computing the interval is reported through handle_exception and
# replaced by a fixed 5 second back-off, so a Redis error here does not
# escape the polling loop in #start.
def wait
  return if @done

  interval = begin
    random_poll_interval
  rescue StandardError => e
    handle_exception(e, { context: 'scheduler' })
    5
  end

  @mutex.synchronize do
    # Re-check under the lock: terminate may have run while we computed.
    @sleeper.wait(@mutex, interval) unless @done
  end
end
|
|
141
|
+
|
|
142
|
+
# Picks the next sleep length with jitter around the average interval.
#
#   fewer than 10 processes: base * rand + base / 2
#   10 or more processes:    base * rand * 2
#
# `base` comes from poll_interval_average for the current process count.
def random_poll_interval
  procs = process_count
  base = poll_interval_average(procs)
  jitter = @rnd.rand
  return (base * jitter) + (base / 2.0) if procs < 10

  base * jitter * 2
end
|
|
156
|
+
|
|
157
|
+
# Average interval between sweeps: the explicit
# `config[:poll_interval_average]` when present, otherwise the value scaled
# by process count.
def poll_interval_average(count)
  override = @config[:poll_interval_average]
  override || scaled_poll_interval(count)
end
|
|
160
|
+
|
|
161
|
+
# Process count multiplied by `config[:average_scheduled_poll_interval]`.
def scaled_poll_interval(count)
  per_process = @config[:average_scheduled_poll_interval]
  count * per_process
end
|
|
164
|
+
|
|
165
|
+
# Number of registered processes as reported by #cleanup, never less
# than 1 so the derived poll interval cannot collapse to zero.
def process_count
  [cleanup, 1].max
end
|
|
172
|
+
|
|
173
|
+
# Returns the size of the process registry.
#
# When more than 60_000 ms have passed since the last prune it records the
# time and returns `ProcessSet.new(true).size`; otherwise it returns a plain
# SCARD of Keys::PROCESSES.
def cleanup
  @config.redis do |conn|
    prune_due = (mono_ms - @last_cleanup_ms) > 60_000
    next conn.call('SCARD', Keys::PROCESSES) unless prune_due

    @last_cleanup_ms = mono_ms
    ProcessSet.new(true).size
  end
end
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'job_set'
|
|
4
|
+
|
|
5
|
+
module Wurk
|
|
6
|
+
# ZSET of jobs scheduled to run at a future time (score = epoch seconds).
|
|
7
|
+
# The scheduled-poller pops eligible members and re-enqueues via the
|
|
8
|
+
# client. Wire-compat with Sidekiq's `schedule` key.
|
|
9
|
+
#
|
|
10
|
+
# Spec: docs/target/sidekiq-free.md §19.5.
|
|
11
|
+
class ScheduledSet < JobSet
  # @param name [String] Redis key of the sorted set. Defaults to
  #   'schedule'; a different name lets tests work on a separate ZSET.
  def initialize(name = 'schedule')
    super(name)
  end
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'job_record'
|
|
4
|
+
|
|
5
|
+
module Wurk
|
|
6
|
+
# One entry inside a sorted-set view (Retry/Scheduled/Dead). Carries the
|
|
7
|
+
# member's `score` alongside the JobRecord so callers can re-target the
|
|
8
|
+
# exact (score, value) pair when mutating Redis — sorted-set membership is
|
|
9
|
+
# by value, but ZREM-by-value is faster than ZRANGEBYSCORE+filter.
|
|
10
|
+
#
|
|
11
|
+
# The `id` field ("<score>|<jid>") is the Sidekiq wire-compat identifier
|
|
12
|
+
# used by dashboards and third-party tooling. Don't reformat it.
|
|
13
|
+
#
|
|
14
|
+
# Spec: docs/target/sidekiq-free.md §19.4.
|
|
15
|
+
class SortedEntry < JobRecord
|
|
16
|
+
attr_reader :score, :parent
|
|
17
|
+
|
|
18
|
+
# @param parent [JobSet, nil] the set this entry belongs to; nil when the
#   entry is built on its own.
# @param score [Numeric] the member's ZSET score; stored as a Float.
# @param item [String, Hash] raw JSON or parsed payload, passed to JobRecord.
def initialize(parent, score, item)
  super(item)
  @parent = parent
  @score = score.to_f
end
|
|
26
|
+
|
|
27
|
+
# Identifier in "<score>|<jid>" form.
def id
  "#{score}|#{jid}"
end

# The score interpreted as epoch seconds, as a UTC Time.
def at
  ::Time.at(score).utc
end
|
|
30
|
+
|
|
31
|
+
# Deletes this entry from its parent set. With a cached raw `@value` the
# parent deletes by exact value; without one it deletes by (score, jid).
def delete
  return @parent.delete_by_jid(@score, jid) unless @value

  @parent.delete_by_value(@parent.name, @value)
end
|
|
41
|
+
|
|
42
|
+
# Moves this entry to a new time by applying ZINCRBY with the difference
# between the target and the score held by this object.
#
# NOTE(review): @score is not refreshed afterwards, so a second call on the
# same instance would compute its delta from the old score — confirm
# callers re-fetch the entry before rescheduling again.
#
# @param at [#to_f] absolute target time (epoch seconds).
def reschedule(at) # rubocop:disable Naming/MethodParameterName
  Wurk.redis do |conn|
    zset = @parent.name
    delta = at.to_f - @score
    conn.call('ZINCRBY', zset, delta, value)
  end
end
|
|
48
|
+
|
|
49
|
+
# Takes the entry out of its set and pushes it through a new Client. If
# the payload carries a `retry_count`, it is lowered by one first.
# Nothing is pushed when the removal does not succeed.
def add_to_queue
  remove_job do |job|
    attempts = job['retry_count']
    job['retry_count'] = attempts.to_i - 1 if attempts
    Client.new.push(job)
  end
end
|
|
58
|
+
|
|
59
|
+
# Takes the entry out of its set and pushes it through a new Client with
# the payload unchanged (unlike add_to_queue, `retry_count` is untouched).
def retry
  remove_job { |job| Client.new.push(job) }
end
|
|
67
|
+
|
|
68
|
+
# Takes the entry out of its set and hands the re-serialized payload to a
# new DeadSet, passing `notify_failure: false`.
def kill
  remove_job do |job|
    morgue = DeadSet.new
    morgue.kill(Wurk.dump_json(job), notify_failure: false)
  end
end
|
|
76
|
+
|
|
77
|
+
# True when the payload has a non-nil 'error_class'.
def error?
  !item['error_class'].nil?
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
# Asks the parent set to remove this entry and, only if that succeeds,
# yields a copy of the payload to the caller's block.
#
# @yieldparam [Hash] a shallow copy of the payload, taken before removal.
# @return [Hash, nil] the yielded copy, or nil (without yielding) when the
#   parent reports that nothing was removed.
def remove_job
  snapshot = item.dup
  return unless @parent.remove_job(self)

  snapshot.tap { |job| yield job }
end
|
|
94
|
+
end
|
|
95
|
+
end
|
data/lib/wurk/stats.rb
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'date'
|
|
4
|
+
|
|
5
|
+
module Wurk
|
|
6
|
+
# Read-only inspector for cluster state in Redis. The cheap counters are
|
|
7
|
+
# eagerly pipelined at initialize so a single instance can answer many
|
|
8
|
+
# questions without re-querying; the unbounded ones (`enqueued`,
|
|
9
|
+
# `workers_size`, `queue_summaries`) re-fetch lazily.
|
|
10
|
+
#
|
|
11
|
+
# Wire-compat is sacred: every key matches the Sidekiq OSS schema exactly.
|
|
12
|
+
# Spec: docs/target/sidekiq-free.md §19.1.
|
|
13
|
+
class Stats
|
|
14
|
+
# Immutable per-queue snapshot with members `name`, `size`, `latency` and
# `paused`, plus a `paused?` predicate. Keep the member list stable:
# consumers read these fields by name.
QueueSummary = Data.define(:name, :size, :latency, :paused) do
  def paused? = paused
end
|
|
19
|
+
|
|
20
|
+
# Loads the cheap counters up front so the reader methods answer from
# memory.
def initialize
  fetch_stats_fast!
end
|
|
23
|
+
|
|
24
|
+
# Readers for the counters captured by fetch_stats_fast!. Each raises
# KeyError if its counter was never loaded.
def processed
  @stats.fetch(:processed)
end

def failed
  @stats.fetch(:failed)
end

def expired
  @stats.fetch(:expired)
end

def scheduled_size
  @stats.fetch(:scheduled_size)
end

def retry_size
  @stats.fetch(:retry_size)
end

def dead_size
  @stats.fetch(:dead_size)
end

def processes_size
  @stats.fetch(:processes_size)
end
|
|
31
|
+
|
|
32
|
+
# Total number of jobs across all known queues. Re-reads every queue
# length on each call, so cost grows with the number of queues.
def enqueued
  queues.values.sum
end
|
|
37
|
+
|
|
38
|
+
# Adds up the `busy` hash field of every identity in the process set.
# One pipelined HGET per identity; returns 0 when the set is empty.
def workers_size
  Wurk.redis do |conn|
    identities = conn.call('SMEMBERS', Keys::PROCESSES)
    if identities.empty?
      0
    else
      replies = conn.pipelined do |pipe|
        identities.each do |identity|
          pipe.call('HGET', identity, 'busy')
        end
      end
      # A missing field comes back as nil, which to_i turns into 0.
      replies.sum { |count| count.to_i }
    end
  end
end
|
|
51
|
+
|
|
52
|
+
# Looks up every queue name in Keys::QUEUES_SET and its list length.
#
# @return [Hash{String=>Integer}] queue name => LLEN; empty when no queues
#   are registered.
def queues
  Wurk.redis do |conn|
    names = conn.call('SMEMBERS', Keys::QUEUES_SET)
    if names.empty?
      {}
    else
      lengths = conn.pipelined do |pipe|
        names.each do |queue_name|
          pipe.call('LLEN', Keys.queue(queue_name))
        end
      end
      counts = lengths.map { |len| len.to_i }
      names.zip(counts).to_h
    end
  end
end
|
|
64
|
+
|
|
65
|
+
# Builds one summary per registered queue. Reads the queue names and the
# 'paused' set, then pipelines an LLEN and a tail LRANGE (-1, -1) for each
# queue and hands everything to build_summaries.
#
# @return [Array<QueueSummary>] empty when no queues are registered.
def queue_summaries
  Wurk.redis do |conn|
    names = conn.call('SMEMBERS', Keys::QUEUES_SET)
    if names.empty?
      []
    else
      paused_set = conn.call('SMEMBERS', 'paused')
      replies = conn.pipelined do |pipe|
        names.each do |queue_name|
          # Two replies per queue, in this order: length, then last element.
          pipe.call('LLEN', Keys.queue(queue_name))
          pipe.call('LRANGE', Keys.queue(queue_name), -1, -1)
        end
      end
      build_summaries(names, replies, paused_set)
    end
  end
end
|
|
81
|
+
|
|
82
|
+
# Latency in seconds of the `default` queue, derived from the element at
# the tail of its list (LRANGE -1 -1).
def default_queue_latency
  now_ms = ::Process.clock_gettime(::Process::CLOCK_REALTIME, :millisecond)
  tail = Wurk.redis do |conn|
    conn.call('LRANGE', Keys.queue('default'), -1, -1)
  end
  compute_latency(tail.first, now_ms)
end
|
|
88
|
+
|
|
89
|
+
# Sets the selected global counters back to 0 with SET (the keys are kept,
# not deleted). With no arguments all of `failed`, `processed` and
# `expired` are reset; otherwise only the recognised names among the
# arguments (symbols, strings or nested arrays).
def reset(*stats)
  all = %w[failed processed expired]
  to_clear =
    if stats.empty?
      all
    else
      all & stats.flatten.map { |name| name.to_s }
    end

  Wurk.redis do |conn|
    conn.pipelined do |pipe|
      to_clear.each do |counter|
        pipe.call('SET', "stat:#{counter}", 0)
      end
    end
  end
end
|
|
101
|
+
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
# The cheap counters fetched in a single pipeline when a Stats object is
# built. Kept as one ordered table (stat name => Redis command) so the
# command list and the key list below cannot drift out of step.
FAST_COUNTERS = {
  processed: ['GET', 'stat:processed'],
  failed: ['GET', 'stat:failed'],
  expired: ['GET', Keys::STAT_EXPIRED],
  scheduled_size: ['ZCARD', Keys::SCHEDULE],
  retry_size: ['ZCARD', Keys::RETRY],
  dead_size: ['ZCARD', Keys::DEAD],
  processes_size: ['SCARD', Keys::PROCESSES]
}.freeze
FAST_QUERIES = FAST_COUNTERS.values.freeze
FAST_KEYS = FAST_COUNTERS.keys.freeze
private_constant :FAST_COUNTERS, :FAST_QUERIES, :FAST_KEYS
|
|
117
|
+
|
|
118
|
+
# Runs every FAST_QUERIES command in one pipeline and stores the replies,
# coerced with to_i, under the matching FAST_KEYS name in @stats.
def fetch_stats_fast!
  replies = Wurk.redis do |conn|
    conn.pipelined do |pipe|
      FAST_QUERIES.each { |command| pipe.call(*command) }
    end
  end
  counts = replies.map { |reply| reply.to_i }
  @stats = FAST_KEYS.zip(counts).to_h
end
|
|
124
|
+
|
|
125
|
+
# Turns the flat pipeline replies into QueueSummary objects.
#
# @param names [Array<String>] queue names, in the order they were queried.
# @param results [Array] two replies per queue: LLEN at index 2*i and the
#   tail LRANGE array at index 2*i + 1.
# @param paused_set [Array<String>] names of paused queues.
# @return [Array<QueueSummary>]
def build_summaries(names, results, paused_set)
  now_ms = ::Process.clock_gettime(::Process::CLOCK_REALTIME, :millisecond)
  names.map.with_index do |queue_name, position|
    length_reply = results[position * 2]
    tail_reply = results[(position * 2) + 1]
    QueueSummary.new(
      name: queue_name,
      size: length_reply.to_i,
      latency: compute_latency(tail_reply.first, now_ms),
      paused: paused_set.include?(queue_name)
    )
  end
end
|
|
136
|
+
|
|
137
|
+
# Seconds between a job's `enqueued_at` and `now_ms`.
#
# `enqueued_at` may be epoch seconds (legacy Float) or epoch milliseconds
# (current Integer); values below 10_000_000_000 are treated as seconds.
# Returns 0.0 — never raises — when there is no payload, the payload is not
# a JSON object, `enqueued_at` is absent or not numeric, or the timestamp
# lies in the future. A missing timestamp must not fall back to epoch 0:
# that would report a latency of roughly `now` seconds.
#
# @param payload_json [String, nil] raw job JSON from the tail of a queue.
# @param now_ms [Integer] current wall-clock time in milliseconds.
# @return [Float] latency in seconds, never negative.
def compute_latency(payload_json, now_ms)
  return 0.0 if payload_json.nil?

  job = Wurk.load_json(payload_json)
  stamp = job.is_a?(::Hash) ? job['enqueued_at'] : nil
  return 0.0 if stamp.nil?

  enq = Float(stamp)
  enq_ms = enq < 10_000_000_000 ? enq * 1_000 : enq
  diff = (now_ms - enq_ms) / 1_000.0
  diff.negative? ? 0.0 : diff
rescue ::JSON::ParserError, ::TypeError, ::ArgumentError
  0.0
end
|
|
151
|
+
|
|
152
|
+
# Daily processed/failed/expired totals read from the
# `stat:<name>:YYYY-MM-DD` string keys. Days with no key count as 0.
# At most MAX_DAYS (1825) days can be requested.
#
# NOTE(review): the default start date is Date.today, i.e. the local time
# zone. If the counters are written under UTC dates, "today" can be off by
# one day near midnight — confirm against the code that writes these keys.
class History
  MAX_DAYS = 1_825

  # @param days_previous [Integer] number of days to report, 1..MAX_DAYS.
  # @param start_date [Date, nil] most recent day; defaults to today.
  # @param pool optional connection pool (responds to `with`); when nil,
  #   Wurk.redis is used.
  # @raise [ArgumentError] when days_previous is outside 1..MAX_DAYS.
  def initialize(days_previous, start_date = nil, pool: nil)
    unless (1..MAX_DAYS).cover?(days_previous)
      raise ArgumentError, "days_previous must be in 1..#{MAX_DAYS}"
    end

    @days_previous = days_previous
    @start_date = start_date || ::Date.today
    @pool = pool
  end

  # @return [Hash{String=>Integer}] date => count, newest day first.
  def processed
    date_stat_hash('processed')
  end

  # @return [Hash{String=>Integer}] date => count, newest day first.
  def failed
    date_stat_hash('failed')
  end

  # @return [Hash{String=>Integer}] date => count, newest day first.
  def expired
    date_stat_hash('expired')
  end

  private

  # Fetches one counter for every day in the window with a single pipeline.
  def date_stat_hash(stat)
    days = (0...@days_previous).map do |offset|
      (@start_date - offset).strftime('%Y-%m-%d')
    end
    replies = with_redis do |conn|
      conn.pipelined do |pipe|
        days.each { |d| pipe.call('GET', "stat:#{stat}:#{d}") }
      end
    end
    counts = replies.map { |reply| reply.to_i }
    days.zip(counts).to_h
  end

  # Yields a connection from the injected pool, or from Wurk.redis.
  def with_redis(&block)
    return Wurk.redis(&block) unless @pool

    @pool.with(&block)
  end
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../launcher'
|
|
4
|
+
require_relative '../fetcher/reliable'
|
|
5
|
+
|
|
6
|
+
module Wurk
|
|
7
|
+
class Swarm
|
|
8
|
+
# Step 5 of the boot ordering. Runs inside each forked child:
|
|
9
|
+
# * reset signal traps inherited from the parent,
|
|
10
|
+
# * reconnect ActiveRecord (if loaded) + open a fresh Redis pool,
|
|
11
|
+
# * apply the slot's queues + concurrency to the default capsule,
|
|
12
|
+
# * install child signal handlers (TERM/INT drain, TSTP quiet,
|
|
13
|
+
# USR2 reopen logs),
|
|
14
|
+
# * launch the Wurk::Launcher and block until shutdown.
|
|
15
|
+
#
|
|
16
|
+
# Kept separate from Wurk::Swarm so the parent supervisor stays
|
|
17
|
+
# focused on PID supervision (SRP).
|
|
18
|
+
class ChildBoot
|
|
19
|
+
CHILD_SIGNALS = { 'TERM' => :term, 'INT' => :term, 'TSTP' => :tstp, 'USR2' => :usr2 }.freeze
|
|
20
|
+
|
|
21
|
+
# @param config the configuration shared with the launcher.
# @param slot object supplying this child's `queues` and `concurrency`.
# @param index this child's number, used in the crash log line.
def initialize(config, slot, index)
  @config = config
  @slot = slot
  @index = index
  # Signal traps only enqueue a symbol here; the dispatcher thread acts on it.
  @signal_queue = ::Thread::Queue.new
end
|
|
27
|
+
|
|
28
|
+
# Child entry point. Resets inherited traps, reconnects, applies the slot,
# starts the launcher and blocks in wait_loop, then exits 0. A
# StandardError or Wurk::Shutdown anywhere in that sequence is logged and
# the process exits 1 instead.
def run
  reset_inherited_signals
  reconnect_after_fork
  Wurk.server = true
  apply_slot_to_config

  launcher = Wurk::Launcher.new(@config)
  install_signal_handlers(launcher)
  launcher.run
  wait_loop(launcher)
  exit(0)
rescue StandardError, ::Wurk::Shutdown => error
  @config.logger.error do
    "swarm child ##{@index} (#{::Process.pid}) crashed: #{error.class}: #{error.message}"
  end
  exit(1)
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
# Restores the default disposition for TERM, INT, TSTP, CONT, USR1 and
# USR2 so handlers inherited from the parent do not run in the child.
def reset_inherited_signals
  %w[TERM INT TSTP CONT USR1 USR2].each do |signal_name|
    ::Signal.trap(signal_name, 'DEFAULT')
  end
end
|
|
50
|
+
|
|
51
|
+
# Replaces connections inherited across fork: always resets the Redis
# pools and, when ActiveRecord is loaded, re-establishes its connection.
#
# The ActiveRecord step stays best-effort (a failure does not abort the
# child) but is now logged, so a child running without a database
# connection is visible instead of failing later inside jobs.
#
# @return [Object, nil] nil when ActiveRecord is absent or the reconnect
#   failed; otherwise whatever establish_connection returns.
def reconnect_after_fork
  @config.reset_redis_pools!
  return unless defined?(::ActiveRecord::Base)

  begin
    ::ActiveRecord::Base.establish_connection
  rescue StandardError => e
    @config.logger.warn do
      "swarm child ##{@index} (#{::Process.pid}) could not re-establish ActiveRecord connection: " \
        "#{e.class}: #{e.message}"
    end
    nil
  end
end
|
|
61
|
+
|
|
62
|
+
# Copies this slot's queues and concurrency onto the default capsule.
# Nothing else is configured here.
def apply_slot_to_config
  capsule = @config.default_capsule
  capsule.queues = @slot.queues
  capsule.concurrency = @slot.concurrency
end
|
|
70
|
+
|
|
71
|
+
# Traps every signal listed in CHILD_SIGNALS so that it only pushes its
# symbol onto the signal queue, then starts the dispatcher thread that
# consumes the queue and drives the launcher.
def install_signal_handlers(launcher)
  CHILD_SIGNALS.each_pair do |signal_name, event|
    ::Signal.trap(signal_name) { @signal_queue << event }
  end
  @dispatcher = Thread.new { dispatch_signals(launcher) }
end
|
|
75
|
+
|
|
76
|
+
# Consumes the signal queue until a :term event arrives.
#
#   :term  runs launcher.stop and then returns, ending the thread
#   :tstp  runs launcher.quiet and keeps looping
#   :usr2  reopens the logs and keeps looping
#
# Because :term returns only after launcher.stop has finished, anything
# joining this thread is held until the stop completes.
def dispatch_signals(launcher)
  loop do
    event = @signal_queue.pop
    if event == :term
      launcher.stop
      break
    end

    launcher.quiet if event == :tstp
    reopen_logs if event == :usr2
  end
end
|
|
92
|
+
|
|
93
|
+
# Asks the configured logger to reopen its output if it supports
# `reopen`. Any StandardError raised while doing so is swallowed and nil
# is returned.
def reopen_logs
  sink = @config.logger
  return unless sink.respond_to?(:reopen)

  sink.reopen
rescue StandardError
  nil
end
|
|
99
|
+
|
|
100
|
+
# Blocks the calling thread until the dispatcher thread finishes.
# The launcher argument is accepted but not used.
def wait_loop(_launcher)
  @dispatcher.join
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|