wurk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +43 -0
- data/CONTRIBUTING.md +73 -0
- data/LICENSE +21 -0
- data/README.md +137 -0
- data/SECURITY.md +39 -0
- data/app/controllers/wurk/api/pagination.rb +67 -0
- data/app/controllers/wurk/api/serializers.rb +131 -0
- data/app/controllers/wurk/api_controller.rb +248 -0
- data/app/controllers/wurk/application_controller.rb +7 -0
- data/app/controllers/wurk/dashboard_controller.rb +48 -0
- data/config/locales/en.yml +15 -0
- data/config/routes.rb +34 -0
- data/exe/wurk +22 -0
- data/lib/active_job/queue_adapters/wurk_adapter.rb +96 -0
- data/lib/generators/wurk/install/install_generator.rb +22 -0
- data/lib/generators/wurk/install/templates/wurk.rb +16 -0
- data/lib/wurk/active_job/wrapper.rb +32 -0
- data/lib/wurk/api/fast.rb +78 -0
- data/lib/wurk/batch/buffer.rb +26 -0
- data/lib/wurk/batch/callback_job.rb +37 -0
- data/lib/wurk/batch/callbacks.rb +176 -0
- data/lib/wurk/batch/client_middleware.rb +27 -0
- data/lib/wurk/batch/death_handler.rb +39 -0
- data/lib/wurk/batch/empty.rb +21 -0
- data/lib/wurk/batch/server_middleware.rb +62 -0
- data/lib/wurk/batch/status.rb +140 -0
- data/lib/wurk/batch.rb +351 -0
- data/lib/wurk/batch_set.rb +67 -0
- data/lib/wurk/capsule.rb +176 -0
- data/lib/wurk/cli.rb +349 -0
- data/lib/wurk/client/buffered.rb +372 -0
- data/lib/wurk/client.rb +330 -0
- data/lib/wurk/compat.rb +136 -0
- data/lib/wurk/component.rb +136 -0
- data/lib/wurk/configuration.rb +373 -0
- data/lib/wurk/context.rb +35 -0
- data/lib/wurk/cron.rb +636 -0
- data/lib/wurk/dashboard_manifest.rb +39 -0
- data/lib/wurk/dead_set.rb +78 -0
- data/lib/wurk/deploy.rb +91 -0
- data/lib/wurk/embedded.rb +94 -0
- data/lib/wurk/encryption.rb +276 -0
- data/lib/wurk/engine.rb +81 -0
- data/lib/wurk/fetcher/reaper.rb +264 -0
- data/lib/wurk/fetcher/reliable.rb +138 -0
- data/lib/wurk/fetcher.rb +11 -0
- data/lib/wurk/health.rb +193 -0
- data/lib/wurk/heartbeat.rb +211 -0
- data/lib/wurk/iterable_job.rb +292 -0
- data/lib/wurk/job/options.rb +70 -0
- data/lib/wurk/job.rb +33 -0
- data/lib/wurk/job_logger.rb +68 -0
- data/lib/wurk/job_record.rb +156 -0
- data/lib/wurk/job_retry.rb +320 -0
- data/lib/wurk/job_set.rb +212 -0
- data/lib/wurk/job_util.rb +162 -0
- data/lib/wurk/keys.rb +52 -0
- data/lib/wurk/launcher.rb +289 -0
- data/lib/wurk/leader.rb +221 -0
- data/lib/wurk/limiter/base.rb +138 -0
- data/lib/wurk/limiter/bucket.rb +80 -0
- data/lib/wurk/limiter/concurrent.rb +132 -0
- data/lib/wurk/limiter/leaky.rb +91 -0
- data/lib/wurk/limiter/points.rb +89 -0
- data/lib/wurk/limiter/server_middleware.rb +77 -0
- data/lib/wurk/limiter/unlimited.rb +48 -0
- data/lib/wurk/limiter/window.rb +80 -0
- data/lib/wurk/limiter.rb +255 -0
- data/lib/wurk/logger.rb +81 -0
- data/lib/wurk/lua/loader.rb +53 -0
- data/lib/wurk/lua.rb +187 -0
- data/lib/wurk/manager.rb +132 -0
- data/lib/wurk/metrics/history.rb +151 -0
- data/lib/wurk/metrics/query.rb +173 -0
- data/lib/wurk/metrics/rollup.rb +169 -0
- data/lib/wurk/metrics/statsd.rb +197 -0
- data/lib/wurk/metrics.rb +7 -0
- data/lib/wurk/middleware/chain.rb +128 -0
- data/lib/wurk/middleware/current_attributes.rb +87 -0
- data/lib/wurk/middleware/expiry.rb +50 -0
- data/lib/wurk/middleware/i18n.rb +63 -0
- data/lib/wurk/middleware/interrupt_handler.rb +45 -0
- data/lib/wurk/middleware/poison_pill.rb +149 -0
- data/lib/wurk/middleware.rb +34 -0
- data/lib/wurk/process_set.rb +243 -0
- data/lib/wurk/processor.rb +247 -0
- data/lib/wurk/queue.rb +108 -0
- data/lib/wurk/queues.rb +80 -0
- data/lib/wurk/rails.rb +9 -0
- data/lib/wurk/railtie.rb +28 -0
- data/lib/wurk/redis_pool.rb +79 -0
- data/lib/wurk/retry_set.rb +17 -0
- data/lib/wurk/scheduled.rb +189 -0
- data/lib/wurk/scheduled_set.rb +18 -0
- data/lib/wurk/sorted_entry.rb +95 -0
- data/lib/wurk/stats.rb +190 -0
- data/lib/wurk/swarm/child_boot.rb +105 -0
- data/lib/wurk/swarm.rb +260 -0
- data/lib/wurk/testing.rb +102 -0
- data/lib/wurk/topology.rb +74 -0
- data/lib/wurk/unique.rb +240 -0
- data/lib/wurk/version.rb +5 -0
- data/lib/wurk/web/config.rb +180 -0
- data/lib/wurk/web/enterprise.rb +138 -0
- data/lib/wurk/web/search.rb +139 -0
- data/lib/wurk/web.rb +25 -0
- data/lib/wurk/work_set.rb +116 -0
- data/lib/wurk/worker/setter.rb +93 -0
- data/lib/wurk/worker.rb +216 -0
- data/lib/wurk.rb +238 -0
- data/vendor/assets/dashboard/assets/index-8P3N_m1X.js +152 -0
- data/vendor/assets/dashboard/assets/index-Bqz4_SOQ.css +1 -0
- data/vendor/assets/dashboard/index.html +13 -0
- data/vendor/assets/dashboard/wurk-manifest.json +4 -0
- metadata +232 -0
data/lib/wurk/swarm.rb
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'component'
|
|
4
|
+
require_relative 'launcher'
|
|
5
|
+
require_relative 'fetcher/reliable'
|
|
6
|
+
require_relative 'keys'
|
|
7
|
+
require_relative 'swarm/child_boot'
|
|
8
|
+
|
|
9
|
+
module Wurk
|
|
10
|
+
# Parent supervisor. Forks N children per the worker topology, monitors
|
|
11
|
+
# PIDs, relays signals, respawns crashed children, handles rolling
|
|
12
|
+
# restart on SIGUSR1, recycles RSS-bloated children.
|
|
13
|
+
#
|
|
14
|
+
# Boot ordering (must be exact — see docs/idea/03-process-model.md):
|
|
15
|
+
# 1. Host app boots fully; eager loads done.
|
|
16
|
+
# 2. Railtie `after_initialize` fires.
|
|
17
|
+
# 3. `boot` closes parent-side connections (Redis, ActiveRecord).
|
|
18
|
+
# 4. `boot` forks N children.
|
|
19
|
+
# 5. Each child reconnects DB + opens a fresh Redis pool, then
|
|
20
|
+
# installs its own signal handlers and starts the Launcher.
|
|
21
|
+
# 6. Parent calls `supervise` to enter the wait/relay loop.
|
|
22
|
+
#
|
|
23
|
+
# Signals (see docs/idea/04-signals.md):
|
|
24
|
+
# TERM/INT → `shutdown` (graceful drain)
|
|
25
|
+
# TSTP → relay TSTP (pause fetch)
|
|
26
|
+
# CONT → relay CONT (resume fetch)
|
|
27
|
+
# USR1 → `rolling_restart` (zero-downtime cycle)
|
|
28
|
+
class Swarm
|
|
29
|
+
include Component
|
|
30
|
+
|
|
31
|
+
SUPERVISE_TICK = 0.2
|
|
32
|
+
RESPAWN_BACKOFF = 1.0
|
|
33
|
+
HEARTBEAT_WAIT = 30
|
|
34
|
+
MEMORY_CHECK_INTERVAL = 10
|
|
35
|
+
DEFAULT_SHUTDOWN_TIMEOUT = 25
|
|
36
|
+
|
|
37
|
+
attr_reader :topology, :children
|
|
38
|
+
|
|
39
|
+
def initialize(topology:, config: Wurk.configuration, memory_limit: nil,
|
|
40
|
+
shutdown_timeout: DEFAULT_SHUTDOWN_TIMEOUT)
|
|
41
|
+
@topology = topology
|
|
42
|
+
@config = config
|
|
43
|
+
@memory_limit = memory_limit
|
|
44
|
+
@shutdown_timeout = shutdown_timeout
|
|
45
|
+
@children = {}
|
|
46
|
+
@assignments = []
|
|
47
|
+
@stopping = false
|
|
48
|
+
@last_memory_check = 0
|
|
49
|
+
@signal_queue = ::Thread::Queue.new
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# `install_signals:` is false in tests so the integration suite can
|
|
53
|
+
# drive `shutdown` / `rolling_restart` directly without poisoning the
|
|
54
|
+
# test process's signal handlers.
|
|
55
|
+
def boot(install_signals: true)
|
|
56
|
+
raise 'Wurk::Swarm already booted' unless @assignments.empty?
|
|
57
|
+
raise ArgumentError, 'Topology has no slots' if @topology.empty?
|
|
58
|
+
|
|
59
|
+
@assignments = @topology.assignments.freeze
|
|
60
|
+
close_parent_sockets
|
|
61
|
+
fork_children
|
|
62
|
+
install_signal_handlers if install_signals
|
|
63
|
+
@children.keys
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def supervise
|
|
67
|
+
until done?
|
|
68
|
+
drain_signals
|
|
69
|
+
reap_one_child
|
|
70
|
+
check_memory_pressure
|
|
71
|
+
sleep SUPERVISE_TICK
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def shutdown(timeout: @shutdown_timeout)
|
|
76
|
+
@stopping = true
|
|
77
|
+
relay_signal('TERM')
|
|
78
|
+
wait_for_children(timeout)
|
|
79
|
+
hard_kill_stragglers
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# SIGUSR1. For each existing child, fork a replacement, wait for its
|
|
83
|
+
# first heartbeat, then TERM + drain the old one. Long-running jobs
|
|
84
|
+
# in the old slot get the full shutdown_timeout while the replacement
|
|
85
|
+
# is already serving new work.
|
|
86
|
+
# Replaces every tracked child one at a time: fork a replacement for the
# same slot, wait for its first heartbeat (best effort), then TERM the old
# child and give it `@shutdown_timeout` seconds to drain.
#
# An old child that is still alive after the timeout is sent KILL before it
# is dropped from `@children`. Without that, a hung child would keep running
# untracked: never signalled again by `shutdown`, and doubling up on the
# slot its replacement now serves. The killed child is reaped later by the
# supervise loop's `Process.wait2(-1, WNOHANG)`; `on_child_exit` ignores
# PIDs that are no longer tracked.
def rolling_restart
  @children.dup.each do |old_pid, meta|
    replacement = fork_child(meta[:slot], meta[:index])
    @children[replacement] = meta
    unless wait_for_heartbeat(replacement)
      logger.warn do
        "swarm: replacement #{replacement} heartbeat not seen within #{HEARTBEAT_WAIT}s; proceeding anyway"
      end
    end

    safe_kill(old_pid, 'TERM')
    unless wait_pid(old_pid, @shutdown_timeout)
      logger.warn { "swarm: child #{old_pid} still alive after #{@shutdown_timeout}s; sending KILL" }
      safe_kill(old_pid, 'KILL')
    end
    @children.delete(old_pid)
  end
end
|
|
100
|
+
|
|
101
|
+
private
|
|
102
|
+
|
|
103
|
+
# Step 3.
|
|
104
|
+
def close_parent_sockets
|
|
105
|
+
@config.reset_redis_pools!
|
|
106
|
+
close_active_record_pool
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def close_active_record_pool
|
|
110
|
+
return unless defined?(::ActiveRecord::Base)
|
|
111
|
+
|
|
112
|
+
::ActiveRecord::Base.connection_handler.clear_active_connections!
|
|
113
|
+
::ActiveRecord::Base.connection_handler.flush_idle_connections!
|
|
114
|
+
rescue StandardError => e
|
|
115
|
+
logger.warn { "swarm: ActiveRecord close failed: #{e.class}: #{e.message}" }
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Step 4.
|
|
119
|
+
def fork_children
|
|
120
|
+
@assignments.each_with_index do |slot, idx|
|
|
121
|
+
@children[fork_child(slot, idx)] = { slot: slot, index: idx }
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Forks one child process for `slot` / `idx`.
#
# Parent side: returns the child's PID.
# Child side: hands control to ChildBoot and never returns to the caller.
def fork_child(slot, idx)
  pid = ::Process.fork
  return pid if pid

  begin
    ChildBoot.new(@config, slot, idx).run
    exit 0 # unreachable; ChildBoot exits explicitly
  rescue ::StandardError => e
    # We are the child, running on a copy of the parent's call stack. If the
    # error escaped, it would unwind into the parent's callers (fork loop,
    # supervise loop, whatever rescues above them) inside the child process.
    # Terminate here instead. `exit` raises SystemExit, which this rescue
    # does not catch, so ChildBoot's own explicit exits are unaffected.
    logger.error { "swarm: child #{::Process.pid} failed to boot: #{e.class}: #{e.message}" }
    exit 1
  end
end
|
|
132
|
+
|
|
133
|
+
def install_signal_handlers
|
|
134
|
+
{ 'TERM' => :term, 'INT' => :term, 'TSTP' => :tstp,
|
|
135
|
+
'CONT' => :cont, 'USR1' => :usr1 }.each do |sig, sym|
|
|
136
|
+
::Signal.trap(sig) { @signal_queue << sym }
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def drain_signals
|
|
141
|
+
until @signal_queue.empty?
|
|
142
|
+
sym = next_signal_symbol
|
|
143
|
+
next if sym.nil?
|
|
144
|
+
|
|
145
|
+
case sym
|
|
146
|
+
when :term then shutdown
|
|
147
|
+
when :tstp then relay_signal('TSTP')
|
|
148
|
+
when :cont then relay_signal('CONT')
|
|
149
|
+
when :usr1 then rolling_restart
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def next_signal_symbol
|
|
155
|
+
@signal_queue.pop(true)
|
|
156
|
+
rescue ThreadError
|
|
157
|
+
nil
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Non-blocking reap of at most one exited child; delegates the bookkeeping
# and respawn decision to `on_child_exit`.
#
# ECHILD means the kernel reports no children at all for this process.
# Whatever is still listed in `@children` is therefore stale, so it is
# cleared along with flagging the swarm as stopping. Keeping stale entries
# would stop `done?` from ever becoming true, and a later
# `hard_kill_stragglers` would send KILL to PIDs that are not ours.
def reap_one_child
  pid, status = ::Process.wait2(-1, ::Process::WNOHANG)
  on_child_exit(pid, status) if pid
rescue Errno::ECHILD
  @children.clear
  @stopping = true
end
|
|
166
|
+
|
|
167
|
+
# Handles one reaped child. PIDs that are not tracked (for example a child
# already removed by `rolling_restart`) are ignored. While stopping, the
# exit is only logged; otherwise the slot is respawned after
# RESPAWN_BACKOFF seconds.
#
# `status.exitstatus` is nil when the child was terminated by a signal
# (OOM killer, KILL, a crash), so the log reports the signal name in that
# case instead of printing an empty status.
def on_child_exit(pid, status)
  meta = @children.delete(pid)
  return unless meta

  outcome =
    if status.signaled?
      "signal=#{::Signal.signame(status.termsig) || status.termsig}"
    else
      "status=#{status.exitstatus}"
    end

  if @stopping
    logger.info { "swarm: child #{pid} exited (#{outcome})" }
  else
    logger.warn { "swarm: child #{pid} died (#{outcome}); respawning slot #{meta[:index]}" }
    sleep RESPAWN_BACKOFF
    @children[fork_child(meta[:slot], meta[:index])] = meta
  end
end
|
|
179
|
+
|
|
180
|
+
def check_memory_pressure
|
|
181
|
+
return unless @memory_limit
|
|
182
|
+
|
|
183
|
+
now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
184
|
+
return if now - @last_memory_check < MEMORY_CHECK_INTERVAL
|
|
185
|
+
|
|
186
|
+
@last_memory_check = now
|
|
187
|
+
@children.dup.each_key { |pid| recycle_if_bloated(pid) }
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def recycle_if_bloated(pid)
|
|
191
|
+
rss = pid_rss_kb(pid)
|
|
192
|
+
return if rss.nil? || rss < @memory_limit
|
|
193
|
+
|
|
194
|
+
logger.warn { "swarm: child #{pid} RSS #{rss}KB >= #{@memory_limit}KB; recycling" }
|
|
195
|
+
safe_kill(pid, 'TERM')
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
# Resident set size of `pid` in kB, or nil when it cannot be determined
# (no procfs, process already gone, unreadable or unparsable file).
#
# Reads the `VmRSS` line of /proc/<pid>/status, which the kernel reports
# in kB. The earlier statm-based calculation multiplied a page count by a
# hard-coded 4, which is only correct on kernels with 4 KiB pages.
def pid_rss_kb(pid)
  ::File.read("/proc/#{pid}/status")[/^VmRSS:\s+(\d+)\s+kB/, 1]&.to_i
rescue StandardError
  nil
end
|
|
205
|
+
|
|
206
|
+
def relay_signal(sig)
|
|
207
|
+
@children.each_key { |pid| safe_kill(pid, sig) }
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def safe_kill(pid, sig)
|
|
211
|
+
::Process.kill(sig, pid)
|
|
212
|
+
rescue Errno::ESRCH, Errno::EPERM
|
|
213
|
+
nil
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def wait_pid(pid, timeout)
|
|
217
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + timeout
|
|
218
|
+
while ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) < deadline
|
|
219
|
+
return true if ::Process.wait(pid, ::Process::WNOHANG)
|
|
220
|
+
|
|
221
|
+
sleep 0.1
|
|
222
|
+
end
|
|
223
|
+
false
|
|
224
|
+
rescue Errno::ECHILD
|
|
225
|
+
true
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def wait_for_children(timeout)
|
|
229
|
+
deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + timeout
|
|
230
|
+
while ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) < deadline && @children.any?
|
|
231
|
+
reap_one_child
|
|
232
|
+
sleep 0.1
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def hard_kill_stragglers
|
|
237
|
+
@children.each_key { |pid| safe_kill(pid, 'KILL') }
|
|
238
|
+
@children.clear
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
# Identity is `<hostname>:<pid>:<nonce>`. PROCESS_NONCE is set when
|
|
242
|
+
# Component loads in the parent and inherited by every fork — the
|
|
243
|
+
# parent can compute a child's identity from its PID alone.
|
|
244
|
+
# Returns true if the heartbeat was observed before the deadline.
|
|
245
|
+
# Polls Redis every 0.5s, for up to HEARTBEAT_WAIT seconds, until the
# child's identity appears in the Keys::PROCESSES set.
#
# The `ensure` closes the parent's Redis connections again once polling is
# done. Polling opens a connection in the parent, and the next `fork_child`
# (next rolling-restart iteration, or a later respawn) would otherwise fork
# with that socket open, contrary to boot-ordering step 3.
def wait_for_heartbeat(pid) # rubocop:disable Naming/PredicateMethod
  identity = "#{hostname}:#{pid}:#{Component::PROCESS_NONCE}"
  deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + HEARTBEAT_WAIT
  while ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) < deadline
    return true if heartbeat_registered?(identity)

    sleep 0.5
  end
  false
ensure
  @config.reset_redis_pools!
end

# One SISMEMBER probe. A Redis failure counts as "not seen yet" so that a
# transient outage cannot abort the supervisor in the middle of a restart;
# the caller keeps polling until its deadline.
def heartbeat_registered?(identity)
  @config.redis { |c| c.call('SISMEMBER', Keys::PROCESSES, identity) } == 1
rescue ::StandardError => e
  logger.warn { "swarm: heartbeat probe failed: #{e.class}: #{e.message}" }
  false
end
|
|
255
|
+
|
|
256
|
+
def done?
|
|
257
|
+
@stopping && @children.empty?
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
end
|
data/lib/wurk/testing.rb
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'queues'
|
|
4
|
+
require_relative 'middleware/chain'
|
|
5
|
+
|
|
6
|
+
module Wurk
|
|
7
|
+
# Sidekiq::Testing-compatible test harness (aliased to Sidekiq::Testing).
|
|
8
|
+
# Three modes control how `Wurk::Client#raw_push` behaves:
|
|
9
|
+
#
|
|
10
|
+
# :disable — real Redis push (the default; production behavior)
|
|
11
|
+
# :fake — payloads collected in the in-memory Wurk::Queues store
|
|
12
|
+
# :inline — jobs executed synchronously the instant they're pushed
|
|
13
|
+
#
|
|
14
|
+
# A block form switches the mode for the duration of the block on the current
|
|
15
|
+
# thread only (`fake! { ... }`); the no-block form sets it process-globally.
|
|
16
|
+
#
|
|
17
|
+
# Spec: docs/target/sidekiq-free.md §24.
|
|
18
|
+
module Testing
|
|
19
|
+
class TestModeAlreadySetError < ::RuntimeError; end
|
|
20
|
+
# Raised by `Worker.perform_one` / `drain` when no fake job is available.
|
|
21
|
+
class EmptyQueueError < ::RuntimeError; end
|
|
22
|
+
|
|
23
|
+
THREAD_KEY = :__wurk_testing_mode
|
|
24
|
+
|
|
25
|
+
class << self
|
|
26
|
+
def disable!(&) = __set_test_mode(:disable, &)
|
|
27
|
+
def fake!(&) = __set_test_mode(:fake, &)
|
|
28
|
+
def inline!(&) = __set_test_mode(:inline, &)
|
|
29
|
+
|
|
30
|
+
def disabled? = mode == :disable
|
|
31
|
+
def enabled? = !disabled?
|
|
32
|
+
def fake? = mode == :fake
|
|
33
|
+
def inline? = mode == :inline
|
|
34
|
+
|
|
35
|
+
# Thread-local override (set by a block) wins over the global mode, so a
|
|
36
|
+
# `fake! { ... }` block is isolated to the calling thread.
|
|
37
|
+
def mode
|
|
38
|
+
::Thread.current[THREAD_KEY] || @mode || :disable
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Block → thread-local for the block's duration; no block → global.
|
|
42
|
+
# Nesting block forms (`fake! { inline! { ... } }`) is rejected, matching
|
|
43
|
+
# Sidekiq 8.
|
|
44
|
+
def __set_test_mode(new_mode, &block)
|
|
45
|
+
return @mode = new_mode unless block
|
|
46
|
+
|
|
47
|
+
if ::Thread.current[THREAD_KEY]
|
|
48
|
+
raise TestModeAlreadySetError, 'Nested Sidekiq::Testing block modes are not allowed'
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
::Thread.current[THREAD_KEY] = new_mode
|
|
52
|
+
begin
|
|
53
|
+
block.call
|
|
54
|
+
ensure
|
|
55
|
+
::Thread.current[THREAD_KEY] = nil
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# In-process server-middleware chain used for inline execution. Empty by
|
|
60
|
+
# default — configure with `Sidekiq::Testing.server_middleware { |c| ... }`.
|
|
61
|
+
def server_middleware
|
|
62
|
+
@server_middleware ||= ::Wurk::Middleware::Chain.new(::Wurk.configuration)
|
|
63
|
+
yield @server_middleware if block_given?
|
|
64
|
+
@server_middleware
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# --- push hooks invoked by Wurk::Client#raw_push -------------------
|
|
68
|
+
|
|
69
|
+
# Route a push through the active test mode (only called when enabled?).
|
|
70
|
+
def dispatch_push(payloads)
|
|
71
|
+
inline? ? inline_push(payloads) : fake_push(payloads)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Collect payloads into the in-memory store. `enqueued_at` is stamped now
|
|
75
|
+
# unless the job is scheduled (`at`), mirroring the real client.
|
|
76
|
+
def fake_push(payloads)
|
|
77
|
+
now = ::Process.clock_gettime(::Process::CLOCK_REALTIME, :millisecond)
|
|
78
|
+
payloads.each do |payload|
|
|
79
|
+
payload['enqueued_at'] = now unless payload['at']
|
|
80
|
+
::Wurk::Queues.push(payload['queue'], payload['class'], payload)
|
|
81
|
+
end
|
|
82
|
+
payloads.last['jid']
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Execute each payload immediately through the inline server chain.
|
|
86
|
+
def inline_push(payloads)
|
|
87
|
+
payloads.each { |payload| ::Object.const_get(payload['class'].to_s).process_job(payload) }
|
|
88
|
+
payloads.last['jid']
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Run every fake job across all classes until the store is empty.
|
|
92
|
+
def drain_all
|
|
93
|
+
count = 0
|
|
94
|
+
while (job = ::Wurk::Queues.shift_any)
|
|
95
|
+
::Object.const_get(job['class'].to_s).process_job(job)
|
|
96
|
+
count += 1
|
|
97
|
+
end
|
|
98
|
+
count
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wurk
|
|
4
|
+
# Worker topology DSL (Wurk extension on top of Ent's flat swarm).
|
|
5
|
+
# Lets users declare specialized slots: e.g. 2 forks dedicated to the
|
|
6
|
+
# critical queue with low concurrency, 2 forks for bulk + low with high
|
|
7
|
+
# concurrency. Stronger queue isolation than a flat swarm.
|
|
8
|
+
#
|
|
9
|
+
# Each Slot describes a *kind* of fork; `count` is how many identical
|
|
10
|
+
# forks of that kind to spawn. Swarm consumes `assignments` (the flat
|
|
11
|
+
# list of forks to spawn, in order) so a slot with count=2 yields two
|
|
12
|
+
# assignment entries pointing at the same Slot.
|
|
13
|
+
#
|
|
14
|
+
# See docs/idea/03-process-model.md §Worker topology.
|
|
15
|
+
class Topology
|
|
16
|
+
# `:count` shadows Struct#count by design — Slot is a kw-init data
|
|
17
|
+
# carrier and the slot's child-count is the field users read.
|
|
18
|
+
Slot = Struct.new(:count, :queues, :concurrency, keyword_init: true) do # rubocop:disable Lint/StructNewOverride
|
|
19
|
+
def to_h
|
|
20
|
+
{ count: count, queues: queues, concurrency: concurrency }
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def initialize
|
|
25
|
+
@slots = []
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Declare one slot kind. Returns self so calls chain.
|
|
29
|
+
def slot(count:, queues:, concurrency:)
|
|
30
|
+
queue_list = validate_slot!(count, queues, concurrency)
|
|
31
|
+
@slots << Slot.new(count: count, queues: queue_list, concurrency: concurrency).freeze
|
|
32
|
+
self
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def slots
|
|
36
|
+
@slots.dup.freeze
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def empty?
|
|
40
|
+
@slots.empty?
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Flat ordered list of slots to fork, one per child process. A slot
|
|
44
|
+
# with count=N contributes N entries.
|
|
45
|
+
def assignments
|
|
46
|
+
@slots.flat_map { |s| Array.new(s.count, s) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def total_processes
|
|
50
|
+
@slots.sum(&:count)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Convenience: build a flat topology of `count` identical forks
|
|
54
|
+
# consuming `queues` with `concurrency` threads each. Used by the
|
|
55
|
+
# railtie when the host hasn't declared a custom topology.
|
|
56
|
+
def self.flat(count:, queues:, concurrency:)
|
|
57
|
+
new.slot(count: count, queues: queues, concurrency: concurrency)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# Validates one slot declaration and returns its queue names as a frozen
# Array of Strings.
#
# Raises ArgumentError when `count` or `concurrency` is not a positive
# Integer, when no queue is given, or when any queue name is blank. A blank
# name (from `''`, whitespace or `nil`) used to slip through because only
# the list itself was checked for emptiness.
def validate_slot!(count, queues, concurrency)
  raise ArgumentError, "count must be > 0 (got #{count.inspect})" unless count.is_a?(Integer) && count.positive?
  unless concurrency.is_a?(Integer) && concurrency.positive?
    raise ArgumentError, "concurrency must be > 0 (got #{concurrency.inspect})"
  end

  queue_list = Array(queues).map(&:to_s)
  raise ArgumentError, 'queues cannot be empty' if queue_list.empty?
  if queue_list.any? { |name| name.strip.empty? }
    raise ArgumentError, "queue names cannot be blank (got #{queues.inspect})"
  end

  queue_list.freeze
end
|
|
73
|
+
end
|
|
74
|
+
end
|
data/lib/wurk/unique.rb
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'digest'
|
|
5
|
+
require_relative 'middleware'
|
|
6
|
+
|
|
7
|
+
module Wurk
|
|
8
|
+
# Sidekiq Enterprise unique jobs. Best-effort dedup at enqueue time keyed
|
|
9
|
+
# by a SHA256 digest of `[class, queue, args]` (overridable via
|
|
10
|
+
# `sidekiq_unique_context`). Two lock-release strategies:
|
|
11
|
+
#
|
|
12
|
+
# * `unique_until: :success` (default) — lock retained through retries;
|
|
13
|
+
# server middleware DELs it on successful perform. Surviving across
|
|
14
|
+
# a process crash is bounded by `unique_for` TTL.
|
|
15
|
+
# * `unique_until: :start` — server middleware DELs the lock right
|
|
16
|
+
# *before* invoking perform; a duplicate can be enqueued while the
|
|
17
|
+
# first is running.
|
|
18
|
+
#
|
|
19
|
+
# Wire-compat (§3.9): single-key Redis layout — `unique:<sha256>` STRING
|
|
20
|
+
# holding the owning JID. Scheduled jobs extend the TTL by the delay so
|
|
21
|
+
# the lock covers the entire wait+execution window (§3.4).
|
|
22
|
+
#
|
|
23
|
+
# Spec: docs/target/sidekiq-ent.md §3.
|
|
24
|
+
module Unique
|
|
25
|
+
KEY_PREFIX = 'unique:'
|
|
26
|
+
DEFAULT_UNTIL = :success
|
|
27
|
+
VALID_UNTIL = %i[success start].freeze
|
|
28
|
+
|
|
29
|
+
# `Sidekiq::Enterprise.unique!` flips this on. The middleware pair is
|
|
30
|
+
# always loaded (so worker `sidekiq_options unique_for:` is a no-op
|
|
31
|
+
# without `unique!`) — only when the flag is set does the client
|
|
32
|
+
# middleware actually compute and SETNX the digest.
|
|
33
|
+
class << self
|
|
34
|
+
def enabled?
|
|
35
|
+
@enabled == true
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def enable! # rubocop:disable Naming/PredicateMethod
|
|
39
|
+
@enabled = true
|
|
40
|
+
register_middleware!
|
|
41
|
+
true
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Test helper — not part of the public Sidekiq surface. Clears the
|
|
45
|
+
# flag so per-test enable!/disable! does not leak across runs.
|
|
46
|
+
def disable!
|
|
47
|
+
@enabled = false
|
|
48
|
+
nil
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Compute the lock key for an arbitrary `(klass, queue, args)` triple.
|
|
52
|
+
# Used by both the client middleware and the public `locked?` probe so
|
|
53
|
+
# they cannot drift.
|
|
54
|
+
def lock_key(klass, queue, args)
|
|
55
|
+
context = [klass.to_s, queue.to_s, args]
|
|
56
|
+
"#{KEY_PREFIX}#{Digest::SHA256.hexdigest(JSON.dump(context))}"
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Compute the lock key from a job payload, honoring
|
|
60
|
+
# `sidekiq_unique_context` when the worker class is loaded and
|
|
61
|
+
# defines it.
|
|
62
|
+
def lock_key_for(job)
|
|
63
|
+
context = unique_context(job)
|
|
64
|
+
"#{KEY_PREFIX}#{Digest::SHA256.hexdigest(JSON.dump(context))}"
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Default: `[class, queue, args]`. Workers may override by defining
|
|
68
|
+
# `self.sidekiq_unique_context(job)` returning any JSON-serializable
|
|
69
|
+
# value (e.g. a subset of args). Spec §3.5.
|
|
70
|
+
def unique_context(job)
|
|
71
|
+
klass = resolve_class(job['class'])
|
|
72
|
+
if klass.respond_to?(:sidekiq_unique_context)
|
|
73
|
+
klass.sidekiq_unique_context(job)
|
|
74
|
+
else
|
|
75
|
+
[job['class'], job['queue'], job['args']]
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def resolve_class(name)
|
|
82
|
+
return nil if name.nil? || name.to_s.empty?
|
|
83
|
+
|
|
84
|
+
::Object.const_get(name.to_s)
|
|
85
|
+
rescue ::NameError
|
|
86
|
+
nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def register_middleware!
|
|
90
|
+
Wurk.configuration.client_middleware.add(ClientMiddleware) \
|
|
91
|
+
unless Wurk.configuration.client_middleware.exists?(ClientMiddleware)
|
|
92
|
+
Wurk.configuration.server_middleware.add(ServerMiddleware) \
|
|
93
|
+
unless Wurk.configuration.server_middleware.exists?(ServerMiddleware)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Coerce `unique_for` to a numeric seconds value. Accepts Integer,
|
|
98
|
+
# Numeric, ActiveSupport::Duration (any `to_i`-respondent), or `false`
|
|
99
|
+
# (skip). Returns nil when uniqueness should be skipped.
|
|
100
|
+
# Returns the TTL as a positive Integer number of seconds, or nil when
# uniqueness should be skipped.
#
# nil is returned for: nil, false, non-finite Floats, values that are
# neither Numeric nor duration-like, and anything that truncates to zero
# or less (0, -5, 0.4). Such values used to be passed through as a
# non-positive TTL and ended up in `SET ... EX <ttl>`.
def self.coerce_ttl(value)
  return nil if value.nil? || value == false
  return nil if value.is_a?(Float) && !value.finite?
  return nil unless value.is_a?(Numeric) || duration_like?(value)

  seconds = value.to_i
  seconds.positive? ? seconds : nil
end
|
|
108
|
+
|
|
109
|
+
def self.duration_like?(value)
|
|
110
|
+
return false unless value.respond_to?(:to_i)
|
|
111
|
+
|
|
112
|
+
value.respond_to?(:since) || value.class.name.to_s.include?('Duration')
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# ------------------------------------------------------------------
|
|
116
|
+
# Introspection — `Sidekiq::Enterprise::Unique.locked?`
|
|
117
|
+
# ------------------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
# @return [String, nil] owning jid, or nil when the lock is free.
|
|
120
|
+
# Accepts `(klass, args)` or `(queue, klass, args)`.
#
# The key is derived through `lock_key_for`, the same path the client
# middleware uses when it takes the lock, so a worker-defined
# `sidekiq_unique_context` is honored here too. The probe payload carries
# only 'class', 'queue' and 'args'; a custom context that reads other job
# fields will see nil for them.
def self.locked?(queue_or_klass, klass_or_args = nil, args = nil)
  queue, klass, payload = normalize_locked_args(queue_or_klass, klass_or_args, args)
  probe = { 'class' => klass.to_s, 'queue' => queue.to_s, 'args' => payload }
  key = lock_key_for(probe)
  Wurk.redis { |c| c.call('GET', key) }
end
|
|
125
|
+
|
|
126
|
+
# Accepts either `(klass, args)` or `(queue, klass, args)`. Without a
|
|
127
|
+
# queue the default Wurk job queue is assumed — matches the Sidekiq
|
|
128
|
+
# Ent docs §3.6.
|
|
129
|
+
def self.normalize_locked_args(first, second, third)
|
|
130
|
+
if third.nil?
|
|
131
|
+
[Wurk.default_job_options['queue'] || 'default', first, Array(second)]
|
|
132
|
+
else
|
|
133
|
+
[first.to_s, second, Array(third)]
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
private_class_method :normalize_locked_args
|
|
137
|
+
|
|
138
|
+
# ------------------------------------------------------------------
|
|
139
|
+
# Client middleware — SETNX lock at push time.
|
|
140
|
+
# ------------------------------------------------------------------
|
|
141
|
+
#
|
|
142
|
+
# Drops the duplicate by returning nil from the chain (Wurk::Client
|
|
143
|
+
# treats nil as "halted"; the caller's `perform_async` returns nil
|
|
144
|
+
# JID). Logs the holder JID for debuggability.
|
|
145
|
+
class ClientMiddleware
|
|
146
|
+
include Wurk::Middleware::ClientMiddleware
|
|
147
|
+
|
|
148
|
+
def call(_worker, job, _queue, redis_pool, &)
|
|
149
|
+
return yield unless Wurk::Unique.enabled?
|
|
150
|
+
|
|
151
|
+
ttl = effective_ttl(job)
|
|
152
|
+
return yield if ttl.nil?
|
|
153
|
+
|
|
154
|
+
acquire_or_drop(redis_pool, job, Wurk::Unique.lock_key_for(job), ttl, &)
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
private
|
|
158
|
+
|
|
159
|
+
# Add `at - now` delay to the base TTL so a scheduled job's lock
|
|
160
|
+
# spans the wait + execution window (§3.4). Returns nil when the
|
|
161
|
+
# job opts out (`unique_for: false` / missing).
|
|
162
|
+
def effective_ttl(job)
|
|
163
|
+
base = Wurk::Unique.coerce_ttl(job['unique_for'])
|
|
164
|
+
return nil if base.nil?
|
|
165
|
+
return base unless job['at']
|
|
166
|
+
|
|
167
|
+
delay = (job['at'].to_f - ::Time.now.to_f).ceil
|
|
168
|
+
delay.positive? ? base + delay : base
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Tries to take the lock for `job`, then runs the rest of the client chain.
#
# Returns the chain's result when the lock was acquired, or nil after
# logging the current holder when the job is a duplicate.
#
# Two differences from a plain "SET NX then yield":
# * The pooled connection is returned before yielding, so it is not held
#   for the duration of the downstream middleware and the push itself.
# * If the downstream chain halts the push (falsy result) or raises, the
#   lock taken here is rolled back. Otherwise a job that was never enqueued
#   would block every later enqueue until the TTL expires.
def acquire_or_drop(pool, job, key, ttl)
  jid = job['jid']
  acquired = false
  holder = nil
  pool.with do |conn|
    acquired = conn.call('SET', key, jid, 'NX', 'EX', ttl) == 'OK'
    holder = conn.call('GET', key) unless acquired
  end

  unless acquired
    log_duplicate(job, holder)
    return nil
  end

  begin
    pushed = yield
  ensure
    # `pushed` is nil here when the block raised.
    rollback_lock(pool, key, jid) unless pushed
  end
  pushed
end

# Best-effort removal of a lock this push just acquired; only deletes the
# key while it still holds our JID. Failures are logged, never raised, so
# they cannot mask the error that triggered the rollback.
def rollback_lock(pool, key, jid)
  pool.with { |conn| conn.call('DEL', key) if conn.call('GET', key) == jid }
rescue ::StandardError => e
  Wurk.logger&.warn { "Wurk::Unique rollback failed: #{e.class}: #{e.message}" }
end
|
|
179
|
+
|
|
180
|
+
def log_duplicate(job, holder)
|
|
181
|
+
return unless Wurk.logger
|
|
182
|
+
|
|
183
|
+
msg = "Wurk::Unique: duplicate #{job['class']} dropped " \
|
|
184
|
+
"(jid=#{job['jid']} blocked by jid=#{holder || '?'})"
|
|
185
|
+
Wurk.logger.info { msg }
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# ------------------------------------------------------------------
|
|
190
|
+
# Server middleware — release lock per `unique_until` strategy.
|
|
191
|
+
# ------------------------------------------------------------------
|
|
192
|
+
#
|
|
193
|
+
# `:start` → DEL before perform. Lock-after-this-point not held; a
|
|
194
|
+
# duplicate can be re-enqueued while the first runs.
|
|
195
|
+
# `:success` → DEL only on successful return. Retries keep the lock.
|
|
196
|
+
# Spec §3.7: a raise during perform leaves the lock so
|
|
197
|
+
# the retry can proceed; the TTL bounds the worst case.
|
|
198
|
+
class ServerMiddleware
|
|
199
|
+
include Wurk::Middleware::ServerMiddleware
|
|
200
|
+
|
|
201
|
+
def call(_worker, job, _queue)
|
|
202
|
+
return yield unless Wurk::Unique.enabled? && Wurk::Unique.coerce_ttl(job['unique_for'])
|
|
203
|
+
|
|
204
|
+
mode = unique_until(job)
|
|
205
|
+
key = Wurk::Unique.lock_key_for(job)
|
|
206
|
+
|
|
207
|
+
if mode == :start
|
|
208
|
+
release(key, job['jid'])
|
|
209
|
+
yield
|
|
210
|
+
else
|
|
211
|
+
result = yield
|
|
212
|
+
release(key, job['jid'])
|
|
213
|
+
result
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
private
|
|
218
|
+
|
|
219
|
+
# Honor `unique_until: :start | :success`, fall back to default.
|
|
220
|
+
# Resolves the job's `unique_until` option to one of VALID_UNTIL.
#
# Missing, unrecognized, or non-symbolizable values (nil, true, 1, ...)
# fall back to DEFAULT_UNTIL instead of raising inside the middleware.
def unique_until(job)
  raw = job['unique_until']
  return DEFAULT_UNTIL unless raw.respond_to?(:to_sym)

  requested = raw.to_sym
  VALID_UNTIL.include?(requested) ? requested : DEFAULT_UNTIL
end
|
|
227
|
+
|
|
228
|
+
# CAS DEL: only drop the key if the owning JID still matches ours.
|
|
229
|
+
# Prevents a long-overdue retry from releasing a fresh lock held by
|
|
230
|
+
# a re-enqueued duplicate after the original TTL expired.
|
|
231
|
+
# Deletes `key` only while it still holds `jid`, as a single server-side
# Lua script. A separate GET followed by DEL leaves a window in which the
# key can expire and be re-acquired by another job, whose fresh lock the
# DEL would then remove.
#
# Best effort: any failure is logged and swallowed so a Redis problem
# while releasing never fails the job itself.
def release(key, jid)
  return if jid.nil?

  script = "if redis.call('GET', KEYS[1]) == ARGV[1] then " \
           "return redis.call('DEL', KEYS[1]) else return 0 end"
  redis_pool.with { |conn| conn.call('EVAL', script, 1, key, jid) }
rescue StandardError => e
  Wurk.logger&.warn { "Wurk::Unique release failed: #{e.class}: #{e.message}" }
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|