wurk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +43 -0
- data/CONTRIBUTING.md +73 -0
- data/LICENSE +21 -0
- data/README.md +137 -0
- data/SECURITY.md +39 -0
- data/app/controllers/wurk/api/pagination.rb +67 -0
- data/app/controllers/wurk/api/serializers.rb +131 -0
- data/app/controllers/wurk/api_controller.rb +248 -0
- data/app/controllers/wurk/application_controller.rb +7 -0
- data/app/controllers/wurk/dashboard_controller.rb +48 -0
- data/config/locales/en.yml +15 -0
- data/config/routes.rb +34 -0
- data/exe/wurk +22 -0
- data/lib/active_job/queue_adapters/wurk_adapter.rb +96 -0
- data/lib/generators/wurk/install/install_generator.rb +22 -0
- data/lib/generators/wurk/install/templates/wurk.rb +16 -0
- data/lib/wurk/active_job/wrapper.rb +32 -0
- data/lib/wurk/api/fast.rb +78 -0
- data/lib/wurk/batch/buffer.rb +26 -0
- data/lib/wurk/batch/callback_job.rb +37 -0
- data/lib/wurk/batch/callbacks.rb +176 -0
- data/lib/wurk/batch/client_middleware.rb +27 -0
- data/lib/wurk/batch/death_handler.rb +39 -0
- data/lib/wurk/batch/empty.rb +21 -0
- data/lib/wurk/batch/server_middleware.rb +62 -0
- data/lib/wurk/batch/status.rb +140 -0
- data/lib/wurk/batch.rb +351 -0
- data/lib/wurk/batch_set.rb +67 -0
- data/lib/wurk/capsule.rb +176 -0
- data/lib/wurk/cli.rb +349 -0
- data/lib/wurk/client/buffered.rb +372 -0
- data/lib/wurk/client.rb +330 -0
- data/lib/wurk/compat.rb +136 -0
- data/lib/wurk/component.rb +136 -0
- data/lib/wurk/configuration.rb +373 -0
- data/lib/wurk/context.rb +35 -0
- data/lib/wurk/cron.rb +636 -0
- data/lib/wurk/dashboard_manifest.rb +39 -0
- data/lib/wurk/dead_set.rb +78 -0
- data/lib/wurk/deploy.rb +91 -0
- data/lib/wurk/embedded.rb +94 -0
- data/lib/wurk/encryption.rb +276 -0
- data/lib/wurk/engine.rb +81 -0
- data/lib/wurk/fetcher/reaper.rb +264 -0
- data/lib/wurk/fetcher/reliable.rb +138 -0
- data/lib/wurk/fetcher.rb +11 -0
- data/lib/wurk/health.rb +193 -0
- data/lib/wurk/heartbeat.rb +211 -0
- data/lib/wurk/iterable_job.rb +292 -0
- data/lib/wurk/job/options.rb +70 -0
- data/lib/wurk/job.rb +33 -0
- data/lib/wurk/job_logger.rb +68 -0
- data/lib/wurk/job_record.rb +156 -0
- data/lib/wurk/job_retry.rb +320 -0
- data/lib/wurk/job_set.rb +212 -0
- data/lib/wurk/job_util.rb +162 -0
- data/lib/wurk/keys.rb +52 -0
- data/lib/wurk/launcher.rb +289 -0
- data/lib/wurk/leader.rb +221 -0
- data/lib/wurk/limiter/base.rb +138 -0
- data/lib/wurk/limiter/bucket.rb +80 -0
- data/lib/wurk/limiter/concurrent.rb +132 -0
- data/lib/wurk/limiter/leaky.rb +91 -0
- data/lib/wurk/limiter/points.rb +89 -0
- data/lib/wurk/limiter/server_middleware.rb +77 -0
- data/lib/wurk/limiter/unlimited.rb +48 -0
- data/lib/wurk/limiter/window.rb +80 -0
- data/lib/wurk/limiter.rb +255 -0
- data/lib/wurk/logger.rb +81 -0
- data/lib/wurk/lua/loader.rb +53 -0
- data/lib/wurk/lua.rb +187 -0
- data/lib/wurk/manager.rb +132 -0
- data/lib/wurk/metrics/history.rb +151 -0
- data/lib/wurk/metrics/query.rb +173 -0
- data/lib/wurk/metrics/rollup.rb +169 -0
- data/lib/wurk/metrics/statsd.rb +197 -0
- data/lib/wurk/metrics.rb +7 -0
- data/lib/wurk/middleware/chain.rb +128 -0
- data/lib/wurk/middleware/current_attributes.rb +87 -0
- data/lib/wurk/middleware/expiry.rb +50 -0
- data/lib/wurk/middleware/i18n.rb +63 -0
- data/lib/wurk/middleware/interrupt_handler.rb +45 -0
- data/lib/wurk/middleware/poison_pill.rb +149 -0
- data/lib/wurk/middleware.rb +34 -0
- data/lib/wurk/process_set.rb +243 -0
- data/lib/wurk/processor.rb +247 -0
- data/lib/wurk/queue.rb +108 -0
- data/lib/wurk/queues.rb +80 -0
- data/lib/wurk/rails.rb +9 -0
- data/lib/wurk/railtie.rb +28 -0
- data/lib/wurk/redis_pool.rb +79 -0
- data/lib/wurk/retry_set.rb +17 -0
- data/lib/wurk/scheduled.rb +189 -0
- data/lib/wurk/scheduled_set.rb +18 -0
- data/lib/wurk/sorted_entry.rb +95 -0
- data/lib/wurk/stats.rb +190 -0
- data/lib/wurk/swarm/child_boot.rb +105 -0
- data/lib/wurk/swarm.rb +260 -0
- data/lib/wurk/testing.rb +102 -0
- data/lib/wurk/topology.rb +74 -0
- data/lib/wurk/unique.rb +240 -0
- data/lib/wurk/version.rb +5 -0
- data/lib/wurk/web/config.rb +180 -0
- data/lib/wurk/web/enterprise.rb +138 -0
- data/lib/wurk/web/search.rb +139 -0
- data/lib/wurk/web.rb +25 -0
- data/lib/wurk/work_set.rb +116 -0
- data/lib/wurk/worker/setter.rb +93 -0
- data/lib/wurk/worker.rb +216 -0
- data/lib/wurk.rb +238 -0
- data/vendor/assets/dashboard/assets/index-8P3N_m1X.js +152 -0
- data/vendor/assets/dashboard/assets/index-Bqz4_SOQ.css +1 -0
- data/vendor/assets/dashboard/index.html +13 -0
- data/vendor/assets/dashboard/wurk-manifest.json +4 -0
- metadata +232 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'securerandom'
|
|
5
|
+
|
|
6
|
+
module Wurk
  # Mixin shared by Wurk::Client (and Wurk::Job::Setter) to validate, normalize,
  # and JSON-verify job payloads before they hit Redis.
  #
  # Spec: docs/target/sidekiq-free.md §9 (Sidekiq::JobUtil).
  module JobUtil # rubocop:disable Metrics/ModuleLength
    # Top-level keys stripped from every payload before raw_push. Mutable so
    # Pro/Ent/extension code (e.g. TransactionAwareClient adding "client_class")
    # can append at load time without monkey-patching.
    TRANSIENT_ATTRIBUTES = [] # rubocop:disable Style/MutableConstant

    # Upper bound (inclusive) accepted for a payload's `retry_for` value.
    RETRY_FOR_MAX = 1_000_000_000

    # Structural validation of a raw job payload.
    #
    # @param item [Hash] payload with String keys ('class', 'args', ...).
    # @return [nil] when the payload is acceptable.
    # @raise [ArgumentError] if the payload is structurally invalid.
    def validate(item)
      raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless valid_shape?(item)
      raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item['args'].is_a?(Array)
      raise(ArgumentError, "Job class must be a Class or String: `#{item}`") unless valid_class?(item['class'])
      raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") unless valid_at?(item)
      raise(ArgumentError, "Job tags must be an Array: `#{item}`") unless valid_tags?(item)
      return if valid_retry_for?(item)

      raise(ArgumentError, "Job retry_for over #{RETRY_FOR_MAX} is unreasonable: `#{item}`")
    end

    # Walk args; report the first non-JSON-native value according to the
    # configured strict mode. Hash keys must be Strings.
    #
    # @param item [Hash] payload whose 'args' are inspected.
    # @return [nil] when strict mode is `false` or every arg is JSON-native.
    # @raise [ArgumentError] when the mode is :raise and an offender is found.
    def verify_json(item)
      mode = Wurk.strict_args_mode
      return if mode == false

      offender = json_unsafe(item['args'])
      return if offender.nil?

      report_unsafe(item, offender, mode)
    end

    # Validate → merge class/wrapped default options → stringify → assign jid &
    # created_at → strip transient keys. Returns the canonical payload.
    #
    # Precedence, lowest to highest: the job class's defaults, the wrapped
    # class's options, then the keys given explicitly in `item`.
    #
    # @param item [Hash] raw payload; not mutated.
    # @return [Hash] a new, normalized payload.
    # @raise [ArgumentError] if validation fails or the queue name is empty.
    def normalize_item(item)
      validate(item)
      normalized = defaults_for(item).merge(item)
      stringify_identity!(normalized, item['class'])
      finalize(normalized)
    end

    # @return [Integer] wall-clock time in milliseconds since the epoch.
    def now_in_millis
      ::Process.clock_gettime(::Process::CLOCK_REALTIME, :millisecond)
    end

    private

    def valid_shape?(item) = item.is_a?(Hash) && item.key?('class') && item.key?('args')
    def valid_class?(klass) = klass.is_a?(Class) || klass.is_a?(String)
    def valid_at?(item) = !item.key?('at') || item['at'].is_a?(Numeric)
    def valid_tags?(item) = !item.key?('tags') || item['tags'].is_a?(Array)

    # True when 'retry_for' is absent, or coerces to an Integer that does not
    # exceed RETRY_FOR_MAX. A present-but-uncoercible value is invalid.
    def valid_retry_for?(item)
      return true unless item.key?('retry_for')

      parsed = numeric_retry_for(item['retry_for'])
      return false if parsed.nil?

      parsed <= RETRY_FOR_MAX
    end

    # Coerce a retry_for value to an Integer, or nil when it cannot be
    # represented as one. Non-finite numerics (Infinity, NaN) yield nil rather
    # than letting `to_i` raise FloatDomainError, so callers report them
    # through the normal ArgumentError path.
    def numeric_retry_for(value)
      case value
      when Integer then value
      when Numeric then (value.to_i if value.finite?)
      when String then (Integer(value, 10) if value.match?(/\A-?\d+\z/))
      end
    end

    # Default options for a payload: the job class's options, overlaid with
    # the wrapped class's options when 'wrapped' is an options-bearing Class.
    # Wrapped options win over class defaults so a wrapped job can steer its
    # own queue/retry settings; explicit item keys are merged on top by the
    # caller.
    def defaults_for(item)
      defaults = class_defaults_for(item['class'])
      wrapped = item.fetch('wrapped') { defaults['wrapped'] }
      return defaults unless respondable_class?(wrapped)

      wrapped_options = wrapped.get_sidekiq_options
      wrapped_options.nil? ? defaults : defaults.merge(wrapped_options)
    end

    # Options declared on the job class, or the global defaults when the
    # class is given by name (String) or does not expose options.
    def class_defaults_for(job_class)
      respondable_class?(job_class) ? job_class.get_sidekiq_options : Wurk.default_job_options
    end

    def respondable_class?(klass)
      klass.is_a?(Class) && klass.respond_to?(:get_sidekiq_options)
    end

    # Force 'class' and 'queue' to Strings; an empty queue name is rejected.
    def stringify_identity!(normalized, job_class)
      normalized['class'] = job_class.to_s
      normalized['queue'] = normalized['queue'].to_s
      return unless normalized['queue'].empty?

      raise ArgumentError, "Job must include a non-empty queue name: `#{normalized}`"
    end

    # Strip transient keys, then fill in jid / created_at, coerce retry_for
    # and stamp the absolute expiry. Mutates and returns `normalized`.
    def finalize(normalized)
      TRANSIENT_ATTRIBUTES.each { |k| normalized.delete(k) }
      normalized['jid'] ||= SecureRandom.hex(12)
      normalized['retry_for'] = numeric_retry_for(normalized['retry_for']) if normalized.key?('retry_for')
      normalized['created_at'] ||= now_in_millis
      stamp_expiry(normalized)
      normalized
    end

    # Pro `expires_in:` → absolute epoch-float `expiry` resolved once at push,
    # so the server middleware doesn't redo the math. Spec: sidekiq-pro.md §7.
    # nil.respond_to?(:to_f) is true on modern Ruby (returns 0.0), so we must
    # gate on a non-nil duration before coercing.
    def stamp_expiry(item)
      d = item['expires_in']
      return if d.nil?

      # created_at is in milliseconds; expiry is epoch seconds as a Float.
      item['expiry'] ||= (item['created_at'].to_f / 1000.0) + d.to_f if d.respond_to?(:to_f)
    end

    # Raise or warn about a non-JSON-native argument, depending on `mode`.
    # Any mode other than :raise / :warn is a silent no-op.
    def report_unsafe(item, offender, mode)
      job_class = item['wrapped'] || item['class']
      msg = "Job arguments to #{job_class} must be native JSON types, " \
            "but #{offender.inspect} is a #{offender.class}. " \
            'See https://github.com/sidekiq/sidekiq/wiki/Best-Practices'
      case mode
      when :raise then raise ArgumentError, msg
      when :warn then Wurk.logger.warn(msg)
      end
    end

    # Returns the first offending value, or nil when the tree is JSON-native.
    def json_unsafe(obj)
      case obj
      when String, Integer, Float, TrueClass, FalseClass, NilClass then nil
      when Array then json_unsafe_array(obj)
      when Hash then json_unsafe_hash(obj)
      else obj
      end
    end

    def json_unsafe_array(arr)
      arr.each do |v|
        bad = json_unsafe(v)
        return bad unless bad.nil?
      end
      nil
    end

    # A non-String key is itself the offender; otherwise recurse into values.
    def json_unsafe_hash(hash)
      hash.each do |k, v|
        return k unless k.is_a?(String)

        bad = json_unsafe(v)
        return bad unless bad.nil?
      end
      nil
    end
  end
end
|
data/lib/wurk/keys.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Wurk
  # Canonical Redis key constants. Wire-compat is sacred: these strings are
  # the on-disk schema shared with Sidekiq OSS and every third-party gem that
  # reads Redis directly (sidekiq-cron, sidekiq-unique-jobs, etc.). Renaming
  # or namespacing any of them silently breaks the drop-in contract.
  #
  # OSS uses no namespace. Pro/Ent layer a prefix on top; that lives outside
  # the free gem.
  #
  # Spec: docs/target/sidekiq-free.md §1 (Redis Key Schema).
  module Keys
    # Queue list keys: `queue:<name>` (LIST, LPUSH/BRPOP).
    QUEUE_PREFIX = 'queue:'

    # Set of known queue names, without the `queue:` prefix.
    QUEUES_SET = 'queues'

    # Set of paused queue names (Pro feature; Wurk ships it free).
    # Members are unprefixed queue names. Fetchers exclude these on each pass.
    PAUSED_SET = 'paused'

    # Sorted sets keyed by score = unix epoch float seconds.
    SCHEDULE = 'schedule'
    RETRY = 'retry'
    DEAD = 'dead'

    # Live process identities (heartbeat membership).
    PROCESSES = 'processes'

    # Global processed counter; per-day variants append `:YYYY-MM-DD`.
    STAT_PROCESSED = 'stat:processed'

    # Global failed counter; per-day variants append `:YYYY-MM-DD`.
    STAT_FAILED = 'stat:failed'

    # Global expired counter — subset of processed: jobs the Expiry server
    # middleware dropped before `perform` because `expiry` had already
    # elapsed. Per-day variants append `:YYYY-MM-DD`. Spec: sidekiq-pro.md §7.
    STAT_EXPIRED = 'stat:expired'

    # TTL applied to per-day `stat:processed:*` / `stat:failed:*` /
    # `stat:expired:*` strings. 5 years, in seconds. Matches
    # Sidekiq::Launcher::STATS_TTL.
    STATS_TTL = 5 * 365 * 24 * 60 * 60

    # Build a queue list key from a queue name. Centralizing the concat keeps
    # the prefix in one place even though it's a constant — third-party gems
    # that grep for `"queue:"` still find it via the constant.
    #
    # @param name [#to_s] unprefixed queue name.
    # @return [String] the name prefixed with QUEUE_PREFIX.
    def self.queue(name)
      "#{QUEUE_PREFIX}#{name}"
    end
  end
end
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'component'
|
|
4
|
+
require_relative 'manager'
|
|
5
|
+
require_relative 'processor'
|
|
6
|
+
require_relative 'heartbeat'
|
|
7
|
+
require_relative 'health'
|
|
8
|
+
require_relative 'keys'
|
|
9
|
+
require_relative 'scheduled'
|
|
10
|
+
require_relative 'leader'
|
|
11
|
+
require_relative 'cron'
|
|
12
|
+
require_relative 'metrics/rollup'
|
|
13
|
+
require_relative 'fetcher/reaper'
|
|
14
|
+
|
|
15
|
+
module Wurk
  # Top-level supervisor inside each worker process. Owns the Manager pool
  # (one per Capsule), the scheduler poller, and the heartbeat thread.
  # The heartbeat WIRE lives in Wurk::Heartbeat — Launcher owns lifecycle,
  # signal dispatch, and stats rollup; Heartbeat owns the Redis writes.
  #
  # Lifecycle:
  #   * `run(async_beat:)` — freeze config, start heartbeat, poller, managers.
  #   * `quiet`            — stop fetching across all managers + poller.
  #   * `stop`             — graceful drain inside `config[:timeout]`.
  #   * `heartbeat`        — one-shot beat (also driven by the heartbeat thread).
  #
  # `flush_stats` rolls per-process Processor counters (PROCESSED / FAILURE
  # / EXPIRED) into the global + per-day Redis strings every beat. Per-day
  # keys carry `STATS_TTL` so old days expire automatically.
  #
  # Spec: docs/target/sidekiq-free.md §12 (Sidekiq::Launcher).
  class Launcher
    include Component

    # 5 years, in seconds. Per-day `stat:processed:YYYY-MM-DD` /
    # `stat:failed:YYYY-MM-DD` / `stat:expired:YYYY-MM-DD` strings carry
    # this TTL so they roll off without manual cleanup. Re-exported from
    # Keys so the value is defined in exactly one place.
    STATS_TTL = Keys::STATS_TTL

    # Re-exported for test/third-party callers that read it off Launcher
    # (Sidekiq's drop-in surface). The single source of truth is Heartbeat.
    BEAT_PAUSE = Heartbeat::BEAT_PAUSE

    attr_accessor :managers, :poller, :cron_poller, :metrics_rollup

    # Used by tests to inspect the heartbeat thread; not part of the
    # Sidekiq public surface.
    attr_reader :heartbeat_thread

    # @param config the process configuration; must expose `capsules`,
    #   `freeze!`, `redis` and `[]` (as used below).
    # @param embedded [Boolean] forwarded to Heartbeat when it is built.
    def initialize(config, embedded: false)
      @config = config
      @embedded = embedded
      @done = false
      @managers = config.capsules.values.map { |cap| Manager.new(cap) }
      @poller = build_poller
      @cron_poller = build_cron_poller
      @metrics_rollup = build_metrics_rollup
      @leader = build_leader
      @reaper = build_reaper
      @started_at = nil
      @heartbeat = nil
      @heartbeat_thread = nil
      @health_server = build_health_server
    end

    # Boot order matters:
    #   1. freeze! the config so mutations after fork are visible mistakes.
    #   2. spawn the heartbeat thread BEFORE the managers so the dashboard
    #      sees the process the moment it can pick up jobs.
    #   3. start the scheduler poller + the cron poller (both leader-gated for
    #      what they enqueue; safe to start before leadership is settled since
    #      a non-leader tick just returns early).
    #   4. start the managers (which start their processors).
    #   5. start the health probe server LAST so the listener doesn't
    #      accept k8s probes until the rest of the launcher is up.
    #
    # @param async_beat [Boolean] when false, no heartbeat thread is spawned
    #   and the caller is expected to drive `heartbeat` itself.
    def run(async_beat: true)
      @started_at = Time.now.to_f
      # Default each capsule's fetcher + materialize its lazy pools/middleware
      # before the config freezes. Every entry point (swarm child, standalone
      # CLI, embedded) runs through here, so none boots with a nil fetcher.
      @config.capsules.each_value(&:prepare!)
      @config.freeze!
      @heartbeat_thread = safe_thread('heartbeat', &method(:start_heartbeat)) if async_beat
      @poller&.start
      @leader&.start
      @cron_poller&.start
      @metrics_rollup&.start
      @managers.each(&:start)
      # Nil-guarded like every other optional collaborator (and like #stop).
      @reaper&.start
      @health_server&.start
    end

    # Idempotent. Flips `stopping?` true, halts fetching across every
    # Manager + the poller, then fires the `:quiet` event in reverse
    # registration order so teardown hooks run LIFO.
    def quiet
      return if @done

      @done = true
      @managers.each(&:quiet)
      @poller&.terminate
      # The cron poller is intentionally NOT terminated here: a USR1-quieted
      # leader still enqueues periodic jobs — it only stops fetching for itself.
      # Loops stop only on full shutdown (#stop). Spec: sidekiq-ent.md §2.6.
      fire_event(:quiet, reverse: true)
    end

    # Graceful shutdown. Deadline is monotonic so wall-clock skew can't
    # extend it. Managers stop in parallel threads so a slow capsule
    # doesn't block its siblings.
    def stop
      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + (@config[:timeout] || 25)
      quiet
      stoppers = @managers.map { |m| Thread.new { m.stop(deadline) } }
      fire_event(:shutdown, reverse: true)
      stoppers.each(&:join)
      # Full shutdown stops periodic firing (it survived #quiet); do this before
      # releasing the lock so no tick races a follower's promotion.
      @cron_poller&.terminate
      @metrics_rollup&.terminate
      @reaper&.stop
      # CAS-release the cluster lock now (planned shutdown) so a follower can
      # take over immediately instead of waiting out the TTL.
      @leader&.stop
      clear_heartbeat
      fire_event(:exit, reverse: true)
    end

    # @return [Boolean] true once `quiet` (or `stop`) has been called.
    def stopping?
      @done
    end

    # One-shot beat. Public for embedded mode (and for tests) — the
    # heartbeat thread calls this on `BEAT_PAUSE` cadence.
    def heartbeat
      flush_stats
      beat
    end

    # Rolls in-process Processor counters into Redis. Pipelined so a single
    # round trip covers all writes. Skips when all counters are zero to
    # avoid touching keys we have nothing to add to.
    def flush_stats
      processed = Processor::PROCESSED.reset
      failed = Processor::FAILURE.reset
      expired = Processor::EXPIRED.reset
      return if processed.zero? && failed.zero? && expired.zero?

      write_stats(processed, failed, expired)
    rescue StandardError => e
      # Replay-safety: counters were reset above, so a Redis blip would
      # otherwise drop stats. We log and accept — the per-job at-least-once
      # semantics don't apply to *counters*, and the next beat resets again.
      handle_exception(e, { context: 'flush_stats' })
    end

    private

    # Write the three counters (global + per-UTC-day keys) in one pipeline.
    def write_stats(processed, failed, expired)
      day = Time.now.utc.strftime('%F')
      @config.redis do |conn|
        conn.pipelined do |pipe|
          incr_stat_key(pipe, Keys::STAT_PROCESSED, processed, day)
          incr_stat_key(pipe, 'stat:failed', failed, day)
          incr_stat_key(pipe, Keys::STAT_EXPIRED, expired, day)
        end
      end
    end

    # Queue INCRBY on the global key and the per-day key, and refresh the
    # per-day key's TTL. No-op for a non-positive delta.
    def incr_stat_key(pipe, key, value, day)
      return unless value.positive?

      daily_key = "#{key}:#{day}"
      pipe.call('INCRBY', key, value)
      pipe.call('INCRBY', daily_key, value)
      pipe.call('EXPIRE', daily_key, STATS_TTL)
    end

    # Pipelined identity write via Heartbeat, then dispatch any signals
    # the dashboard queued at `<identity>-signals`. Lazily builds the
    # Heartbeat the first time we beat so callers that bypass `run`
    # (embedded mode, tests) still work.
    def beat
      ensure_heartbeat
      sigs = @heartbeat.beat!
      sigs&.each { |sig| dispatch_signal(sig) }
    end

    # Build the Heartbeat once; later calls are no-ops.
    def ensure_heartbeat
      return if @heartbeat

      @heartbeat = Heartbeat.new(
        identity: identity,
        config: @config,
        started_at: @started_at || Time.now.to_f,
        embedded: @embedded,
        quiet: -> { @done }
      )
    end

    # Erase the live-process footprint. flush_stats first so we don't drop
    # the final batch of counters; then Heartbeat#stop! removes us from the
    # `processes` SET and UNLINK-s the identity + work hashes. The probe
    # server is closed alongside so kubelet stops getting 200s after the
    # process is no longer healthy.
    def clear_heartbeat
      flush_stats
      @heartbeat&.stop!
      @health_server&.stop
    end

    # Heartbeat thread loop. `safe_thread` already wraps exceptions; the
    # loop re-checks @done after each beat + sleep, so the thread exits
    # within one BEAT_PAUSE of `quiet`/`stop` flipping it.
    def start_heartbeat
      until @done
        heartbeat
        sleep BEAT_PAUSE
      end
      logger.info('Heartbeat stopping...')
    end

    # Map a signal name received through the heartbeat onto a lifecycle call.
    def dispatch_signal(sig)
      case sig
      when 'TSTP' then quiet
      when 'TERM' then stop
      else
        logger.warn { "Unknown signal in #{identity}-signals: #{sig.inspect}" }
      end
    end

    def build_poller
      Wurk::Scheduled::Poller.new(@config)
    end

    # Periodic (cron) tick loop. Like the scheduler poller, every process runs
    # one, but only the elected leader enqueues — the single-leader invariant is
    # what guarantees exactly one enqueue per (loop, tick) across the cluster.
    def build_cron_poller
      Wurk::Cron::Poller.new(@config)
    end

    # Leader-only metrics rollup. Every process runs one, but only the elected
    # leader writes the cluster-total time-series buckets the dashboard charts
    # read — a non-leader tick returns early. Tune the cadence (tests shrink it)
    # with `config.metrics_rollup_interval`.
    def build_metrics_rollup
      Wurk::Metrics::Rollup.new(@config)
    end

    # Every worker process campaigns for the single cluster lock (`dear-leader`);
    # one wins and renews it, the rest follow and promote on its death. Cadence
    # falls back to the spec defaults (TTL 30 / renew 15 / follower 60) unless
    # the host tunes it. `Leader#start` no-ops under `WURK_LEADER=false`.
    def build_leader
      Wurk::Leader.new(
        config: @config,
        ttl: @config[:leader_ttl] || Wurk::Leader::DEFAULT_TTL,
        renew_interval: @config[:leader_renew_interval] || Wurk::Leader::DEFAULT_RENEW_INTERVAL,
        follower_interval: @config[:leader_follower_interval] || Wurk::Leader::DEFAULT_FOLLOWER_INTERVAL
      )
    end

    # Reliable-fetch orphan reclamation. Every worker runs one; a cluster
    # `SET NX EX` lock ensures only one actually sweeps per interval, so this
    # is leader-independent (it keeps working if the leader dies). Tune the
    # cadence with `config.super_fetch_reaper_interval`.
    def build_reaper
      Wurk::Fetcher::Reaper.new(
        @config,
        interval: @config[:super_fetch_reaper_interval] || Wurk::Fetcher::Reaper::DEFAULT_INTERVAL
      )
    end

    # Returns a Health::Server when `config.health_check(...)` has set
    # `:health_check_options`; nil otherwise. Off by default — the listener
    # is opt-in to keep the worker's port surface minimal.
    def build_health_server
      opts = @config[:health_check_options]
      return nil unless opts

      Health::Server.new(
        self,
        port: opts.fetch(:port, Health::DEFAULT_PORT),
        bind: opts.fetch(:bind, Health::DEFAULT_BIND),
        ready_window: opts.fetch(:ready_window, Health::DEFAULT_READY_WINDOW)
      )
    end
  end
end
|