wurk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +43 -0
  3. data/CONTRIBUTING.md +73 -0
  4. data/LICENSE +21 -0
  5. data/README.md +137 -0
  6. data/SECURITY.md +39 -0
  7. data/app/controllers/wurk/api/pagination.rb +67 -0
  8. data/app/controllers/wurk/api/serializers.rb +131 -0
  9. data/app/controllers/wurk/api_controller.rb +248 -0
  10. data/app/controllers/wurk/application_controller.rb +7 -0
  11. data/app/controllers/wurk/dashboard_controller.rb +48 -0
  12. data/config/locales/en.yml +15 -0
  13. data/config/routes.rb +34 -0
  14. data/exe/wurk +22 -0
  15. data/lib/active_job/queue_adapters/wurk_adapter.rb +96 -0
  16. data/lib/generators/wurk/install/install_generator.rb +22 -0
  17. data/lib/generators/wurk/install/templates/wurk.rb +16 -0
  18. data/lib/wurk/active_job/wrapper.rb +32 -0
  19. data/lib/wurk/api/fast.rb +78 -0
  20. data/lib/wurk/batch/buffer.rb +26 -0
  21. data/lib/wurk/batch/callback_job.rb +37 -0
  22. data/lib/wurk/batch/callbacks.rb +176 -0
  23. data/lib/wurk/batch/client_middleware.rb +27 -0
  24. data/lib/wurk/batch/death_handler.rb +39 -0
  25. data/lib/wurk/batch/empty.rb +21 -0
  26. data/lib/wurk/batch/server_middleware.rb +62 -0
  27. data/lib/wurk/batch/status.rb +140 -0
  28. data/lib/wurk/batch.rb +351 -0
  29. data/lib/wurk/batch_set.rb +67 -0
  30. data/lib/wurk/capsule.rb +176 -0
  31. data/lib/wurk/cli.rb +349 -0
  32. data/lib/wurk/client/buffered.rb +372 -0
  33. data/lib/wurk/client.rb +330 -0
  34. data/lib/wurk/compat.rb +136 -0
  35. data/lib/wurk/component.rb +136 -0
  36. data/lib/wurk/configuration.rb +373 -0
  37. data/lib/wurk/context.rb +35 -0
  38. data/lib/wurk/cron.rb +636 -0
  39. data/lib/wurk/dashboard_manifest.rb +39 -0
  40. data/lib/wurk/dead_set.rb +78 -0
  41. data/lib/wurk/deploy.rb +91 -0
  42. data/lib/wurk/embedded.rb +94 -0
  43. data/lib/wurk/encryption.rb +276 -0
  44. data/lib/wurk/engine.rb +81 -0
  45. data/lib/wurk/fetcher/reaper.rb +264 -0
  46. data/lib/wurk/fetcher/reliable.rb +138 -0
  47. data/lib/wurk/fetcher.rb +11 -0
  48. data/lib/wurk/health.rb +193 -0
  49. data/lib/wurk/heartbeat.rb +211 -0
  50. data/lib/wurk/iterable_job.rb +292 -0
  51. data/lib/wurk/job/options.rb +70 -0
  52. data/lib/wurk/job.rb +33 -0
  53. data/lib/wurk/job_logger.rb +68 -0
  54. data/lib/wurk/job_record.rb +156 -0
  55. data/lib/wurk/job_retry.rb +320 -0
  56. data/lib/wurk/job_set.rb +212 -0
  57. data/lib/wurk/job_util.rb +162 -0
  58. data/lib/wurk/keys.rb +52 -0
  59. data/lib/wurk/launcher.rb +289 -0
  60. data/lib/wurk/leader.rb +221 -0
  61. data/lib/wurk/limiter/base.rb +138 -0
  62. data/lib/wurk/limiter/bucket.rb +80 -0
  63. data/lib/wurk/limiter/concurrent.rb +132 -0
  64. data/lib/wurk/limiter/leaky.rb +91 -0
  65. data/lib/wurk/limiter/points.rb +89 -0
  66. data/lib/wurk/limiter/server_middleware.rb +77 -0
  67. data/lib/wurk/limiter/unlimited.rb +48 -0
  68. data/lib/wurk/limiter/window.rb +80 -0
  69. data/lib/wurk/limiter.rb +255 -0
  70. data/lib/wurk/logger.rb +81 -0
  71. data/lib/wurk/lua/loader.rb +53 -0
  72. data/lib/wurk/lua.rb +187 -0
  73. data/lib/wurk/manager.rb +132 -0
  74. data/lib/wurk/metrics/history.rb +151 -0
  75. data/lib/wurk/metrics/query.rb +173 -0
  76. data/lib/wurk/metrics/rollup.rb +169 -0
  77. data/lib/wurk/metrics/statsd.rb +197 -0
  78. data/lib/wurk/metrics.rb +7 -0
  79. data/lib/wurk/middleware/chain.rb +128 -0
  80. data/lib/wurk/middleware/current_attributes.rb +87 -0
  81. data/lib/wurk/middleware/expiry.rb +50 -0
  82. data/lib/wurk/middleware/i18n.rb +63 -0
  83. data/lib/wurk/middleware/interrupt_handler.rb +45 -0
  84. data/lib/wurk/middleware/poison_pill.rb +149 -0
  85. data/lib/wurk/middleware.rb +34 -0
  86. data/lib/wurk/process_set.rb +243 -0
  87. data/lib/wurk/processor.rb +247 -0
  88. data/lib/wurk/queue.rb +108 -0
  89. data/lib/wurk/queues.rb +80 -0
  90. data/lib/wurk/rails.rb +9 -0
  91. data/lib/wurk/railtie.rb +28 -0
  92. data/lib/wurk/redis_pool.rb +79 -0
  93. data/lib/wurk/retry_set.rb +17 -0
  94. data/lib/wurk/scheduled.rb +189 -0
  95. data/lib/wurk/scheduled_set.rb +18 -0
  96. data/lib/wurk/sorted_entry.rb +95 -0
  97. data/lib/wurk/stats.rb +190 -0
  98. data/lib/wurk/swarm/child_boot.rb +105 -0
  99. data/lib/wurk/swarm.rb +260 -0
  100. data/lib/wurk/testing.rb +102 -0
  101. data/lib/wurk/topology.rb +74 -0
  102. data/lib/wurk/unique.rb +240 -0
  103. data/lib/wurk/version.rb +5 -0
  104. data/lib/wurk/web/config.rb +180 -0
  105. data/lib/wurk/web/enterprise.rb +138 -0
  106. data/lib/wurk/web/search.rb +139 -0
  107. data/lib/wurk/web.rb +25 -0
  108. data/lib/wurk/work_set.rb +116 -0
  109. data/lib/wurk/worker/setter.rb +93 -0
  110. data/lib/wurk/worker.rb +216 -0
  111. data/lib/wurk.rb +238 -0
  112. data/vendor/assets/dashboard/assets/index-8P3N_m1X.js +152 -0
  113. data/vendor/assets/dashboard/assets/index-Bqz4_SOQ.css +1 -0
  114. data/vendor/assets/dashboard/index.html +13 -0
  115. data/vendor/assets/dashboard/wurk-manifest.json +4 -0
  116. metadata +232 -0
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'securerandom'
5
+
6
module Wurk
  # Mixin shared by Wurk::Client (and Wurk::Job::Setter) to validate, normalize,
  # and JSON-verify job payloads before they hit Redis.
  #
  # Spec: docs/target/sidekiq-free.md §9 (Sidekiq::JobUtil).
  module JobUtil # rubocop:disable Metrics/ModuleLength
    # Top-level keys stripped from every payload before raw_push. Mutable so
    # Pro/Ent/extension code (e.g. TransactionAwareClient adding "client_class")
    # can append at load time without monkey-patching.
    TRANSIENT_ATTRIBUTES = [] # rubocop:disable Style/MutableConstant

    # Largest `retry_for` value #validate accepts.
    RETRY_FOR_MAX = 1_000_000_000

    # Structural validation of a raw job payload.
    #
    # @param item [Hash] payload with String keys ('class', 'args', ...).
    # @return [nil] when the payload is acceptable.
    # @raise [ArgumentError] if the payload is structurally invalid.
    def validate(item)
      raise(ArgumentError, "Job must be a Hash with 'class' and 'args' keys: `#{item}`") unless valid_shape?(item)
      raise(ArgumentError, "Job args must be an Array: `#{item}`") unless item['args'].is_a?(Array)
      raise(ArgumentError, "Job class must be a Class or String: `#{item}`") unless valid_class?(item['class'])
      raise(ArgumentError, "Job 'at' must be a Numeric timestamp: `#{item}`") unless valid_at?(item)
      raise(ArgumentError, "Job tags must be an Array: `#{item}`") unless valid_tags?(item)
      return if valid_retry_for?(item)

      raise(ArgumentError, "Job retry_for over #{RETRY_FOR_MAX} is unreasonable: `#{item}`")
    end

    # Walk args; report the first non-JSON-native value according to the
    # configured strict mode. Hash keys must be Strings.
    #
    # @param item [Hash] payload whose 'args' tree is inspected.
    # @return [nil] when strict mode is off or every argument is JSON-native.
    # @raise [ArgumentError] in `:raise` mode when an offending value is found.
    def verify_json(item)
      mode = Wurk.strict_args_mode
      return if mode == false

      offender = json_unsafe(item['args'])
      return if offender.nil?

      report_unsafe(item, offender, mode)
    end

    # Validate → merge class/default options → stringify → assign jid &
    # created_at → strip transient keys. Returns the canonical payload.
    #
    # @param item [Hash] raw payload; it is not mutated (a merged copy is built).
    # @return [Hash] the normalized payload.
    # @raise [ArgumentError] if validation fails or the queue name is empty.
    def normalize_item(item)
      validate(item)
      normalized = class_defaults_for(item['class']).merge(item)
      normalized = wrap_options(normalized)
      stringify_identity!(normalized, item['class'])
      finalize(normalized)
    end

    # @return [Integer] wall-clock time as integer milliseconds.
    def now_in_millis
      ::Process.clock_gettime(::Process::CLOCK_REALTIME, :millisecond)
    end

    private

    def valid_shape?(item) = item.is_a?(Hash) && item.key?('class') && item.key?('args')
    def valid_class?(klass) = klass.is_a?(Class) || klass.is_a?(String)
    def valid_at?(item) = !item.key?('at') || item['at'].is_a?(Numeric)
    def valid_tags?(item) = !item.key?('tags') || item['tags'].is_a?(Array)

    # True when 'retry_for' is absent, or present, coercible to an Integer and
    # no larger than RETRY_FOR_MAX.
    def valid_retry_for?(item)
      return true unless item.key?('retry_for')

      parsed = numeric_retry_for(item['retry_for'])
      return false if parsed.nil?

      parsed <= RETRY_FOR_MAX
    end

    # Coerce a retry_for value to an Integer; nil when it cannot be coerced.
    # Non-finite numbers (Infinity, NaN) yield nil instead of letting `to_i`
    # raise FloatDomainError, so callers see the documented ArgumentError.
    def numeric_retry_for(value)
      case value
      when Integer then value
      when Numeric then (value.to_i if value.finite?)
      when String then (Integer(value, 10) if value.match?(/\A-?\d+\z/))
      end
    end

    # Options declared on the job class when it exposes them, otherwise the
    # global defaults.
    def class_defaults_for(job_class)
      respondable_class?(job_class) ? job_class.get_sidekiq_options : Wurk.default_job_options
    end

    def respondable_class?(klass)
      klass.is_a?(Class) && klass.respond_to?(:get_sidekiq_options)
    end

    # When the payload carries a 'wrapped' class that declares options, those
    # options act as a base layer underneath the already-merged payload.
    def wrap_options(normalized)
      wrapped = normalized['wrapped']
      respondable_class?(wrapped) ? wrapped.get_sidekiq_options.merge(normalized) : normalized
    end

    # Force 'class' and 'queue' to Strings; an empty queue name is rejected.
    def stringify_identity!(normalized, job_class)
      normalized['class'] = job_class.to_s
      normalized['queue'] = normalized['queue'].to_s
      return unless normalized['queue'].empty?

      raise ArgumentError, "Job must include a non-empty queue name: `#{normalized}`"
    end

    # Strip transient keys, then fill in jid / created_at (only when missing),
    # coerce retry_for and stamp the expiry. Returns the same Hash.
    def finalize(normalized)
      TRANSIENT_ATTRIBUTES.each { |k| normalized.delete(k) }
      normalized['jid'] ||= SecureRandom.hex(12)
      normalized['retry_for'] = numeric_retry_for(normalized['retry_for']) if normalized.key?('retry_for')
      normalized['created_at'] ||= now_in_millis
      stamp_expiry(normalized)
      normalized
    end

    # Pro `expires_in:` → absolute epoch-float `expiry` resolved once at push,
    # so the server middleware doesn't redo the math. Spec: sidekiq-pro.md §7.
    # nil.respond_to?(:to_f) is true on modern Ruby (returns 0.0), so we must
    # gate on a non-nil duration before coercing.
    def stamp_expiry(item)
      d = item['expires_in']
      return if d.nil?

      item['expiry'] ||= (item['created_at'].to_f / 1000.0) + d.to_f if d.respond_to?(:to_f)
    end

    # Raise or warn about an offending argument, depending on the strict mode.
    # Any mode other than :raise / :warn is a silent no-op.
    def report_unsafe(item, offender, mode)
      job_class = item['wrapped'] || item['class']
      msg = "Job arguments to #{job_class} must be native JSON types, " \
            "but #{offender.inspect} is a #{offender.class}. " \
            'See https://github.com/sidekiq/sidekiq/wiki/Best-Practices'
      case mode
      when :raise then raise ArgumentError, msg
      when :warn then Wurk.logger.warn(msg)
      end
    end

    # Returns the first offending value, or nil when the tree is JSON-native.
    # Floats are only JSON-native when finite: NaN and ±Infinity have no JSON
    # representation, so they are reported like any other foreign value.
    def json_unsafe(obj)
      case obj
      when String, Integer, TrueClass, FalseClass, NilClass then nil
      when Float then (obj unless obj.finite?)
      when Array then json_unsafe_array(obj)
      when Hash then json_unsafe_hash(obj)
      else obj
      end
    end

    def json_unsafe_array(arr)
      arr.each do |v|
        bad = json_unsafe(v)
        return bad unless bad.nil?
      end
      nil
    end

    # A non-String key is itself the offender; otherwise recurse into values.
    def json_unsafe_hash(hash)
      hash.each do |k, v|
        return k unless k.is_a?(String)

        bad = json_unsafe(v)
        return bad unless bad.nil?
      end
      nil
    end
  end
end
data/lib/wurk/keys.rb ADDED
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module Wurk
  # Canonical Redis key constants. Wire-compat is sacred: these strings are
  # the on-disk schema shared with Sidekiq OSS and every third-party gem that
  # reads Redis directly (sidekiq-cron, sidekiq-unique-jobs, etc.). Renaming
  # or namespacing any of them silently breaks the drop-in contract.
  #
  # OSS uses no namespace. Pro/Ent layer a prefix on top; that lives outside
  # the free gem.
  #
  # Spec: docs/target/sidekiq-free.md §1 (Redis Key Schema).
  module Keys
    # Queue list keys: `queue:<name>` (LIST, LPUSH/BRPOP).
    QUEUE_PREFIX = 'queue:'

    # Set of known queue names, without the `queue:` prefix.
    QUEUES_SET = 'queues'

    # Set of paused queue names (Pro feature; Wurk ships it free).
    # Members are unprefixed queue names. Fetchers exclude these on each pass.
    PAUSED_SET = 'paused'

    # Sorted sets keyed by score = unix epoch float seconds.
    SCHEDULE = 'schedule'
    RETRY = 'retry'
    DEAD = 'dead'

    # Live process identities (heartbeat membership).
    PROCESSES = 'processes'

    # Global processed counter; per-day variants append `:YYYY-MM-DD`.
    STAT_PROCESSED = 'stat:processed'

    # Global failed counter; per-day variants append `:YYYY-MM-DD`. Named here
    # so writers and readers share one spelling with the other stat keys.
    STAT_FAILED = 'stat:failed'

    # Global expired counter — subset of processed: jobs the Expiry server
    # middleware dropped before `perform` because `expiry` had already
    # elapsed. Per-day variants append `:YYYY-MM-DD`. Spec: sidekiq-pro.md §7.
    STAT_EXPIRED = 'stat:expired'

    # TTL applied to per-day `stat:processed:*` / `stat:failed:*` /
    # `stat:expired:*` strings. 5 years, in seconds. Matches
    # Sidekiq::Launcher::STATS_TTL.
    STATS_TTL = 5 * 365 * 24 * 60 * 60

    # Build a queue list key from a queue name. Centralizing the concat keeps
    # the prefix in one place even though it's a constant — third-party gems
    # that grep for `"queue:"` still find it via the constant.
    #
    # @param name [#to_s] unprefixed queue name (interpolated as-is).
    # @return [String] `QUEUE_PREFIX` followed by the name.
    def self.queue(name)
      "#{QUEUE_PREFIX}#{name}"
    end
  end
end
@@ -0,0 +1,289 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'component'
4
+ require_relative 'manager'
5
+ require_relative 'processor'
6
+ require_relative 'heartbeat'
7
+ require_relative 'health'
8
+ require_relative 'keys'
9
+ require_relative 'scheduled'
10
+ require_relative 'leader'
11
+ require_relative 'cron'
12
+ require_relative 'metrics/rollup'
13
+ require_relative 'fetcher/reaper'
14
+
15
module Wurk
  # Top-level supervisor inside each worker process. Owns the Manager pool
  # (one per Capsule), the scheduler poller, and the heartbeat thread.
  # The heartbeat WIRE lives in Wurk::Heartbeat — Launcher owns lifecycle,
  # signal dispatch, and stats rollup; Heartbeat owns the Redis writes.
  #
  # Lifecycle:
  # * `run(async_beat:)` — freeze config, start heartbeat, poller, managers.
  # * `quiet` — stop fetching across all managers + poller.
  # * `stop` — graceful drain inside `config[:timeout]`.
  # * `heartbeat` — one-shot beat (also driven by the heartbeat thread).
  #
  # `flush_stats` rolls per-process Processor counters (PROCESSED / FAILURE
  # / EXPIRED) into the global + per-day Redis strings every beat. Per-day
  # keys carry `STATS_TTL` so old days expire automatically.
  #
  # Spec: docs/target/sidekiq-free.md §12 (Sidekiq::Launcher).
  class Launcher
    include Component

    # 5 years, in seconds. Per-day `stat:processed:YYYY-MM-DD` /
    # `stat:failed:YYYY-MM-DD` / `stat:expired:YYYY-MM-DD` strings carry
    # this TTL so they roll off without manual cleanup. Re-exported from
    # Keys (the Redis schema module) so the value is defined exactly once.
    STATS_TTL = Keys::STATS_TTL

    # Re-exported for test/third-party callers that read it off Launcher
    # (Sidekiq's drop-in surface). The single source of truth is Heartbeat.
    BEAT_PAUSE = Heartbeat::BEAT_PAUSE

    attr_accessor :managers, :poller, :cron_poller, :metrics_rollup

    # @param config the process configuration; must expose `capsules`,
    #   `redis`, `freeze!` and `[]` as used below.
    # @param embedded [Boolean] forwarded to the Heartbeat.
    def initialize(config, embedded: false)
      @config = config
      @embedded = embedded
      @done = false
      @managers = config.capsules.values.map { |cap| Manager.new(cap) }
      @poller = build_poller
      @cron_poller = build_cron_poller
      @metrics_rollup = build_metrics_rollup
      @leader = build_leader
      @reaper = build_reaper
      @started_at = nil
      @heartbeat = nil
      @heartbeat_thread = nil
      @health_server = build_health_server
    end

    # Boot order matters:
    # 1. freeze! the config so mutations after fork are visible mistakes.
    # 2. spawn the heartbeat thread BEFORE the managers so the dashboard
    #    sees the process the moment it can pick up jobs.
    # 3. start the scheduler poller + the cron poller (both leader-gated for
    #    what they enqueue; safe to start before leadership is settled since
    #    a non-leader tick just returns early).
    # 4. start the managers (which start their processors).
    # 5. start the health probe server LAST so the listener doesn't
    #    accept k8s probes until the rest of the launcher is up.
    #
    # @param async_beat [Boolean] when false no heartbeat thread is spawned;
    #   the caller is expected to drive #heartbeat itself.
    def run(async_beat: true)
      @started_at = Time.now.to_f
      # Default each capsule's fetcher + materialize its lazy pools/middleware
      # before the config freezes. Every entry point (swarm child, standalone
      # CLI, embedded) runs through here, so none boots with a nil fetcher.
      @config.capsules.each_value(&:prepare!)
      @config.freeze!
      @heartbeat_thread = safe_thread('heartbeat', &method(:start_heartbeat)) if async_beat
      @poller&.start
      @leader&.start
      @cron_poller&.start
      @metrics_rollup&.start
      @managers.each(&:start)
      @reaper.start
      @health_server&.start
    end

    # Idempotent. Flips `stopping?` true, halts fetching across every
    # Manager + the poller, then fires the `:quiet` event in reverse
    # registration order so teardown hooks run LIFO.
    def quiet
      return if @done

      @done = true
      @managers.each(&:quiet)
      @poller&.terminate
      # The cron poller is intentionally NOT terminated here: a USR1-quieted
      # leader still enqueues periodic jobs — it only stops fetching for itself.
      # Loops stop only on full shutdown (#stop). Spec: sidekiq-ent.md §2.6.
      fire_event(:quiet, reverse: true)
    end

    # Graceful shutdown. Deadline is monotonic so wall-clock skew can't
    # extend it. Managers stop in parallel threads so a slow capsule
    # doesn't block its siblings.
    def stop
      deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + (@config[:timeout] || 25)
      quiet
      stoppers = @managers.map { |m| Thread.new { m.stop(deadline) } }
      fire_event(:shutdown, reverse: true)
      stoppers.each(&:join)
      # Full shutdown stops periodic firing (it survived #quiet); do this before
      # releasing the lock so no tick races a follower's promotion.
      @cron_poller&.terminate
      @metrics_rollup&.terminate
      @reaper&.stop
      # CAS-release the cluster lock now (planned shutdown) so a follower can
      # take over immediately instead of waiting out the TTL.
      @leader&.stop
      clear_heartbeat
      fire_event(:exit, reverse: true)
    end

    # @return [Boolean] true once #quiet (or #stop) has run.
    def stopping?
      @done
    end

    # One-shot beat. Public for embedded mode (and for tests) — the
    # heartbeat thread calls this on `BEAT_PAUSE` cadence.
    def heartbeat
      flush_stats
      beat
    end

    # Rolls in-process Processor counters into Redis. Pipelined so a single
    # round trip covers all writes. Skips when all counters are zero to
    # avoid touching keys we have nothing to add to.
    def flush_stats
      processed = Processor::PROCESSED.reset
      failed = Processor::FAILURE.reset
      expired = Processor::EXPIRED.reset
      return if processed.zero? && failed.zero? && expired.zero?

      write_stats(processed, failed, expired)
    rescue StandardError => e
      # Replay-safety: counters were reset above, so a Redis blip would
      # otherwise drop stats. We log and accept — the per-job at-least-once
      # semantics don't apply to *counters*, and the next beat resets again.
      handle_exception(e, { context: 'flush_stats' })
    end

    # Used by tests to inspect the heartbeat thread; not part of the
    # Sidekiq public surface.
    attr_reader :heartbeat_thread

    private

    # Write the three counters (global + per-day key each) in one pipeline.
    # The per-day suffix is the current UTC date formatted with `%F`.
    def write_stats(processed, failed, expired)
      day = Time.now.utc.strftime('%F')
      @config.redis do |conn|
        conn.pipelined do |pipe|
          incr_stat_key(pipe, Keys::STAT_PROCESSED, processed, day)
          incr_stat_key(pipe, 'stat:failed', failed, day)
          incr_stat_key(pipe, Keys::STAT_EXPIRED, expired, day)
        end
      end
    end

    # Bump `key` and `key:<day>` by `value`, refreshing the per-day TTL.
    # Non-positive values are skipped entirely.
    def incr_stat_key(pipe, key, value, day)
      return unless value.positive?

      pipe.call('INCRBY', key, value)
      pipe.call('INCRBY', "#{key}:#{day}", value)
      pipe.call('EXPIRE', "#{key}:#{day}", STATS_TTL)
    end

    # Pipelined identity write via Heartbeat, then dispatch any signals
    # the dashboard queued at `<identity>-signals`. Lazily builds the
    # Heartbeat the first time we beat so callers that bypass `run`
    # (embedded mode, tests) still work.
    def beat
      ensure_heartbeat
      sigs = @heartbeat.beat!
      sigs&.each { |sig| dispatch_signal(sig) }
    end

    # Memoize the Heartbeat. `started_at` falls back to "now" when #run was
    # never called; `quiet` is a lambda so the Heartbeat reads the live flag.
    def ensure_heartbeat
      return if @heartbeat

      @heartbeat = Heartbeat.new(
        identity: identity,
        config: @config,
        started_at: @started_at || Time.now.to_f,
        embedded: @embedded,
        quiet: -> { @done }
      )
    end

    # Erase the live-process footprint. flush_stats first so we don't drop
    # the final batch of counters; then Heartbeat#stop! removes us from the
    # `processes` SET and UNLINK-s the identity + work hashes. The probe
    # server is closed alongside so kubelet stops getting 200s after the
    # process is no longer healthy.
    def clear_heartbeat
      flush_stats
      @heartbeat&.stop!
      @health_server&.stop
    end

    # Heartbeat thread loop. `safe_thread` already wraps exceptions; we
    # exit the loop the moment `stop` flips @done so the thread doesn't
    # outlive the shutdown.
    def start_heartbeat
      until @done
        heartbeat
        sleep BEAT_PAUSE
      end
      logger.info('Heartbeat stopping...')
    end

    # Map a signal name returned by the heartbeat onto a lifecycle action;
    # anything other than TSTP / TERM is logged and ignored.
    def dispatch_signal(sig)
      case sig
      when 'TSTP' then quiet
      when 'TERM' then stop
      else
        logger.warn { "Unknown signal in #{identity}-signals: #{sig.inspect}" }
      end
    end

    def build_poller
      Wurk::Scheduled::Poller.new(@config)
    end

    # Periodic (cron) tick loop. Like the scheduler poller, every process runs
    # one, but only the elected leader enqueues — the single-leader invariant is
    # what guarantees exactly one enqueue per (loop, tick) across the cluster.
    def build_cron_poller
      Wurk::Cron::Poller.new(@config)
    end

    # Leader-only metrics rollup. Every process runs one, but only the elected
    # leader writes the cluster-total time-series buckets the dashboard charts
    # read — a non-leader tick returns early. Tune the cadence (tests shrink it)
    # with `config.metrics_rollup_interval`.
    def build_metrics_rollup
      Wurk::Metrics::Rollup.new(@config)
    end

    # Every worker process campaigns for the single cluster lock (`dear-leader`);
    # one wins and renews it, the rest follow and promote on its death. Cadence
    # falls back to the spec defaults (TTL 30 / renew 15 / follower 60) unless
    # the host tunes it. `Leader#start` no-ops under `WURK_LEADER=false`.
    def build_leader
      Wurk::Leader.new(
        config: @config,
        ttl: @config[:leader_ttl] || Wurk::Leader::DEFAULT_TTL,
        renew_interval: @config[:leader_renew_interval] || Wurk::Leader::DEFAULT_RENEW_INTERVAL,
        follower_interval: @config[:leader_follower_interval] || Wurk::Leader::DEFAULT_FOLLOWER_INTERVAL
      )
    end

    # Reliable-fetch orphan reclamation. Every worker runs one; a cluster
    # `SET NX EX` lock ensures only one actually sweeps per interval, so this
    # is leader-independent (it keeps working if the leader dies). Tune the
    # cadence with `config.super_fetch_reaper_interval`.
    def build_reaper
      Wurk::Fetcher::Reaper.new(
        @config,
        interval: @config[:super_fetch_reaper_interval] || Wurk::Fetcher::Reaper::DEFAULT_INTERVAL
      )
    end

    # Returns a Health::Server when `config.health_check(...)` has set
    # `:health_check_options`; nil otherwise. Off by default — the listener
    # is opt-in to keep the worker's port surface minimal.
    def build_health_server
      opts = @config[:health_check_options]
      return nil unless opts

      Health::Server.new(
        self,
        port: opts.fetch(:port, Health::DEFAULT_PORT),
        bind: opts.fetch(:bind, Health::DEFAULT_BIND),
        ready_window: opts.fetch(:ready_window, Health::DEFAULT_READY_WINDOW)
      )
    end
  end
end