wurk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +43 -0
  3. data/CONTRIBUTING.md +73 -0
  4. data/LICENSE +21 -0
  5. data/README.md +137 -0
  6. data/SECURITY.md +39 -0
  7. data/app/controllers/wurk/api/pagination.rb +67 -0
  8. data/app/controllers/wurk/api/serializers.rb +131 -0
  9. data/app/controllers/wurk/api_controller.rb +248 -0
  10. data/app/controllers/wurk/application_controller.rb +7 -0
  11. data/app/controllers/wurk/dashboard_controller.rb +48 -0
  12. data/config/locales/en.yml +15 -0
  13. data/config/routes.rb +34 -0
  14. data/exe/wurk +22 -0
  15. data/lib/active_job/queue_adapters/wurk_adapter.rb +96 -0
  16. data/lib/generators/wurk/install/install_generator.rb +22 -0
  17. data/lib/generators/wurk/install/templates/wurk.rb +16 -0
  18. data/lib/wurk/active_job/wrapper.rb +32 -0
  19. data/lib/wurk/api/fast.rb +78 -0
  20. data/lib/wurk/batch/buffer.rb +26 -0
  21. data/lib/wurk/batch/callback_job.rb +37 -0
  22. data/lib/wurk/batch/callbacks.rb +176 -0
  23. data/lib/wurk/batch/client_middleware.rb +27 -0
  24. data/lib/wurk/batch/death_handler.rb +39 -0
  25. data/lib/wurk/batch/empty.rb +21 -0
  26. data/lib/wurk/batch/server_middleware.rb +62 -0
  27. data/lib/wurk/batch/status.rb +140 -0
  28. data/lib/wurk/batch.rb +351 -0
  29. data/lib/wurk/batch_set.rb +67 -0
  30. data/lib/wurk/capsule.rb +176 -0
  31. data/lib/wurk/cli.rb +349 -0
  32. data/lib/wurk/client/buffered.rb +372 -0
  33. data/lib/wurk/client.rb +330 -0
  34. data/lib/wurk/compat.rb +136 -0
  35. data/lib/wurk/component.rb +136 -0
  36. data/lib/wurk/configuration.rb +373 -0
  37. data/lib/wurk/context.rb +35 -0
  38. data/lib/wurk/cron.rb +636 -0
  39. data/lib/wurk/dashboard_manifest.rb +39 -0
  40. data/lib/wurk/dead_set.rb +78 -0
  41. data/lib/wurk/deploy.rb +91 -0
  42. data/lib/wurk/embedded.rb +94 -0
  43. data/lib/wurk/encryption.rb +276 -0
  44. data/lib/wurk/engine.rb +81 -0
  45. data/lib/wurk/fetcher/reaper.rb +264 -0
  46. data/lib/wurk/fetcher/reliable.rb +138 -0
  47. data/lib/wurk/fetcher.rb +11 -0
  48. data/lib/wurk/health.rb +193 -0
  49. data/lib/wurk/heartbeat.rb +211 -0
  50. data/lib/wurk/iterable_job.rb +292 -0
  51. data/lib/wurk/job/options.rb +70 -0
  52. data/lib/wurk/job.rb +33 -0
  53. data/lib/wurk/job_logger.rb +68 -0
  54. data/lib/wurk/job_record.rb +156 -0
  55. data/lib/wurk/job_retry.rb +320 -0
  56. data/lib/wurk/job_set.rb +212 -0
  57. data/lib/wurk/job_util.rb +162 -0
  58. data/lib/wurk/keys.rb +52 -0
  59. data/lib/wurk/launcher.rb +289 -0
  60. data/lib/wurk/leader.rb +221 -0
  61. data/lib/wurk/limiter/base.rb +138 -0
  62. data/lib/wurk/limiter/bucket.rb +80 -0
  63. data/lib/wurk/limiter/concurrent.rb +132 -0
  64. data/lib/wurk/limiter/leaky.rb +91 -0
  65. data/lib/wurk/limiter/points.rb +89 -0
  66. data/lib/wurk/limiter/server_middleware.rb +77 -0
  67. data/lib/wurk/limiter/unlimited.rb +48 -0
  68. data/lib/wurk/limiter/window.rb +80 -0
  69. data/lib/wurk/limiter.rb +255 -0
  70. data/lib/wurk/logger.rb +81 -0
  71. data/lib/wurk/lua/loader.rb +53 -0
  72. data/lib/wurk/lua.rb +187 -0
  73. data/lib/wurk/manager.rb +132 -0
  74. data/lib/wurk/metrics/history.rb +151 -0
  75. data/lib/wurk/metrics/query.rb +173 -0
  76. data/lib/wurk/metrics/rollup.rb +169 -0
  77. data/lib/wurk/metrics/statsd.rb +197 -0
  78. data/lib/wurk/metrics.rb +7 -0
  79. data/lib/wurk/middleware/chain.rb +128 -0
  80. data/lib/wurk/middleware/current_attributes.rb +87 -0
  81. data/lib/wurk/middleware/expiry.rb +50 -0
  82. data/lib/wurk/middleware/i18n.rb +63 -0
  83. data/lib/wurk/middleware/interrupt_handler.rb +45 -0
  84. data/lib/wurk/middleware/poison_pill.rb +149 -0
  85. data/lib/wurk/middleware.rb +34 -0
  86. data/lib/wurk/process_set.rb +243 -0
  87. data/lib/wurk/processor.rb +247 -0
  88. data/lib/wurk/queue.rb +108 -0
  89. data/lib/wurk/queues.rb +80 -0
  90. data/lib/wurk/rails.rb +9 -0
  91. data/lib/wurk/railtie.rb +28 -0
  92. data/lib/wurk/redis_pool.rb +79 -0
  93. data/lib/wurk/retry_set.rb +17 -0
  94. data/lib/wurk/scheduled.rb +189 -0
  95. data/lib/wurk/scheduled_set.rb +18 -0
  96. data/lib/wurk/sorted_entry.rb +95 -0
  97. data/lib/wurk/stats.rb +190 -0
  98. data/lib/wurk/swarm/child_boot.rb +105 -0
  99. data/lib/wurk/swarm.rb +260 -0
  100. data/lib/wurk/testing.rb +102 -0
  101. data/lib/wurk/topology.rb +74 -0
  102. data/lib/wurk/unique.rb +240 -0
  103. data/lib/wurk/version.rb +5 -0
  104. data/lib/wurk/web/config.rb +180 -0
  105. data/lib/wurk/web/enterprise.rb +138 -0
  106. data/lib/wurk/web/search.rb +139 -0
  107. data/lib/wurk/web.rb +25 -0
  108. data/lib/wurk/work_set.rb +116 -0
  109. data/lib/wurk/worker/setter.rb +93 -0
  110. data/lib/wurk/worker.rb +216 -0
  111. data/lib/wurk.rb +238 -0
  112. data/vendor/assets/dashboard/assets/index-8P3N_m1X.js +152 -0
  113. data/vendor/assets/dashboard/assets/index-Bqz4_SOQ.css +1 -0
  114. data/vendor/assets/dashboard/index.html +13 -0
  115. data/vendor/assets/dashboard/wurk-manifest.json +4 -0
  116. metadata +232 -0
data/lib/wurk/lua.rb ADDED
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
+ module Wurk
6
+ # EVALSHA-cached Lua scripts. Loaded once per pool, never re-uploaded.
7
+ # Bulk enqueue, multi-pop, atomic schedule promotion, batch ops.
8
+ #
9
+ # Source strings are intentionally bare — the SHA1 of each is computed
10
+ # at load time and is the same value Redis reports from `SCRIPT LOAD`.
11
+ # Whitespace edits change the SHA, which forces a re-upload at runtime.
12
+ #
13
+ # `:zpopbyscore` is reproduced verbatim from sidekiq-free.md §1.8 and
14
+ # MUST NOT diverge — parity tests will fail on a single byte change.
15
+ module Lua
16
+ ZPOPBYSCORE = <<~LUA
17
+ local key, now = KEYS[1], ARGV[1]
18
+ local jobs = redis.call("zrange", key, "-inf", now, "byscore", "limit", 0, 1)
19
+ if jobs[1] then
20
+ redis.call("zrem", key, jobs[1])
21
+ return jobs[1]
22
+ end
23
+ LUA
24
+
25
+ # Bulk enqueue to a single queue.
26
+ # KEYS = [queue_list, queues_set]
27
+ # ARGV = [queue_name, job_json, ...]
28
+ # Returns the number of jobs pushed.
29
+ BULK_PUSH = <<~LUA
30
+ redis.call("sadd", KEYS[2], ARGV[1])
31
+ for i = 2, #ARGV do
32
+ redis.call("lpush", KEYS[1], ARGV[i])
33
+ end
34
+ return #ARGV - 1
35
+ LUA
36
+
37
+ # Pro reliable scheduler: atomically promote all due jobs in a sorted
38
+ # set to their target queues. Pure-Ruby promotion does ZRANGE → ZREM →
39
+ # LPUSH non-atomically and can lose jobs on a mid-step crash.
40
+ # KEYS = [sorted_set, queues_set]
41
+ # ARGV = [now, queue_prefix]
42
+ # Returns the number of jobs promoted.
43
+ # Order matters: decode + push BEFORE zrem. Redis Lua has no rollback,
44
+ # so a failed cjson.decode after a zrem would lose the job. Decode first;
45
+ # push first; only then remove from the sorted set. Worst case is a
46
+ # crash between lpush and zrem → at-least-once redelivery, never loss.
47
+ RELIABLE_SCHEDULE_PROMOTE = <<~LUA
48
+ local jobs = redis.call("zrangebyscore", KEYS[1], "-inf", ARGV[1])
49
+ for i = 1, #jobs do
50
+ local job = jobs[i]
51
+ local q = cjson.decode(job)["queue"]
52
+ redis.call("sadd", KEYS[2], q)
53
+ redis.call("lpush", ARGV[2] .. q, job)
54
+ redis.call("zrem", KEYS[1], job)
55
+ end
56
+ return #jobs
57
+ LUA
58
+
59
+ # Pro Batch: register a job into a batch and push it to its queue
60
+ # atomically. Keeps total/pending in sync with the jids set.
61
+ # KEYS = [b-<bid>, b-<bid>-jids, queue_list, queues_set]
62
+ # ARGV = [queue_name, jid, job_json]
63
+ # Returns 1.
64
+ BATCH_PUSH = <<~LUA
65
+ redis.call("hincrby", KEYS[1], "total", 1)
66
+ redis.call("hincrby", KEYS[1], "pending", 1)
67
+ redis.call("sadd", KEYS[2], ARGV[2])
68
+ redis.call("sadd", KEYS[4], ARGV[1])
69
+ redis.call("lpush", KEYS[3], ARGV[3])
70
+ return 1
71
+ LUA
72
+
73
+ # Pro Batch: ACK a job that completed successfully. SREM from the live
74
+ # jids set and decrement pending iff the jid was a member (idempotent
75
+ # against double-success on a flaky retry).
76
+ # KEYS = [b-<bid>, b-<bid>-jids]
77
+ # ARGV = [jid]
78
+ # Returns [new_pending, live_jids_remaining], or [-1, -1] when the jid
79
+ # was not a member (treat as already acked).
80
+ BATCH_ACK_SUCCESS = <<~LUA
81
+ local removed = redis.call("srem", KEYS[2], ARGV[1])
82
+ if removed == 1 then
83
+ local pending = redis.call("hincrby", KEYS[1], "pending", -1)
84
+ return { pending, redis.call("scard", KEYS[2]) }
85
+ end
86
+ return { -1, -1 }
87
+ LUA
88
+
89
+ # Pro Batch: ACK a job that exhausted retries and died. Records death,
90
+ # bumps failures, and SREMs from live jids so the batch can fire
91
+ # `:complete` even with terminally failed jobs.
92
+ # KEYS = [b-<bid>, b-<bid>-jids, b-<bid>-died, b-<bid>-failed]
93
+ # ARGV = [jid]
94
+ # Returns [live_jids_remaining, died_count, first_death]. `first_death`
95
+ # is 1 the first time *any* jid is SADDed into the died set, 0 thereafter
96
+ # — caller uses it to fire `:death` exactly once per batch.
97
+ BATCH_ACK_COMPLETE = <<~LUA
98
+ local was_pre_existing_death = redis.call("scard", KEYS[3])
99
+ redis.call("srem", KEYS[2], ARGV[1])
100
+ redis.call("sadd", KEYS[4], ARGV[1])
101
+ local died_added = redis.call("sadd", KEYS[3], ARGV[1])
102
+ redis.call("hincrby", KEYS[1], "failures", 1)
103
+ local first_death = 0
104
+ if was_pre_existing_death == 0 and died_added == 1 then
105
+ first_death = 1
106
+ end
107
+ return { redis.call("scard", KEYS[2]), redis.call("scard", KEYS[3]), first_death }
108
+ LUA
109
+
110
+ # Pro Batch: invalidate all pending jobs. The jobs themselves stay
111
+ # in their queues — the server middleware short-circuits when it sees
112
+ # the invalidated flag — but the jids set is cleared so the batch can
113
+ # no longer accept completion callbacks.
114
+ # KEYS = [b-<bid>, b-<bid>-jids]
115
+ # ARGV = []
116
+ # Returns 1.
117
+ BATCH_INVALIDATE = <<~LUA
118
+ redis.call("del", KEYS[2])
119
+ redis.call("hset", KEYS[1], "invalidated", "1")
120
+ return 1
121
+ LUA
122
+
123
+ # Pro Fast API (§11): server-side LRANGE+LREM to delete a single job by
124
+ # jid from a queue list. Pure-Ruby Queue#find_job + JobRecord#delete is
125
+ # O(N) round-trips; this is O(1) round-trip with O(N) Lua work.
126
+ # KEYS = [queue:<name>]
127
+ # ARGV = [jid]
128
+ # Returns the number of payloads removed (0 or 1; can be >1 in pathological
129
+ # duplicate-jid corruption — caller doesn't rely on the value).
130
+ FAST_DELETE_JOB = <<~LUA
131
+ local items = redis.call("lrange", KEYS[1], 0, -1)
132
+ local removed = 0
133
+ for i = 1, #items do
134
+ if string.find(items[i], '"jid":"' .. ARGV[1] .. '"', 1, true) then
135
+ removed = removed + redis.call("lrem", KEYS[1], 1, items[i])
136
+ end
137
+ end
138
+ return removed
139
+ LUA
140
+
141
+ # Pro Fast API (§11): server-side LRANGE+LREM removing every payload whose
142
+ # `"class":"<klass>"` field matches. Plain-text scan (no JSON parse) so
143
+ # it tolerates partial corruption — caller drops only well-formed matches.
144
+ # KEYS = [queue:<name>]
145
+ # ARGV = [klass]
146
+ # Returns the number of payloads removed.
147
+ FAST_DELETE_BY_CLASS = <<~LUA
148
+ local items = redis.call("lrange", KEYS[1], 0, -1)
149
+ local removed = 0
150
+ local needle = '"class":"' .. ARGV[1] .. '"'
151
+ for i = 1, #items do
152
+ if string.find(items[i], needle, 1, true) then
153
+ removed = removed + redis.call("lrem", KEYS[1], 1, items[i])
154
+ end
155
+ end
156
+ return removed
157
+ LUA
158
+
159
# Disk-backed scripts. Every `*.lua` file found directly inside the `lua`
# directory next to this file is read once, at load time, and stored under
# its basename (minus `.lua`) as a symbol. By convention the limiter scripts
# live here as `lib/wurk/lua/limiter_*.lua`, one file per limiter type, which
# keeps rate-limiter diffs small and each script self-contained for the
# `redis-cli --eval` debug workflow.
LUA_DIR = File.expand_path('lua', __dir__)
FILE_SCRIPTS = Dir.glob(File.join(LUA_DIR, '*.lua')).to_h do |script_path|
  [File.basename(script_path, '.lua').to_sym, File.read(script_path)]
end.freeze
168
+
169
# Name => source registry for every script. The inline scripts are listed
# first and the disk-backed ones are merged in afterwards, so a file whose
# basename matches an inline name replaces the inline source.
SCRIPTS = {
  zpopbyscore: ZPOPBYSCORE,
  bulk_push: BULK_PUSH,
  reliable_schedule_promote: RELIABLE_SCHEDULE_PROMOTE,
  batch_push: BATCH_PUSH,
  batch_ack_success: BATCH_ACK_SUCCESS,
  batch_ack_complete: BATCH_ACK_COMPLETE,
  batch_invalidate: BATCH_INVALIDATE,
  fast_delete_job: FAST_DELETE_JOB,
  fast_delete_by_class: FAST_DELETE_BY_CLASS
}.merge(FILE_SCRIPTS).freeze

# Name => SHA1 hex digest of the script source, i.e. the identifier Redis
# hands back from `SCRIPT LOAD`. Computed once here, at load time, so
# callers can look a digest up instead of hashing the source again.
SHAS = SCRIPTS.to_h { |name, source| [name, Digest::SHA1.hexdigest(source)] }.freeze
184
+ end
185
+ end
186
+
187
+ require_relative 'lua/loader'
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'component'
4
+ require_relative 'processor'
5
+
6
+ module Wurk
7
+ # One per Capsule. Lives inside each forked child and owns the Processor
8
+ # pool. Replaces dead processors on the fly (replace-on-die), forwards
9
+ # the quiet/stop signals received by the Swarm to its processors, and
10
+ # ensures in-flight UnitsOfWork are bulk_requeued before threads are killed.
11
+ #
12
+ # Lifecycle:
13
+ # * `start` — spawn each processor thread.
14
+ # * `quiet` — stop fetching; in-flight jobs run to completion.
15
+ # * `stop(deadline)`— quiet + wait for drain; hard_shutdown on timeout.
16
+ #
17
+ # Spec: docs/target/sidekiq-free.md §13.
18
+ class Manager
19
+ include Component
20
+
21
+ # 0.1 in TTY mode so interactive shutdown feels snappy; 0.5 in
22
+ # production so the supervisor isn't spinning while threads drain.
23
+ PAUSE_TIME = $stdout.tty? ? 0.1 : 0.5
24
+
25
+ attr_reader :workers, :capsule
26
+
27
# Builds the processor pool for +capsule+ without starting any thread.
#
# The capsule doubles as the configuration object (`@config`). One
# Processor is created per unit of `capsule.concurrency`; each is handed
# `processor_result` as its exit callback.
#
# Raises ArgumentError when the capsule's concurrency is below 1.
def initialize(capsule)
  @capsule = capsule
  @config = capsule
  @count = capsule.concurrency
  raise ArgumentError, "Concurrency of #{@count} is not supported" if @count < 1

  @done = false
  @plock = ::Mutex.new
  @workers = Set.new
  @count.times { @workers << Processor.new(@capsule, &method(:processor_result)) }
end
39
+
40
# Starts every processor currently in the pool. Returns the pool.
def start
  @workers.each { |processor| processor.start }
end
43
+
44
# Marks the manager as done and asks every processor to terminate.
# Only the first call has any effect; later calls return nil immediately.
def quiet
  unless @done
    @done = true
    logger.info { "Terminating quiet threads for #{capsule.name} capsule" }
    @workers.each { |processor| processor.terminate }
  end
end
51
+
52
# Graceful shutdown. Quiets the pool, pauses for PAUSE_TIME so asynchronous
# lifecycle hooks (e.g. :quiet) can settle, then polls until the pool is
# empty or +deadline+ (a monotonic clock value) passes. Workers still
# present after the wait are handed to hard_shutdown, which requeues their
# in-flight work before killing the threads.
#
# `capsule.stop` runs on every exit path, including when an error is raised.
def stop(deadline)
  quiet
  sleep PAUSE_TIME
  unless @workers.empty?
    logger.info { 'Pausing to allow jobs to finish...' }
    wait_for(deadline) { @workers.empty? }
    hard_shutdown unless @workers.empty?
  end
ensure
  capsule.stop
end
70
+
71
# Whether shutdown has begun: the flag is false after construction and is
# set to true by the first call to `quiet`.
def stopped?
  @done
end
74
+
75
# Exit callback handed to every Processor. Under the pool lock it removes
# +processor+ from the pool and, unless shutdown has begun, creates, adds
# and starts a replacement so the pool keeps its size. The optional second
# argument is accepted and ignored.
def processor_result(processor, _reason = nil)
  @plock.synchronize do
    @workers.delete(processor)
    next if @done

    replacement = Processor.new(@capsule, &method(:processor_result))
    @workers << replacement
    replacement.start
  end
end
89
+
90
# Forced shutdown, used when the graceful deadline expired with workers
# still in the pool. The order is deliberate: in-flight jobs are handed to
# the fetcher's bulk_requeue BEFORE the threads are killed, because losing
# a job is worse than running it twice (at-least-once contract).
#
# Afterwards waits up to 3 seconds for the pool to empty, since the caller
# typically exits right away and threads need a moment for their `ensure`
# blocks.
def hard_shutdown # rubocop:disable Metrics/AbcSize
  # Snapshot under the lock so exiting processors cannot change the
  # collection while it is being walked.
  stragglers = @plock.synchronize { @workers.dup }

  if stragglers.any?
    in_flight = stragglers.map { |processor| processor.job }.compact

    logger.warn { "Terminating #{stragglers.size} busy threads" }
    logger.debug { "Jobs still in progress #{in_flight.inspect}" }

    capsule.fetcher.bulk_requeue(in_flight)
  end

  stragglers.each { |processor| processor.kill }

  grace_deadline = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) + 3
  wait_for(grace_deadline) { @workers.empty? }
end
116
+
117
+ private
118
+
119
# Polls the given block every PAUSE_TIME seconds and returns as soon as it
# yields a truthy value. Polling stops once no more than PAUSE_TIME remains
# before +deadline+ (a CLOCK_MONOTONIC value), so the loop never spins on
# the last few milliseconds; with that little time left the block is not
# evaluated at all. Always returns nil.
def wait_for(deadline)
  while deadline - ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) > PAUSE_TIME
    return if yield

    sleep PAUSE_TIME
  end
end
131
+ end
132
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../middleware'
4
+
5
+ module Wurk
6
+ module Metrics
7
+ # Ent feature parity (§5): server middleware that records per-job-class
8
+ # execution metrics into Redis time-buckets. The on-the-wire schema is
9
+ # wire-compat with Sidekiq 8.x's history pane so dashboards built against
10
+ # `j|YYMMDD|H:M` HASH keys keep working unchanged.
11
+ #
12
+ # Bucket layout (spec: docs/target/sidekiq-free.md §1.6):
13
+ #
14
+ # j|YYMMDD|H:M HASH per-minute bucket, TTL = MID_TERM (3 days)
15
+ # <klass>|p INT processed count
16
+ # <klass>|f INT failed count
17
+ # <klass>|ms INT total ms spent
18
+ #
19
+ # j|YYMMDD|H:m0 HASH 10-minute rollup (last digit zeroed),
20
+ # TTL = SHORT_TERM (8 hours) — short window for
21
+ # quick aggregate queries without scanning 600 minute keys.
22
+ #
23
+ # <klass>-YYMMDD-H HASH per-class hourly histogram, TTL = MID_TERM
24
+ #
25
+ # Every write re-asserts the bucket's TTL with an unconditional EXPIRE
26
+ # rather than an EXPIRE NX-style "only when the HASH was newly created"
27
+ # guard. This is deliberate: as long as a class keeps running, its
28
+ # buckets stay alive, and the retention window is measured from the
29
+ # *last write*, not from the first write. A bucket that stops receiving
30
+ # writes expires one TTL after its final update.
31
+ #
32
+ # The middleware is hot-path — every job, successful or failed, pays for
33
+ # it. Writes are pipelined in a single round-trip per job (2 HINCRBY +
34
+ # 1 EXPIRE per bucket × up to 3 buckets = at most 9 commands, batched).
35
+ class History
36
+ include Wurk::Middleware::ServerMiddleware
37
+
38
+ # Per spec §1.6 — naming mirrors the upstream constants so anyone
39
+ # grepping the Sidekiq source for `MID_TERM` lands here.
40
+ MID_TERM = 3 * 24 * 60 * 60 # 3 days, in seconds
41
+ SHORT_TERM = 8 * 60 * 60 # 8 hours, in seconds
42
+
43
+ MINUTE_KEY_PREFIX = 'j|'
44
+ DATE_FORMAT = '%y%m%d' # YYMMDD — two-digit year per spec
45
+
46
# Server-middleware entry point. Times the wrapped job with the monotonic
# clock and records the outcome for `job['class']`, whether the job
# returned or raised. Returns whatever the job block returned; an exception
# raised by the job propagates unchanged.
def call(_worker, job, _queue)
  job_class = job['class']
  succeeded = false
  began_at = monotonic_ms
  begin
    # The flag only flips once the job has returned without raising.
    yield.tap { succeeded = true }
  ensure
    elapsed = (monotonic_ms - began_at).round
    # Best-effort: a failed metrics write is reported through handle_error
    # and must never leak into the job's own result.
    begin
      self.class.record(job_class, elapsed, success: succeeded, redis_pool: redis_pool)
    rescue StandardError => e
      handle_error(e)
    end
  end
end
65
+
66
+ class << self
67
# Records one job execution. A successful run counts towards `<klass>|p`,
# a failed one towards `<klass>|f`; the runtime is added to `<klass>|ms`
# for both outcomes, so total time spent per class can be read without
# branching on outcome.
#
# klass       - job class name; nil or empty is silently ignored.
# duration_ms - runtime in milliseconds; truncated to an integer, and
#               negative values are recorded as 0.
# success:    - whether the job finished without raising.
# redis_pool: - pool to check a connection out of; nil uses `Wurk.redis`.
# at:         - time used to pick the buckets (defaults to now).
#
# Always returns nil.
def record(klass, duration_ms, success:, redis_pool: nil, at: ::Time.now)
  unless klass.nil? || klass.empty?
    elapsed = [duration_ms.to_i, 0].max
    keys = {
      minute: minute_key(at),
      rollup: rollup_key(at),
      hour: hour_key(klass, at)
    }
    with_pool(redis_pool) do |conn|
      pipeline_write(conn, klass, elapsed, success, keys)
    end
  end
  nil
end
81
+
82
# Sends every bucket update for one job inside a single pipeline.
#
# The minute and 10-minute rollup hashes hold several classes, so their
# fields are class-qualified (`<klass>|p`, `<klass>|f`, `<klass>|ms`). The
# hourly hash is already per class, so its fields are bare (`p`, `f`,
# `ms`). That is three commands per bucket, nine at most.
#
# When the minute ends in 0 the rollup key is the very same key as the
# minute key. Writing both would count the job twice in that hash, so the
# rollup write is skipped in that case.
#
# NOTE(review): for minutes x1..x9 the rollup write lands in the hash that
# is also the per-minute bucket of minute x0, and it re-sets that key's TTL
# to SHORT_TERM. Confirm that readers of the x0 minute bucket expect the
# 10-minute total there.
def pipeline_write(conn, klass, ms, success, buckets)
  outcome = success ? 'p' : 'f'
  per_class = ["#{klass}|#{outcome}", "#{klass}|ms"]
  minute, rollup, hour = buckets.values_at(:minute, :rollup, :hour)
  conn.pipelined do |pipe|
    incr_bucket(pipe, minute, [*per_class, ms, MID_TERM])
    incr_bucket(pipe, rollup, [*per_class, ms, SHORT_TERM]) if rollup != minute
    incr_bucket(pipe, hour, [outcome, 'ms', ms, MID_TERM])
  end
end
102
+
103
# Queues the three commands for one bucket on +pipe+: add 1 to the outcome
# counter, add the runtime to the millisecond counter, and (re)set the
# key's TTL. +parts+ is `[outcome_field, ms_field, ms, ttl]`.
# Returns the result of the last queued call.
def incr_bucket(pipe, key, parts)
  count_field, time_field, elapsed, ttl = parts
  commands = [
    ['HINCRBY', key, count_field, 1],
    ['HINCRBY', key, time_field, elapsed],
    ['EXPIRE', key, ttl]
  ]
  commands.map { |command| pipe.call(*command) }.last
end
109
+
110
# Public formatters — Wurk::Metrics::Query reuses these so the two
# cannot drift on bucket-naming convention.
#
# Key of the per-minute bucket containing +time+, in UTC:
# `j|YYMMDD|H:M`, with hour and minute printed without zero padding.
#
# +time+ is left untouched. `getutc` returns a UTC copy, whereas `Time#utc`
# would convert the caller's object in place and raise FrozenError on a
# frozen local time.
def minute_key(time)
  t = time.getutc
  format("#{MINUTE_KEY_PREFIX}%<date>s|%<hr>d:%<min>d",
         date: t.strftime(DATE_FORMAT), hr: t.hour, min: t.min)
end
117
+
118
# Key of the 10-minute rollup bucket containing +time+, in UTC. Same layout
# as the minute key, with the minute rounded down to a multiple of ten
# (e.g. minute 47 becomes 40).
#
# +time+ is left untouched. `getutc` returns a UTC copy, whereas `Time#utc`
# would convert the caller's object in place and raise FrozenError on a
# frozen local time.
#
# NOTE(review): for a minute that is already a multiple of ten this returns
# exactly the same string as the minute key, so the rollup and that
# minute's bucket are one Redis hash. Confirm this sharing is intended.
def rollup_key(time)
  t = time.getutc
  format("#{MINUTE_KEY_PREFIX}%<date>s|%<hr>d:%<min>d",
         date: t.strftime(DATE_FORMAT), hr: t.hour, min: (t.min / 10) * 10)
end
123
+
124
# Key of the per-class hourly bucket containing +time+, in UTC:
# `<klass>-YYMMDD-H`, with the hour printed without zero padding.
#
# +time+ is left untouched. `getutc` returns a UTC copy, whereas `Time#utc`
# would convert the caller's object in place and raise FrozenError on a
# frozen local time.
def hour_key(klass, time)
  t = time.getutc
  "#{klass}-#{t.strftime(DATE_FORMAT)}-#{t.hour}"
end
128
+ end
129
+
130
+ private
131
+
132
# Current reading of the monotonic clock as a Float number of
# milliseconds. Used for measuring durations, not for telling the time.
def monotonic_ms
  ::Process.clock_gettime(
    ::Process::CLOCK_MONOTONIC,
    :float_millisecond
  )
end
135
+
136
# Reports a metrics-write failure through the exception handler of this
# middleware's own `config`, falling back to the global
# `Wurk.configuration` when none is set. The context is always
# 'Wurk::Metrics::History'.
def handle_error(err)
  (config || Wurk.configuration).handle_exception(err, context: 'Wurk::Metrics::History')
end
140
+
141
# Runs the given block through `pool.with` when a pool is supplied, and
# through the global `Wurk.redis` otherwise. Returns whatever that call
# returns.
def self.with_pool(pool, &)
  pool ? pool.with(&) : Wurk.redis(&)
end
148
+ private_class_method :with_pool
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,173 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'history'
4
+ require_relative 'rollup'
5
+
6
+ module Wurk
7
+ module Metrics
8
+ # Read-side for the per-class HASH bucket schema written by
9
+ # `Wurk::Metrics::History`. Backs the Web UI's history pane and any
10
+ # external dashboarding code; both rely on the same HGETALL fan-out
11
+ # over a contiguous range of minute / hour keys.
12
+ #
13
+ # Window caps are spec-enforced:
14
+ #
15
+ # minutes ≤ 480 (8h — same as the 10-minute rollup retention)
16
+ # hours ≤ 72 (3d — same as MID_TERM retention)
17
+ #
18
+ # A wider window has no data to read anyway (the buckets are TTL'd out),
19
+ # so we fail loudly rather than silently returning sparse results.
20
+ module Query # rubocop:disable Metrics/ModuleLength
21
+ MAX_MINUTES = 480
22
+ MAX_HOURS = 72
23
+ TOTAL_FIELDS = %w[p f ms].freeze
24
+ private_constant :TOTAL_FIELDS
25
+
26
+ class WindowTooWide < ::ArgumentError; end
27
+
28
+ module_function
29
+
30
# Aggregates per-job-class totals over the most recent minute buckets.
#
# class_filter: - optional prefix; only classes whose name starts with it
#                 are kept. nil or empty keeps everything.
# minutes:      - window length in minutes (default 60).
# hours:        - alternative window length in hours; when given it takes
#                 precedence over +minutes+. It is validated against the
#                 hour cap and the resulting minute count against the
#                 minute cap.
# now:          - end of the window (defaults to the current time).
#
# Returns an array of `[class_name, {p:, f:, ms:}]` tuples sorted by volume
# (p + f) descending, so the UI's "top jobs" table needs no second sort.
#
# Raises ArgumentError for a non-positive window and WindowTooWide when a
# cap is exceeded.
def top_jobs(class_filter: nil, minutes: 60, hours: nil, now: ::Time.now)
  if hours
    whole_hours = cap_hours!(hours)
    # Numeric input keeps its exact value. Anything else (e.g. "2" from a
    # query string) must use the coerced integer: String#* would repeat
    # the text 60 times instead of multiplying.
    minutes = hours.is_a?(::Numeric) ? hours * 60 : whole_hours * 60
  end
  rows = aggregate_minutes(now, cap_minutes!(minutes)).to_a
  rows = rows.select { |(k, _)| k.start_with?(class_filter) } if class_filter && !class_filter.empty?
  rows.sort_by { |(_k, s)| -(s[:p] + s[:f]) }
end
41
+
42
# Time-series for a single job class. Exactly one of +minutes+ / +hours+
# must be given: +minutes+ reads the per-minute buckets, +hours+ reads the
# per-class hourly buckets (separate keys, so a long window does not fan
# out over 4320 minute hashes). The window is checked against the matching
# cap before any key is read.
def for_job(klass, minutes: nil, hours: nil, now: ::Time.now)
  validate_for_job!(klass, minutes, hours)
  if minutes
    minute_series(klass, now, cap_minutes!(minutes))
  else
    hour_series(klass, now, cap_hours!(hours))
  end
end
49
+
50
# Cluster-total time-series for the dashboard throughput/failures charts,
# read from the compact buckets keyed by Wurk::Metrics::Rollup.bucket_key.
#
# bucket         - a key of Wurk::Metrics::Rollup::BUCKETS ('1m'/'5m'/'1h').
# window_seconds - requested span; clamped to the bucket's retention.
# now:           - end of the window (defaults to the current time).
#
# Returns `[{at:, p:, f:, ms:}, ...]` oldest to newest. Buckets with no
# data come back as zeros, so a chart gets a continuous x-axis.
def history(bucket, window_seconds, now: ::Time.now)
  step, ttl = bucket_spec!(bucket)
  window = clamp_history_window!(window_seconds, ttl)
  starts = bucket_starts(now, step, window)
  keys = starts.map { |start| Wurk::Metrics::Rollup.bucket_key(bucket, start) }
  starts.zip(pipeline_hmget(keys, %w[p f ms])).map do |at, (processed, failed, millis)|
    { at: at, p: processed.to_i, f: failed.to_i, ms: millis.to_i }
  end
end
61
+
62
# Looks +bucket+ up in Wurk::Metrics::Rollup::BUCKETS and returns its
# entry. Raises ArgumentError listing the valid names for an unknown one.
def bucket_spec!(bucket)
  known = Wurk::Metrics::Rollup::BUCKETS
  return known[bucket] if known.key?(bucket)

  raise ArgumentError, "bucket must be one of #{Wurk::Metrics::Rollup::BUCKETS.keys.inspect}"
end
67
+
68
# Coerces +window_seconds+ to an Integer and limits it to +ttl+.
# Raises ArgumentError when the value is not an integer or is not positive.
def clamp_history_window!(window_seconds, ttl)
  seconds = Integer(window_seconds)
  raise ArgumentError, 'window must be positive' if seconds <= 0

  seconds <= ttl ? seconds : ttl
end
74
+
75
# Start times (integer epoch seconds) of the last `window / step` buckets,
# oldest first. The newest start is +now+ rounded down to a multiple of
# +step+, so the values line up with the step-aligned keys the rollup
# writes. A window shorter than one step yields an empty array.
def bucket_starts(now, step, window)
  newest = (now.to_i / step) * step
  steps_back = (0...(window / step)).to_a.reverse
  steps_back.map { |back| newest - (back * step) }
end
81
+
82
# Argument check for for_job: the class name must be present and exactly
# one of +minutes+ / +hours+ must be supplied. Raises ArgumentError with a
# message naming the first rule that is broken; returns nil otherwise.
def validate_for_job!(klass, minutes, hours)
  problem =
    if klass.nil? || klass.empty?
      'klass required'
    elsif minutes && hours
      'pass exactly one of minutes: or hours:'
    elsif minutes.nil? && hours.nil?
      'pass minutes: or hours:'
    end
  raise ArgumentError, problem if problem
end
87
+
88
# Coerces +minutes+ to an Integer and checks it against MAX_MINUTES.
# Returns the integer; errors come from Integer() and check_window!.
def cap_minutes!(minutes)
  requested = Integer(minutes)
  check_window!(requested, MAX_MINUTES, 'minutes')
end
91
+
92
# Coerces +hours+ to an Integer and checks it against MAX_HOURS.
# Returns the integer; errors come from Integer() and check_window!.
def cap_hours!(hours)
  requested = Integer(hours)
  check_window!(requested, MAX_HOURS, 'hours')
end
95
+
96
# Validates a window size and returns it unchanged.
# Raises ArgumentError when +value+ is zero or negative, and WindowTooWide
# when it exceeds +max+. +label+ names the unit in the error message.
def check_window!(value, max, label)
  if value <= 0
    raise ArgumentError, "#{label} must be positive"
  elsif value > max
    raise WindowTooWide, "#{label} must be <= #{max} (got #{value})"
  end

  value
end
102
+
103
# Reads the last +minutes+ minute buckets ending at +now+ and folds them
# into one hash of `class_name => {p:, f:, ms:}` totals. The returned hash
# creates a zeroed entry for any class looked up for the first time.
def aggregate_minutes(now, minutes)
  buckets = pipeline_hgetall(minute_keys(now, minutes))
  zeroed = ::Hash.new { |h, k| h[k] = { p: 0, f: 0, ms: 0 } }
  buckets.each_with_object(zeroed) { |bucket, totals| accumulate!(totals, bucket) }
end
108
+
109
# Adds one minute bucket (+hash+, field => count) into +totals+. Field
# names are split at the first `|` into class name and metric; fields with
# no `|` or with a metric outside TOTAL_FIELDS are skipped. Counts are
# parsed with Integer(), so a non-numeric value raises.
# A nil or empty bucket is a no-op returning nil.
def accumulate!(totals, hash)
  return if hash.nil? || hash.empty?

  hash.each do |field, raw_count|
    job_class, metric = field.split('|', 2)
    if metric && TOTAL_FIELDS.include?(metric)
      totals[job_class][metric.to_sym] += Integer(raw_count)
    end
  end
end
119
+
120
# Per-minute series for one class: fetches the class's `|p`, `|f` and `|ms`
# fields from each of the last +minutes+ minute buckets and pairs every
# row with its bucket's timestamp.
def minute_series(klass, now, minutes)
  fields = %w[p f ms].map { |suffix| "#{klass}|#{suffix}" }
  rows = pipeline_hmget(minute_keys(now, minutes), fields)
  zip_rows(minute_timestamps(now, minutes), rows)
end
124
+
125
# Hourly series for one class: builds the per-class hourly key for each of
# the last +hours+ hours, fetches the bare `p`, `f` and `ms` fields, and
# pairs every row with its hour's timestamp.
def hour_series(klass, now, hours)
  hour_marks = hour_timestamps(now, hours)
  hourly_keys = hour_marks.map { |mark| Wurk::Metrics::History.hour_key(klass, mark) }
  rows = pipeline_hmget(hourly_keys, %w[p f ms])
  zip_rows(hour_marks, rows)
end
130
+
131
# Pairs each timestamp with the row at the same position and turns it into
# `{at:, p:, f:, ms:}`. Counts go through `to_i`, so a missing row or a nil
# field becomes 0.
def zip_rows(timestamps, rows)
  timestamps.each_with_index.map do |at, index|
    processed, failed, millis = rows[index]
    { at: at, p: processed.to_i, f: failed.to_i, ms: millis.to_i }
  end
end
134
+
135
# Minute-bucket keys for the last +minutes+ minutes ending at +now+, oldest
# first. Uses the writer's own formatter so reader and writer agree on key
# names.
def minute_keys(now, minutes)
  stamps = minute_timestamps(now, minutes)
  stamps.map { |stamp| Wurk::Metrics::History.minute_key(stamp) }
end
138
+
139
# Start times of the last +minutes+ minutes ending with the minute that
# contains +now+, oldest first. +now+ is truncated to the minute first so
# the boundaries match the writer's buckets; without that, sub-minute
# drift could pull in an unrelated minute at the edge of the window.
def minute_timestamps(now, minutes)
  newest = floor_to(now, :min)
  (0...minutes).to_a.reverse.map { |back| newest - (back * 60) }
end
146
+
147
# Start times of the last +hours+ hours ending with the hour that contains
# +now+, oldest first.
def hour_timestamps(now, hours)
  newest = floor_to(now, :hour)
  (0...hours).to_a.reverse.map { |back| newest - (back * 3600) }
end
151
+
152
# Truncates +time+ to the start of its minute (`:min`) or hour (`:hour`)
# and returns the result as a new UTC Time. Any other unit returns nil.
#
# +time+ is left untouched. `getutc` returns a UTC copy, whereas `Time#utc`
# would convert the caller's object in place and raise FrozenError on a
# frozen local time.
def floor_to(time, unit)
  t = time.getutc
  case unit
  when :min then ::Time.utc(t.year, t.month, t.day, t.hour, t.min)
  when :hour then ::Time.utc(t.year, t.month, t.day, t.hour)
  end
end
159
+
160
# Issues one HGETALL per key inside a single pipeline on the global
# `Wurk.redis` connection and returns what the pipeline returns. An empty
# key list returns [] without touching Redis.
def pipeline_hgetall(keys)
  return [] if keys.empty?

  Wurk.redis do |conn|
    conn.pipelined do |pipe|
      keys.each { |key| pipe.call('HGETALL', key) }
    end
  end
end
165
+
166
# Issues one HMGET of +fields+ per key inside a single pipeline on the
# global `Wurk.redis` connection and returns what the pipeline returns. An
# empty key list returns [] without touching Redis.
def pipeline_hmget(keys, fields)
  return [] if keys.empty?

  Wurk.redis do |conn|
    conn.pipelined do |pipe|
      keys.each { |key| pipe.call('HMGET', key, *fields) }
    end
  end
end
171
+ end
172
+ end
173
+ end