@strav/queue 0.3.30 → 0.3.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/queue/circuit_breaker.ts +135 -0
- package/src/queue/index.ts +11 -0
- package/src/queue/queue.ts +152 -6
- package/src/queue/worker.ts +55 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@strav/queue",
|
|
3
|
-
"version": "0.3.30",
|
|
3
|
+
"version": "0.3.33",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Background job processing and task scheduling for the Strav framework",
|
|
6
6
|
"license": "MIT",
|
|
@@ -28,8 +28,8 @@
|
|
|
28
28
|
"./providers/*": "./src/providers/*.ts"
|
|
29
29
|
},
|
|
30
30
|
"peerDependencies": {
|
|
31
|
-
"@strav/kernel": "0.3.
|
|
32
|
-
"@strav/database": "0.3.
|
|
31
|
+
"@strav/kernel": "0.3.33",
|
|
32
|
+
"@strav/database": "0.3.33"
|
|
33
33
|
},
|
|
34
34
|
"scripts": {
|
|
35
35
|
"test": "bun test tests/",
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-handler circuit breaker. Tracks recent failure timestamps in
|
|
3
|
+
* memory; trips the circuit when the failure count within the window
|
|
4
|
+
* reaches the threshold and pauses dispatch of that handler for
|
|
5
|
+
* `cooldownMs`. Dispatches are auto-resumed once the cooldown expires.
|
|
6
|
+
*
|
|
7
|
+
* Intended defense against retry storms — a handler that consistently
|
|
8
|
+
* fails (stale schema, downed dependency) shouldn't keep eating worker
|
|
9
|
+
* cycles and DB connections. Tripping pushes failed jobs back to the
|
|
10
|
+
* queue with a delay so they retry AFTER the cooldown.
|
|
11
|
+
*
|
|
12
|
+
* State is per-process (in-memory). Multi-worker deployments will each
|
|
13
|
+
* track independently — that's fine; each worker self-pauses without
|
|
14
|
+
* cross-talk, and a handler that's failing for a global reason will
|
|
15
|
+
* trip every worker quickly.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import Emitter from '@strav/kernel/events/emitter'
|
|
19
|
+
|
|
20
|
+
/** Tunables a caller may supply when attaching a breaker to a handler. */
export interface CircuitBreakerOptions {
  /** Failures inside one window that trip the breaker. Default: 10. */
  threshold?: number
  /** Length in ms of the window in which failures are counted. Default: 60_000 (1 min). */
  windowMs?: number
  /** How long in ms dispatch stays paused once tripped. Default: 30_000 (30 s). */
  cooldownMs?: number
}

/** Breaker options after defaults have been applied: every field is present. */
export interface ResolvedBreakerOptions {
  threshold: number
  windowMs: number
  cooldownMs: number
}

/** Mutable per-handler bookkeeping stored in the module-level registry. */
interface BreakerState {
  options: ResolvedBreakerOptions
  /** Unix-ms timestamps of recorded failures; no particular ordering is enforced. */
  failures: number[]
  /** Unix-ms instant at which the cooldown ends; `null` while the circuit is closed. */
  trippedUntil: number | null
}

/** Fallback values used for any option the caller leaves out. */
const DEFAULTS: ResolvedBreakerOptions = { threshold: 10, windowMs: 60_000, cooldownMs: 30_000 }

/** In-process registry of breaker state, keyed by handler name. */
const breakers = new Map<string, BreakerState>()
|
|
48
|
+
|
|
49
|
+
/**
 * Register a breaker for a handler, or replace an existing one.
 *
 * Options that are left out — or explicitly passed as `undefined` — fall
 * back to the defaults (threshold 10, windowMs 60_000, cooldownMs 30_000).
 * Calling this again for the same handler starts from a clean slate:
 * recorded failures are dropped and the circuit is closed.
 *
 * @param handlerName Job name the breaker guards.
 * @param options Partial overrides for the default limits.
 */
export function configureBreaker(handlerName: string, options: CircuitBreakerOptions): void {
  // Resolve field by field with `??`. An object spread would copy an explicit
  // `undefined` over the default, and a breaker whose threshold is `undefined`
  // can never trip because `length >= undefined` is always false.
  breakers.set(handlerName, {
    options: {
      threshold: options.threshold ?? DEFAULTS.threshold,
      windowMs: options.windowMs ?? DEFAULTS.windowMs,
      cooldownMs: options.cooldownMs ?? DEFAULTS.cooldownMs,
    },
    failures: [],
    trippedUntil: null,
  })
}
|
|
57
|
+
|
|
58
|
+
/**
 * Test-only helper: drops every registered breaker together with its
 * failure history and tripped state.
 */
export function resetBreakers(): void {
  breakers.clear()
}
|
|
62
|
+
|
|
63
|
+
/**
 * Report whether dispatch of a handler is currently paused.
 *
 * Yields `null` when no breaker is registered for the handler or its
 * circuit is closed. While the circuit is open and the cooldown has not
 * elapsed, yields the time left in ms. Once `now` has reached the end of
 * the cooldown, the circuit is closed, the failure history is wiped, a
 * `queue:circuit_reset` event is emitted (only if someone listens) and
 * `null` is returned.
 *
 * @param handlerName Job name whose breaker is inspected.
 * @param now Current unix-ms time; defaults to `Date.now()`.
 * @returns Remaining cooldown in ms, or `null` when the handler may run.
 */
export function checkBreaker(handlerName: string, now: number = Date.now()): number | null {
  const entry = breakers.get(handlerName)
  const deadline = entry?.trippedUntil ?? null
  if (!entry || deadline === null) return null

  // Still inside the cooldown: tell the caller how long is left.
  if (!(now >= deadline)) return deadline - now

  // Cooldown is over. Close the circuit and forget old failures so the
  // next burst is counted in a fresh window.
  entry.trippedUntil = null
  entry.failures = []
  if (Emitter.listenerCount('queue:circuit_reset') > 0) {
    void Emitter.emit('queue:circuit_reset', { handler: handlerName }).catch(() => {})
  }
  return null
}
|
|
88
|
+
|
|
89
|
+
/**
 * Record a failure for a handler and trip the circuit when the number of
 * failures within `windowMs` reaches `threshold`.
 *
 * @param handlerName Job name whose breaker is updated.
 * @param now Current unix-ms time; defaults to `Date.now()`.
 * @returns `null` when no breaker is registered or the threshold is not
 *   yet reached; the full `cooldownMs` when this failure trips the circuit;
 *   the remaining cooldown (> 0) when the circuit is already open.
 */
export function recordFailure(handlerName: string, now: number = Date.now()): number | null {
  const state = breakers.get(handlerName)
  if (!state) return null

  // Let `checkBreaker` settle the tripped state first. If the cooldown is
  // still running, the failure is not counted (the history is wiped when the
  // circuit closes anyway) and the remaining time is returned. If the cooldown
  // has already expired, the circuit is closed here so this failure lands in
  // the fresh window instead of being reported as a zero or negative
  // "remaining" value and then discarded by the next reset.
  const remaining = checkBreaker(handlerName, now)
  if (remaining !== null) return remaining

  // Drop failures that fell out of the window, then add the new one.
  const cutoff = now - state.options.windowMs
  state.failures = state.failures.filter(t => t > cutoff)
  state.failures.push(now)

  if (state.failures.length >= state.options.threshold) {
    state.trippedUntil = now + state.options.cooldownMs
    if (Emitter.listenerCount('queue:circuit_tripped') > 0) {
      void Emitter.emit('queue:circuit_tripped', {
        handler: handlerName,
        threshold: state.options.threshold,
        windowMs: state.options.windowMs,
        cooldownMs: state.options.cooldownMs,
        trippedUntil: state.trippedUntil,
      }).catch(() => {})
    }
    return state.options.cooldownMs
  }

  return null
}
|
|
124
|
+
|
|
125
|
+
/**
 * Note a successful run of a handler. While the circuit is closed this
 * wipes the failure history, so sporadic errors never add up to a trip.
 * An open circuit is left untouched — only cooldown expiry closes it.
 *
 * @param handlerName Job name whose breaker is updated.
 */
export function recordSuccess(handlerName: string): void {
  const entry = breakers.get(handlerName)
  if (entry && entry.trippedUntil === null) {
    entry.failures = []
  }
}
|
package/src/queue/index.ts
CHANGED
|
@@ -7,5 +7,16 @@ export type {
|
|
|
7
7
|
JobRecord,
|
|
8
8
|
FailedJobRecord,
|
|
9
9
|
JobHandler,
|
|
10
|
+
JobHandlerOptions,
|
|
11
|
+
JobHandlerRegistration,
|
|
12
|
+
JobPayloadSchema,
|
|
10
13
|
} from './queue.ts'
|
|
11
14
|
export type { WorkerOptions } from './worker.ts'
|
|
15
|
+
export {
|
|
16
|
+
configureBreaker,
|
|
17
|
+
checkBreaker,
|
|
18
|
+
recordFailure,
|
|
19
|
+
recordSuccess,
|
|
20
|
+
resetBreakers,
|
|
21
|
+
} from './circuit_breaker.ts'
|
|
22
|
+
export type { CircuitBreakerOptions, ResolvedBreakerOptions } from './circuit_breaker.ts'
|
package/src/queue/queue.ts
CHANGED
|
@@ -3,6 +3,7 @@ import Configuration from '@strav/kernel/config/configuration'
|
|
|
3
3
|
import Database from '@strav/database/database/database'
|
|
4
4
|
import Emitter from '@strav/kernel/events/emitter'
|
|
5
5
|
import { ConfigurationError } from '@strav/kernel/exceptions/errors'
|
|
6
|
+
import { configureBreaker, type CircuitBreakerOptions } from './circuit_breaker.ts'
|
|
6
7
|
|
|
7
8
|
export interface JobOptions {
|
|
8
9
|
queue?: string
|
|
@@ -26,6 +27,29 @@ export interface JobMeta {
|
|
|
26
27
|
job: string
|
|
27
28
|
attempts: number
|
|
28
29
|
maxAttempts: number
|
|
30
|
+
/**
|
|
31
|
+
* Report progress for a long-running job. `value` is `0..1`. The reported
|
|
32
|
+
* value is persisted to the job row so external consumers can poll via
|
|
33
|
+
* {@link Queue.progressOf}, and a `queue:progress` event is emitted for
|
|
34
|
+
* live consumers (e.g. SSE).
|
|
35
|
+
*
|
|
36
|
+
* Returns immediately after persisting; safe to call from a tight loop
|
|
37
|
+
* but throttle to avoid hammering the database (e.g. every N rows or
|
|
38
|
+
* every 1 s).
|
|
39
|
+
*/
|
|
40
|
+
progress: (value: number, message?: string) => Promise<void>
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/** Point-in-time view of a job's progress, as produced by {@link Queue.progressOf}. */
export interface JobProgress {
  /** Id of the job row the snapshot was read from. */
  id: number
  /** Most recent progress value reported by the handler, in the range 0..1. */
  value: number
  /** Free-form note sent with the last update, or `null` when none was given. */
  message: string | null
  /** Attempt counter of the job at the time of the read. */
  attempts: number
}
|
|
30
54
|
|
|
31
55
|
/** A raw job row from the _strav_jobs table. */
|
|
@@ -54,6 +78,46 @@ export interface FailedJobRecord {
|
|
|
54
78
|
|
|
55
79
|
export type JobHandler<T = any> = (payload: T, meta: JobMeta) => void | Promise<void>
|
|
56
80
|
|
|
81
|
+
/**
 * Smallest contract a payload validator has to meet: a `parse(input)`
 * method that returns the typed value. Zod, ArkType, Valibot or a
 * hand-written validator all fit.
 *
 * The worker runs the schema when a job is dequeued, before the handler
 * is invoked, so a row edited in the database or a payload written by an
 * older code revision is rejected instead of reaching the handler in a
 * half-formed shape.
 */
export interface JobPayloadSchema<T = unknown> {
  parse(input: unknown): T
}
|
|
91
|
+
|
|
92
|
+
/** Optional settings accepted when a job handler is registered. */
export interface JobHandlerOptions<T = any> {
  /**
   * Payload schema. When present, the worker runs `schema.parse(payload)`
   * ahead of the handler, and a parse error sends the job to
   * `_strav_failed_jobs` carrying the validation message.
   *
   * Worth setting whenever the payload originates outside the application
   * (HTTP webhook, customer upload) or the handler code has changed since
   * older jobs were enqueued: the parse fails fast and catches drift before
   * the handler can corrupt state.
   */
  schema?: JobPayloadSchema<T>
  /**
   * Circuit breaker for this handler. It trips once the number of failures
   * inside `windowMs` reaches `threshold`, and dispatch is then paused for
   * `cooldownMs`. This guards against retry storms, where a handler broken
   * by a stale schema or a downed dependency keeps consuming worker cycles.
   * Defaults: threshold 10, windowMs 60_000, cooldownMs 30_000.
   */
  circuitBreaker?: CircuitBreakerOptions
}
|
|
114
|
+
|
|
115
|
+
/** Entry kept in `Queue._handlers`: the handler plus its optional payload schema. */
export interface JobHandlerRegistration<T = any> {
  handler: JobHandler<T>
  schema?: JobPayloadSchema<T>
}
|
|
120
|
+
|
|
57
121
|
/**
|
|
58
122
|
* PostgreSQL-backed job queue.
|
|
59
123
|
*
|
|
@@ -72,7 +136,7 @@ export type JobHandler<T = any> = (payload: T, meta: JobMeta) => void | Promise<
|
|
|
72
136
|
export default class Queue {
|
|
73
137
|
private static _db: Database
|
|
74
138
|
private static _config: QueueConfig
|
|
75
|
-
private static _handlers = new Map<string,
|
|
139
|
+
private static _handlers = new Map<string, JobHandlerRegistration>()
|
|
76
140
|
|
|
77
141
|
constructor(db: Database, config: Configuration) {
|
|
78
142
|
Queue._db = db
|
|
@@ -97,7 +161,7 @@ export default class Queue {
|
|
|
97
161
|
return Queue._config
|
|
98
162
|
}
|
|
99
163
|
|
|
100
|
-
static get handlers(): Map<string,
|
|
164
|
+
static get handlers(): Map<string, JobHandlerRegistration> {
|
|
101
165
|
return Queue._handlers
|
|
102
166
|
}
|
|
103
167
|
|
|
@@ -116,10 +180,23 @@ export default class Queue {
|
|
|
116
180
|
"timeout" INT NOT NULL DEFAULT 60000,
|
|
117
181
|
"available_at" TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
118
182
|
"reserved_at" TIMESTAMPTZ,
|
|
119
|
-
"created_at" TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
183
|
+
"created_at" TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
184
|
+
"progress" NUMERIC NOT NULL DEFAULT 0,
|
|
185
|
+
"progress_message" TEXT
|
|
120
186
|
)
|
|
121
187
|
`
|
|
122
188
|
|
|
189
|
+
// Additive migrations for progress columns — for tables that existed
|
|
190
|
+
// before progress reporting was introduced.
|
|
191
|
+
await sql`
|
|
192
|
+
ALTER TABLE "_strav_jobs"
|
|
193
|
+
ADD COLUMN IF NOT EXISTS "progress" NUMERIC NOT NULL DEFAULT 0
|
|
194
|
+
`
|
|
195
|
+
await sql`
|
|
196
|
+
ALTER TABLE "_strav_jobs"
|
|
197
|
+
ADD COLUMN IF NOT EXISTS "progress_message" TEXT
|
|
198
|
+
`
|
|
199
|
+
|
|
123
200
|
await sql`
|
|
124
201
|
CREATE INDEX IF NOT EXISTS "idx_strav_jobs_queue_available"
|
|
125
202
|
ON "_strav_jobs" ("queue", "available_at")
|
|
@@ -138,9 +215,27 @@ export default class Queue {
|
|
|
138
215
|
`
|
|
139
216
|
}
|
|
140
217
|
|
|
141
|
-
/**
|
|
142
|
-
|
|
143
|
-
|
|
218
|
+
/**
 * Bind a handler function to a job name.
 *
 * With `options.schema` the worker validates the payload (Zod / ArkType /
 * etc.) ahead of the handler; a parse failure routes the job to
 * `_strav_failed_jobs` rather than running the handler on bad data.
 * With `options.circuitBreaker` a breaker is configured under the same name.
 *
 * @example
 * import { z } from 'zod'
 * Queue.handle('send-email', async (payload) => { ... }, {
 *   schema: z.object({ to: z.string().email(), subject: z.string() }),
 * })
 */
static handle<T = any>(
  name: string,
  handler: JobHandler<T>,
  options?: JobHandlerOptions<T>
): void {
  const { schema, circuitBreaker }: JobHandlerOptions<T> = options ?? {}
  Queue._handlers.set(name, { handler, schema })
  if (circuitBreaker) {
    configureBreaker(name, circuitBreaker)
  }
}
|
|
145
240
|
|
|
146
241
|
/**
|
|
@@ -168,6 +263,57 @@ export default class Queue {
|
|
|
168
263
|
return id
|
|
169
264
|
}
|
|
170
265
|
|
|
266
|
+
/**
 * Persist progress for an in-flight job and emit a `queue:progress` event.
 *
 * Workers hand this to handlers through the `JobMeta.progress` callback. It
 * is also exposed statically so other code (e.g. retry replay tools) can
 * update progress directly.
 *
 * @param id Id of the job row to update.
 * @param value Progress value; clamped to `[0, 1]`, with `NaN` stored as 0.
 * @param message Optional human-readable note; stored as `NULL` when omitted.
 */
static async reportProgress(
  id: number,
  value: number,
  message?: string
): Promise<void> {
  const sql = Queue.db.sql
  // `Math.min` / `Math.max` propagate NaN, so a ratio such as `0 / 0` would
  // slip through the clamp. Treat it as "no progress yet".
  const clamped = Number.isNaN(value) ? 0 : Math.max(0, Math.min(1, value))
  const msg = message ?? null
  await sql`
    UPDATE "_strav_jobs"
    SET "progress" = ${clamped}, "progress_message" = ${msg}
    WHERE "id" = ${id}
  `
  if (Emitter.listenerCount('queue:progress') > 0) {
    // Fire-and-forget: a failing listener must not fail the job.
    void Emitter.emit('queue:progress', {
      id,
      value: clamped,
      message: msg,
    }).catch(() => {})
  }
}
|
|
293
|
+
|
|
294
|
+
/**
 * Fetch the most recently stored progress for a job.
 *
 * @param id Id of the job row to look up.
 * @returns The snapshot, or `null` when no row with that id exists — an
 *   unknown id, or a job that already completed (the row is deleted on
 *   success).
 */
static async progressOf(id: number): Promise<JobProgress | null> {
  const matches = await Queue.db.sql`
    SELECT "id", "progress", "progress_message", "attempts"
    FROM "_strav_jobs"
    WHERE "id" = ${id}
    LIMIT 1
  `
  if (matches.length === 0) return null

  const {
    id: rowId,
    progress,
    progress_message: note,
    attempts,
  } = matches[0] as Record<string, unknown>

  return {
    id: Number(rowId),
    value: Number(progress ?? 0),
    message: (note as string | null) ?? null,
    attempts: Number(attempts ?? 0),
  }
}
|
|
316
|
+
|
|
171
317
|
/**
|
|
172
318
|
* Create a listener function suitable for Emitter.on().
|
|
173
319
|
* When the event fires, the payload is pushed onto the queue.
|
package/src/queue/worker.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import Queue, { hydrateJob } from './queue.ts'
|
|
2
2
|
import Emitter from '@strav/kernel/events/emitter'
|
|
3
|
+
import { checkBreaker, recordFailure, recordSuccess } from './circuit_breaker.ts'
|
|
3
4
|
import type { JobRecord, JobMeta } from './queue.ts'
|
|
4
5
|
|
|
5
6
|
export interface WorkerOptions {
|
|
@@ -109,26 +110,55 @@ export default class Worker {
|
|
|
109
110
|
|
|
110
111
|
/** Process a single job: run handler, handle success/failure. */
|
|
111
112
|
private async process(job: JobRecord): Promise<void> {
|
|
112
|
-
const
|
|
113
|
+
const registration = Queue.handlers.get(job.job)
|
|
113
114
|
|
|
114
|
-
if (!
|
|
115
|
+
if (!registration) {
|
|
115
116
|
await this.fail(job, new Error(`No handler registered for job "${job.job}"`))
|
|
116
117
|
return
|
|
117
118
|
}
|
|
118
119
|
|
|
120
|
+
// Q-1: per-handler circuit breaker. If the handler has tripped its
|
|
121
|
+
// breaker (too many failures in the configured window), defer this
|
|
122
|
+
// job rather than running it. Push it back to the queue with
|
|
123
|
+
// `available_at = now + cooldown` so it retries AFTER the breaker
|
|
124
|
+
// resets — this clears the worker to drain unrelated jobs from the
|
|
125
|
+
// queue instead of compounding the failure storm.
|
|
126
|
+
const cooldownRemaining = checkBreaker(job.job)
|
|
127
|
+
if (cooldownRemaining !== null) {
|
|
128
|
+
await this.deferForCooldown(job, cooldownRemaining)
|
|
129
|
+
return
|
|
130
|
+
}
|
|
131
|
+
|
|
119
132
|
const meta: JobMeta = {
|
|
120
133
|
id: job.id,
|
|
121
134
|
queue: job.queue,
|
|
122
135
|
job: job.job,
|
|
123
136
|
attempts: job.attempts,
|
|
124
137
|
maxAttempts: job.maxAttempts,
|
|
138
|
+
progress: (value: number, message?: string) => Queue.reportProgress(job.id, value, message),
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Re-parse the payload through the registered schema (CC-5). Catches
|
|
142
|
+
// payloads that drifted from the handler's expected shape — older
|
|
143
|
+
// enqueues, manual DB edits, malicious tampering. A parse failure
|
|
144
|
+
// is fatal: the job goes straight to failed_jobs without retry,
|
|
145
|
+
// because retrying with the same bad payload won't help.
|
|
146
|
+
let payload = job.payload
|
|
147
|
+
if (registration.schema) {
|
|
148
|
+
try {
|
|
149
|
+
payload = registration.schema.parse(job.payload)
|
|
150
|
+
} catch (err) {
|
|
151
|
+
const detail = err instanceof Error ? err.message : String(err)
|
|
152
|
+
await this.fail(job, new Error(`Job "${job.job}" payload failed validation: ${detail}`))
|
|
153
|
+
return
|
|
154
|
+
}
|
|
125
155
|
}
|
|
126
156
|
|
|
127
157
|
const start = performance.now()
|
|
128
158
|
|
|
129
159
|
try {
|
|
130
160
|
await Promise.race([
|
|
131
|
-
Promise.resolve(handler(
|
|
161
|
+
Promise.resolve(registration.handler(payload, meta)),
|
|
132
162
|
new Promise<never>((_, reject) =>
|
|
133
163
|
setTimeout(
|
|
134
164
|
() => reject(new Error(`Job "${job.job}" timed out after ${job.timeout}ms`)),
|
|
@@ -137,6 +167,7 @@ export default class Worker {
|
|
|
137
167
|
),
|
|
138
168
|
])
|
|
139
169
|
await this.complete(job)
|
|
170
|
+
recordSuccess(job.job)
|
|
140
171
|
|
|
141
172
|
if (Emitter.listenerCount('queue:processed') > 0) {
|
|
142
173
|
const duration = performance.now() - start
|
|
@@ -149,6 +180,9 @@ export default class Worker {
|
|
|
149
180
|
}
|
|
150
181
|
} catch (error) {
|
|
151
182
|
const err = error instanceof Error ? error : new Error(String(error))
|
|
183
|
+
// Update breaker state regardless of retry decision so a job that
|
|
184
|
+
// exhausts its retries still counts toward the trip threshold.
|
|
185
|
+
recordFailure(job.job)
|
|
152
186
|
if (job.attempts >= job.maxAttempts) {
|
|
153
187
|
await this.fail(job, err)
|
|
154
188
|
|
|
@@ -197,6 +231,24 @@ export default class Worker {
|
|
|
197
231
|
`
|
|
198
232
|
}
|
|
199
233
|
|
|
234
|
+
/**
 * Hand a job whose handler is circuit-tripped back to the queue.
 *
 * The row is released (`reserved_at` cleared) and `available_at` is moved
 * past the cooldown, with a floor of one second. The attempts counter is
 * decremented (never below 0) so the deferral does not consume retry
 * budget — the handler never ran for this pickup.
 *
 * @param job The reserved job to release.
 * @param cooldownMs Remaining breaker cooldown in ms.
 */
private async deferForCooldown(job: JobRecord, cooldownMs: number): Promise<void> {
  const delayMs = cooldownMs < 1_000 ? 1_000 : cooldownMs
  const availableAt = new Date(Date.now() + delayMs)

  await Queue.db.sql`
    UPDATE "_strav_jobs"
    SET "reserved_at" = NULL,
        "available_at" = ${availableAt},
        "attempts" = GREATEST("attempts" - 1, 0)
    WHERE "id" = ${job.id}
  `
}
|
|
251
|
+
|
|
200
252
|
/** Calculate backoff delay in ms based on attempt number. */
|
|
201
253
|
backoffDelay(attempts: number): number {
|
|
202
254
|
if (Queue.config.retryBackoff === 'linear') {
|