@strav/queue 0.4.31 → 1.0.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +243 -0
- package/package.json +20 -29
- package/src/cron.ts +194 -0
- package/src/database_queue.ts +177 -0
- package/src/failed_jobs_schema.ts +37 -0
- package/src/index.ts +52 -3
- package/src/job.ts +153 -0
- package/src/job_registry.ts +135 -0
- package/src/job_schema.ts +49 -0
- package/src/queue.ts +69 -0
- package/src/scheduler.ts +242 -0
- package/src/scheduler_runs_schema.ts +33 -0
- package/src/sync_queue.ts +126 -0
- package/src/worker.ts +351 -0
- package/src/providers/index.ts +0 -3
- package/src/providers/queue_provider.ts +0 -29
- package/src/queue/circuit_breaker.ts +0 -135
- package/src/queue/index.ts +0 -22
- package/src/queue/queue.ts +0 -493
- package/src/queue/worker.ts +0 -273
- package/src/scheduler/cron.ts +0 -146
- package/src/scheduler/index.ts +0 -8
- package/src/scheduler/runner.ts +0 -116
- package/src/scheduler/schedule.ts +0 -292
- package/src/scheduler/scheduler.ts +0 -71
- package/tsconfig.json +0 -5
package/src/worker.ts
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `Worker` — consumer side of `DatabaseQueue`.
|
|
3
|
+
*
|
|
4
|
+
* The poll loop:
|
|
5
|
+
* 1. Inside one transaction: `SELECT … FOR UPDATE SKIP LOCKED` claims
|
|
6
|
+
* one available row (`available_at <= now() AND reserved_at IS
|
|
7
|
+
* NULL`), then `UPDATE` increments `attempts` + sets
|
|
8
|
+
* `reserved_at = now()`. SKIP LOCKED lets multiple Worker
|
|
9
|
+
* instances poll the same queue concurrently without picking
|
|
10
|
+
* the same row.
|
|
11
|
+
* 2. The transaction COMMITs — the claim is durable. The row stays
|
|
12
|
+
* reserved until the result handling clears it.
|
|
13
|
+
* 3. The Worker constructs the Job via the container and runs
|
|
14
|
+
* `handle(ctx)` with a per-attempt timeout (driven by
|
|
15
|
+
* `AbortSignal.timeout(...)` — handlers that loop should check
|
|
16
|
+
* `ctx.signal.aborted`).
|
|
17
|
+
* 4. On success: DELETE the row.
|
|
18
|
+
* 5. On failure: if `attempts < max_attempts`, schedule a retry —
|
|
19
|
+
* `UPDATE` sets `available_at = now() + backoff` + clears
|
|
20
|
+
* `reserved_at`. Otherwise terminal — INSERT into
|
|
21
|
+
* `strav_failed_jobs` + DELETE from `strav_jobs`, both in a
|
|
22
|
+
* single transaction so the move is atomic. The `queue:retry` /
|
|
23
|
+
* `queue:flush` console commands that act on the failed-jobs
|
|
24
|
+
* table land with `@strav/cli` in M4.
|
|
25
|
+
*
|
|
26
|
+
* Backoff: default exponential (base capped at 300s) with ±25% jitter. Per-
|
|
27
|
+
* job override via `static backoff(attempt)`; per-Worker override
|
|
28
|
+
* via `defaultBackoff`.
|
|
29
|
+
*
|
|
30
|
+
* Graceful shutdown: callers pass an `AbortSignal` to `run()`. The
|
|
31
|
+
* loop checks `signal.aborted` between iterations + before the next
|
|
32
|
+
* sleep, so the worker exits cleanly within one poll-interval window
|
|
33
|
+
* of the abort.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import type { Database } from '@strav/database'
|
|
37
|
+
import { type Container, type Logger, ulid } from '@strav/kernel'
|
|
38
|
+
import type { JobContext, JobFailedContext } from './job.ts'
|
|
39
|
+
import type { JobRegistry } from './job_registry.ts'
|
|
40
|
+
|
|
41
|
+
/**
 * Construction options for `Worker`. Only `db`, `registry` and `container`
 * are required; every other field has a default applied by the constructor.
 */
export interface WorkerOptions {
  /** Database handle the Worker uses to claim rows and record outcomes. */
  db: Database

  /** Lookup table that maps a row's `job_name` to its `JobClass`. */
  registry: JobRegistry

  /** Container that builds Job instances (and resolves their `@inject()` deps). */
  container: Container

  /**
   * Logger for control-plane events (claim, retry, fail).
   * Defaults to a no-op logger.
   */
  logger?: Logger

  /** Names of the queues to poll. Defaults to `['default']`. */
  queues?: readonly string[]

  /** Sleep, in milliseconds, after a poll that found nothing. Defaults to 1000. */
  pollInterval?: number

  /**
   * Per-attempt timeout in seconds, used when the JobClass does not
   * declare its own. Defaults to 60.
   */
  timeoutSeconds?: number

  /**
   * `max_attempts` used when neither the JobClass nor the row provides one.
   * Defaults to 3.
   */
  defaultAttempts?: number

  /**
   * Backoff (attempt number → delay in seconds) used when the JobClass does
   * not declare `backoff`. Defaults to exponential with jitter.
   */
  defaultBackoff?: (attempt: number) => number
}
|
|
61
|
+
|
|
62
|
+
/**
 * What happened to the job that `processOne()` claimed. Discriminated on
 * `status`; handy for tests and one-shot runs.
 */
export type JobResult =
  /** The handler resolved and the row was deleted. */
  | {
      status: 'completed'
      jobId: string
      jobName: string
      attempts: number
    }
  /** The handler failed and the row was rescheduled for `nextAt`. */
  | {
      status: 'retried'
      jobId: string
      jobName: string
      attempts: number
      nextAt: Date
    }
  /** The job failed terminally (or its `job_name` was unknown). */
  | {
      status: 'failed'
      jobId: string
      jobName: string
      attempts: number
      error: unknown
    }
|
|
67
|
+
|
|
68
|
+
/**
 * Columns the claim query selects from `strav_jobs`. `attempts` is the
 * value after the claim's increment once the row leaves `claim()`.
 */
interface JobRow {
  /** Primary key of the queued job. */
  id: string
  /** Queue the job was pushed onto. */
  queue: string
  /** Registry key used to resolve the JobClass. */
  job_name: string
  /** Opaque job payload, passed to the handler untouched. */
  payload: unknown
  /** Number of times the job has been claimed. */
  attempts: number
  /** Attempt budget stored on the row. */
  max_attempts: number
}
|
|
77
|
+
|
|
78
|
+
/**
 * Consumer for rows in `strav_jobs`: claims one row at a time, runs the
 * matching job, then deletes, reschedules, or dead-letters the row.
 */
export class Worker {
  private readonly db: Database
  private readonly registry: JobRegistry
  private readonly container: Container
  private readonly logger: Logger
  private readonly queues: readonly string[]
  private readonly pollInterval: number
  private readonly timeoutSeconds: number
  private readonly defaultAttempts: number
  private readonly defaultBackoff: (attempt: number) => number

  /** Store the collaborators and apply defaults for every optional setting. */
  constructor(opts: WorkerOptions) {
    this.db = opts.db
    this.registry = opts.registry
    this.container = opts.container
    this.logger = opts.logger ?? createNoopLogger()
    this.queues = opts.queues ?? ['default']
    this.pollInterval = opts.pollInterval ?? 1000
    this.timeoutSeconds = opts.timeoutSeconds ?? 60
    this.defaultAttempts = opts.defaultAttempts ?? 3
    this.defaultBackoff = opts.defaultBackoff ?? exponentialBackoff
  }

  /**
   * Process one available job. Returns `null` when the queue has nothing
   * to claim, otherwise a `JobResult` describing the outcome. Tests +
   * one-shot CLI invocations use this directly; `run()` calls it in
   * a loop.
   */
  async processOne(): Promise<JobResult | null> {
    const row = await this.claim()
    if (!row) return null

    const jobClass = this.registry.get(row.job_name)
    if (!jobClass) {
      // Unknown job_name → can't deserialize. Delete the row + log —
      // leaving it would block the queue forever (every poll would
      // re-claim + fail). Apps that need to recover unknown rows
      // should snapshot the queue before changing job_names.
      this.logger.error('Worker: unknown job_name, deleting row', {
        jobId: row.id,
        jobName: row.job_name,
      })
      await this.deleteRow(row.id)
      return {
        status: 'failed',
        jobId: row.id,
        jobName: row.job_name,
        attempts: row.attempts,
        error: new Error(`unknown job_name "${row.job_name}"`),
      }
    }

    const timeoutMs = (jobClass.timeout ?? this.timeoutSeconds) * 1000
    const signal = AbortSignal.timeout(timeoutMs)

    const ctx: JobContext = {
      jobId: row.id,
      attempt: row.attempts,
      payload: row.payload,
      signal,
      log: this.logger,
    }

    // Construction happens inside the try below: if the container (or the
    // job's constructor) throws, the row has already been claimed, so the
    // failure must flow through the retry / terminal path. Otherwise the
    // row would keep its `reserved_at` and never be picked up again.
    const construct = () => this.container.make(jobClass)
    let job: ReturnType<typeof construct> | undefined

    try {
      job = construct()
      await job.handle(ctx)
      await this.deleteRow(row.id)
      return {
        status: 'completed',
        jobId: row.id,
        jobName: row.job_name,
        attempts: row.attempts,
      }
    } catch (error) {
      // Best-effort failed() hook — runs on every failed attempt
      // (intermediate + terminal). A throw here is logged but doesn't
      // change the retry decision; the hook is a notification, not a
      // control point. Skipped when the job could not be constructed.
      if (job?.failed) {
        const failedCtx: JobFailedContext = { ...ctx, error }
        try {
          await job.failed(failedCtx)
        } catch (hookError) {
          this.logger.error('Worker: failed() hook threw', {
            jobId: row.id,
            jobName: row.job_name,
            error: hookError,
          })
        }
      }

      const maxAttempts = jobClass.maxAttempts ?? row.max_attempts ?? this.defaultAttempts
      if (row.attempts >= maxAttempts) {
        // Terminal — atomically move the row to `strav_failed_jobs`
        // so apps can triage what blew up.
        this.logger.error('Worker: job terminal failure', {
          jobId: row.id,
          jobName: row.job_name,
          attempts: row.attempts,
        })
        await this.moveToFailed(row, error)
        return {
          status: 'failed',
          jobId: row.id,
          jobName: row.job_name,
          attempts: row.attempts,
          error,
        }
      }

      const backoff = jobClass.backoff ?? this.defaultBackoff
      const requestedDelay = backoff(row.attempts)
      // A custom backoff that yields NaN / ±Infinity would otherwise produce
      // an unusable delay; fall back to the built-in schedule in that case.
      const delaySeconds = Number.isFinite(requestedDelay)
        ? Math.max(0, requestedDelay)
        : exponentialBackoff(row.attempts)
      await this.scheduleRetry(row.id, delaySeconds)
      this.logger.warn('Worker: job retry scheduled', {
        jobId: row.id,
        jobName: row.job_name,
        attempts: row.attempts,
        delaySeconds,
      })
      return {
        status: 'retried',
        jobId: row.id,
        jobName: row.job_name,
        attempts: row.attempts,
        nextAt: new Date(Date.now() + delaySeconds * 1000),
      }
    }
  }

  /**
   * Run the poll loop until `signal` aborts. Each iteration calls
   * `processOne()`; an empty poll triggers a sleep of `pollInterval`
   * ms. The sleep is abort-aware, so an abort ends the wait early
   * instead of sitting out the full interval.
   */
  async run(signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      try {
        const result = await this.processOne()
        if (result === null) {
          await sleep(this.pollInterval, signal)
        }
      } catch (loopError) {
        // Polling itself failed (network blip, DB restart). Log + sleep
        // before retrying — without the sleep, a persistent failure
        // would burn CPU.
        this.logger.error('Worker: poll iteration failed', { error: loopError })
        await sleep(this.pollInterval, signal)
      }
    }
  }

  /**
   * Claim one row inside a single transaction: `SELECT … FOR UPDATE SKIP
   * LOCKED` picks an available, unreserved row from the configured queues,
   * then `UPDATE` marks it reserved and increments `attempts`.
   *
   * @returns the claimed row with `attempts` already incremented, or `null`
   *          when no row is available.
   */
  private async claim(): Promise<JobRow | null> {
    return this.db.transaction(async (tx) => {
      const row = await tx.queryOne<JobRow>(
        `SELECT id, queue, job_name, payload, attempts, max_attempts
           FROM "strav_jobs"
          WHERE queue = ANY($1::text[])
            AND available_at <= now()
            AND reserved_at IS NULL
          ORDER BY id
          LIMIT 1
          FOR UPDATE SKIP LOCKED`,
        [this.queues],
      )
      if (!row) return null
      await tx.execute(
        `UPDATE "strav_jobs"
            SET reserved_at = now(), attempts = attempts + 1, updated_at = now()
          WHERE id = $1`,
        [row.id],
      )
      // Reflect the increment in the returned row so the caller's
      // attempt counter matches what's in the DB.
      return { ...row, attempts: Number(row.attempts) + 1 }
    })
  }

  /** Delete the `strav_jobs` row with the given id. */
  private async deleteRow(id: string): Promise<void> {
    await this.db.execute(`DELETE FROM "strav_jobs" WHERE id = $1`, [id])
  }

  /**
   * Move a terminal-failure row to `strav_failed_jobs`: INSERT + DELETE in
   * one transaction so the row cannot be half-moved. The `exception` column
   * stores `error.stack` for `Error` values (falling back to
   * `"<name>: <message>"` when there is no stack) and `String(error)` for
   * anything else that was thrown.
   */
  private async moveToFailed(row: JobRow, error: unknown): Promise<void> {
    const exception =
      error instanceof Error ? (error.stack ?? `${error.name}: ${error.message}`) : String(error)
    await this.db.transaction(async (tx) => {
      await tx.execute(
        `INSERT INTO "strav_failed_jobs"
           (id, queue, job_name, payload, exception, attempts, failed_at, created_at, updated_at)
         VALUES ($1, $2, $3, $4::jsonb, $5, $6, now(), now(), now())`,
        [ulid(), row.queue, row.job_name, JSON.stringify(row.payload), exception, row.attempts],
      )
      await tx.execute(`DELETE FROM "strav_jobs" WHERE id = $1`, [row.id])
    })
  }

  /**
   * Release the reservation and make the row available again after
   * `delaySeconds`. The delay is bound as a query parameter rather than
   * spliced into the SQL text, so the statement text stays constant and an
   * odd numeric value cannot produce a malformed `interval` literal.
   */
  private async scheduleRetry(id: string, delaySeconds: number): Promise<void> {
    await this.db.execute(
      `UPDATE "strav_jobs"
          SET available_at = now() + ($2::double precision * interval '1 second'),
              reserved_at = NULL,
              updated_at = now()
        WHERE id = $1`,
      [id, delaySeconds],
    )
  }
}
|
|
301
|
+
|
|
302
|
+
/**
 * Built-in retry schedule: doubles per attempt, then wobbles by up to ±25%.
 *
 * The nominal delay is `2 ** attempt` seconds, clamped to 300 before the
 * jitter is applied (so attempt 1 ≈ 2s, 2 ≈ 4s, 3 ≈ 8s, 4 ≈ 16s, … and
 * attempt 9 onwards ≈ 300s). The result is rounded to whole seconds and is
 * never less than 1.
 *
 * The random spread keeps jobs that failed together from all retrying at
 * the same instant.
 *
 * @param attempt attempt number the delay is computed for
 * @returns delay in seconds
 */
function exponentialBackoff(attempt: number): number {
  const ceilingSeconds = 300
  const spread = 0.25

  const nominal = Math.min(ceilingSeconds, Math.pow(2, attempt))
  // Uniform in [-1, 1), scaled to ±25% of the nominal delay.
  const offset = (Math.random() * 2 - 1) * nominal * spread
  const rounded = Math.round(nominal + offset)

  return Math.max(1, rounded)
}
|
|
321
|
+
|
|
322
|
+
/**
 * Abort-aware sleep. Resolves when either the timer fires or the signal
 * aborts, whichever comes first; never rejects.
 *
 * Whichever side loses is cleaned up: an abort clears the pending timer, and
 * a timer that fires removes the `abort` listener. The latter matters because
 * callers reuse one long-lived signal across many sleeps — without it every
 * completed sleep would leave a listener attached to that signal.
 *
 * @param ms     how long to wait, in milliseconds
 * @param signal signal whose abort ends the wait early
 */
function sleep(ms: number, signal: AbortSignal): Promise<void> {
  return new Promise<void>((resolve) => {
    if (signal.aborted) {
      resolve()
      return
    }
    const onAbort = (): void => {
      clearTimeout(timer)
      resolve()
    }
    const timer = setTimeout(() => {
      // Timer won: detach from the signal so listeners don't accumulate.
      signal.removeEventListener('abort', onAbort)
      resolve()
    }, ms)
    // `once` detaches the listener automatically when the abort fires.
    signal.addEventListener('abort', onAbort, { once: true })
  })
}
|
|
338
|
+
|
|
339
|
+
/**
 * Build a Logger whose level methods all do nothing and whose `child()`
 * hands back another no-op Logger. Used when no logger is supplied.
 */
function createNoopLogger(): Logger {
  const swallow = () => undefined
  const sink: Record<string, unknown> = {}
  // Every level shares the same do-nothing function.
  for (const level of ['debug', 'info', 'warn', 'error', 'fatal', 'trace']) {
    sink[level] = swallow
  }
  sink.child = () => createNoopLogger()
  return sink as unknown as Logger
}
|
package/src/providers/index.ts
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import ServiceProvider from '@strav/kernel/core/service_provider'
|
|
2
|
-
import type Application from '@strav/kernel/core/application'
|
|
3
|
-
import Queue from '../queue/queue.ts'
|
|
4
|
-
|
|
5
|
-
/** Options accepted by `QueueProvider`. */
export interface QueueProviderOptions {
  /**
   * Create the jobs tables during boot. Table creation is skipped only when
   * this is explicitly `false`; the default is `true`.
   */
  ensureTables?: boolean
}
|
|
9
|
-
|
|
10
|
-
/**
 * Service provider for the queue: registers `Queue` as a singleton and,
 * unless told otherwise, makes sure the jobs tables exist at boot.
 */
export default class QueueProvider extends ServiceProvider {
  readonly name = 'queue'
  override readonly dependencies = ['database']

  /** @param options optional provider settings (see `QueueProviderOptions`) */
  constructor(private options?: QueueProviderOptions) {
    super()
  }

  /** Bind `Queue` as a singleton on the application. */
  override register(app: Application): void {
    app.singleton(Queue)
  }

  /**
   * Resolve `Queue` from the application, then create the jobs tables
   * unless `ensureTables` was explicitly set to `false`.
   */
  override async boot(app: Application): Promise<void> {
    app.resolve(Queue)

    const optedOut = this.options?.ensureTables === false
    if (optedOut) return

    await Queue.ensureTables()
  }
}
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-handler circuit breaker. Tracks recent failure timestamps in
|
|
3
|
-
* memory; trips the circuit when the failure count within the window
|
|
4
|
-
* exceeds the threshold and pauses dispatch of that handler for
|
|
5
|
-
* `cooldownMs`. Dispatches are auto-resumed once the cooldown expires.
|
|
6
|
-
*
|
|
7
|
-
* Intended defense against retry storms — a handler that consistently
|
|
8
|
-
* fails (stale schema, downed dependency) shouldn't keep eating worker
|
|
9
|
-
* cycles and DB connections. Tripping pushes failed jobs back to the
|
|
10
|
-
* queue with a delay so they retry AFTER the cooldown.
|
|
11
|
-
*
|
|
12
|
-
* State is per-process (in-memory). Multi-worker deployments will each
|
|
13
|
-
* track independently — that's fine; each worker self-pauses without
|
|
14
|
-
* cross-talk, and a handler that's failing for a global reason will
|
|
15
|
-
* trip every worker quickly.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
import Emitter from '@strav/kernel/events/emitter'
|
|
19
|
-
|
|
20
|
-
/** Caller-supplied breaker settings; anything omitted falls back to the defaults. */
export interface CircuitBreakerOptions {
  /** How many failures inside the window trip the breaker. Default: 10. */
  threshold?: number
  /** Length of the failure-counting window, in ms. Default: 60_000 (1 min). */
  windowMs?: number
  /** How long the breaker stays tripped before retry resumes, in ms. Default: 30_000 (30 s). */
  cooldownMs?: number
}
|
|
28
|
-
|
|
29
|
-
/** Breaker settings with every field present (defaults already applied). */
export interface ResolvedBreakerOptions {
  /** Failure count that trips the breaker. */
  threshold: number
  /** Failure-counting window, in ms. */
  windowMs: number
  /** Cooldown after tripping, in ms. */
  cooldownMs: number
}
|
|
34
|
-
|
|
35
|
-
/** In-memory bookkeeping kept for one handler's breaker. */
interface BreakerState {
  /** Effective settings for this handler. */
  options: ResolvedBreakerOptions
  /** Unix-ms timestamps of recent failures; no particular order is enforced. */
  failures: number[]
  /** Unix-ms instant the cooldown ends, or `null` while the circuit is closed. */
  trippedUntil: number | null
}
|
|
40
|
-
|
|
41
|
-
/** Settings used for any option a caller leaves out. */
const DEFAULTS: ResolvedBreakerOptions = {
  threshold: 10, // failures
  windowMs: 60_000, // 1 minute
  cooldownMs: 30_000, // 30 seconds
}

/** Breaker state per handler name; lives in this process's memory only. */
const breakers = new Map<string, BreakerState>()
|
|
48
|
-
|
|
49
|
-
/**
 * Register (or replace) the breaker for a handler. Any previous failure
 * history and tripped state for that handler is discarded.
 *
 * Each option falls back to its default when it is missing OR explicitly
 * `undefined` — e.g. `{ threshold: config.threshold }` with an unset config
 * value still yields the default threshold rather than an unusable breaker.
 *
 * @param handlerName handler the breaker applies to
 * @param options     overrides for the default threshold / window / cooldown
 */
export function configureBreaker(handlerName: string, options: CircuitBreakerOptions): void {
  breakers.set(handlerName, {
    options: {
      threshold: options.threshold ?? DEFAULTS.threshold,
      windowMs: options.windowMs ?? DEFAULTS.windowMs,
      cooldownMs: options.cooldownMs ?? DEFAULTS.cooldownMs,
    },
    failures: [],
    trippedUntil: null,
  })
}
|
|
57
|
-
|
|
58
|
-
/**
 * Drop every registered breaker along with its state.
 * Intended for tests only.
 */
export function resetBreakers(): void {
  breakers.clear()
}
|
|
62
|
-
|
|
63
|
-
/**
 * Report whether a handler's circuit is currently open.
 *
 * When the cooldown has already elapsed, the circuit is closed here as a
 * side effect: the tripped marker and failure history are cleared, and
 * `queue:circuit_reset` is emitted if anything is listening.
 *
 * @param handlerName handler to check
 * @param now         current time in unix-ms (defaults to `Date.now()`)
 * @returns remaining cooldown in ms while tripped, or `null` when the
 *          handler has no breaker or its circuit is closed
 */
export function checkBreaker(handlerName: string, now: number = Date.now()): number | null {
  const entry = breakers.get(handlerName)
  if (!entry || entry.trippedUntil === null) return null

  const deadline = entry.trippedUntil
  if (now >= deadline) {
    // Cooldown is over: close the circuit and start with a clean window.
    entry.trippedUntil = null
    entry.failures = []
    const hasListeners = Emitter.listenerCount('queue:circuit_reset') > 0
    if (hasListeners) {
      void Emitter.emit('queue:circuit_reset', { handler: handlerName }).catch(() => {})
    }
    return null
  }

  return deadline - now
}
|
|
88
|
-
|
|
89
|
-
/**
 * Record a failure for a handler and trip the circuit when the number of
 * failures inside `windowMs` reaches `threshold`.
 *
 * @param handlerName handler that failed
 * @param now         current time in unix-ms (defaults to `Date.now()`)
 * @returns - `null` when the handler has no breaker or the threshold has not
 *            been reached;
 *          - the full cooldown (ms) when this failure trips the circuit;
 *          - the remaining cooldown (ms, never negative) when the circuit was
 *            already tripped.
 */
export function recordFailure(handlerName: string, now: number = Date.now()): number | null {
  const state = breakers.get(handlerName)
  if (!state) return null

  // Drop failures outside the window then push the new one.
  const cutoff = now - state.options.windowMs
  state.failures = state.failures.filter(t => t > cutoff)
  state.failures.push(now)

  if (state.trippedUntil !== null) {
    // Already tripped — nothing new to trip. The cooldown may have elapsed
    // without anyone resetting the circuit yet, so clamp at 0 instead of
    // reporting a negative remaining time.
    return Math.max(0, state.trippedUntil - now)
  }

  if (state.failures.length >= state.options.threshold) {
    state.trippedUntil = now + state.options.cooldownMs
    if (Emitter.listenerCount('queue:circuit_tripped') > 0) {
      void Emitter.emit('queue:circuit_tripped', {
        handler: handlerName,
        threshold: state.options.threshold,
        windowMs: state.options.windowMs,
        cooldownMs: state.options.cooldownMs,
        trippedUntil: state.trippedUntil,
      }).catch(() => {})
    }
    return state.options.cooldownMs
  }

  return null
}
|
|
124
|
-
|
|
125
|
-
/**
 * Note a successful run: wipes the handler's failure history so sporadic
 * errors don't add up over time. A circuit that is currently tripped is left
 * alone — it is only closed when its cooldown expires.
 *
 * @param handlerName handler that succeeded
 */
export function recordSuccess(handlerName: string): void {
  const entry = breakers.get(handlerName)
  if (entry && entry.trippedUntil === null) {
    entry.failures = []
  }
}
|
package/src/queue/index.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
export { default as Queue } from './queue.ts'
|
|
2
|
-
export { default as Worker } from './worker.ts'
|
|
3
|
-
export type {
|
|
4
|
-
JobOptions,
|
|
5
|
-
QueueConfig,
|
|
6
|
-
JobMeta,
|
|
7
|
-
JobRecord,
|
|
8
|
-
FailedJobRecord,
|
|
9
|
-
JobHandler,
|
|
10
|
-
JobHandlerOptions,
|
|
11
|
-
JobHandlerRegistration,
|
|
12
|
-
JobPayloadSchema,
|
|
13
|
-
} from './queue.ts'
|
|
14
|
-
export type { WorkerOptions } from './worker.ts'
|
|
15
|
-
export {
|
|
16
|
-
configureBreaker,
|
|
17
|
-
checkBreaker,
|
|
18
|
-
recordFailure,
|
|
19
|
-
recordSuccess,
|
|
20
|
-
resetBreakers,
|
|
21
|
-
} from './circuit_breaker.ts'
|
|
22
|
-
export type { CircuitBreakerOptions, ResolvedBreakerOptions } from './circuit_breaker.ts'
|