@crewhaus/queue-consumer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@crewhaus/queue-consumer",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Long-running consumer loop for the BATCH target — visibility-timeout-aware, SIGTERM-drains (Section 23 BATCH)",
6
+ "main": "src/index.ts",
7
+ "types": "src/index.ts",
8
+ "exports": {
9
+ ".": "./src/index.ts"
10
+ },
11
+ "scripts": {
12
+ "test": "bun test src"
13
+ },
14
+ "dependencies": {
15
+ "@crewhaus/errors": "0.0.0",
16
+ "@crewhaus/idempotency-keys": "0.0.0",
17
+ "@crewhaus/queue-protocol": "0.0.0"
18
+ },
19
+ "license": "Apache-2.0",
20
+ "author": {
21
+ "name": "Max Meier",
22
+ "email": "max@studiomax.io",
23
+ "url": "https://studiomax.io"
24
+ },
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/crewhaus/factory.git",
28
+ "directory": "packages/queue-consumer"
29
+ },
30
+ "homepage": "https://github.com/crewhaus/factory/tree/main/packages/queue-consumer#readme",
31
+ "bugs": {
32
+ "url": "https://github.com/crewhaus/factory/issues"
33
+ },
34
+ "publishConfig": {
35
+ "access": "restricted"
36
+ },
37
+ "files": [
38
+ "src",
39
+ "README.md",
40
+ "LICENSE",
41
+ "NOTICE"
42
+ ]
43
+ }
@@ -0,0 +1,202 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { createInMemoryIdempotencyStore } from "@crewhaus/idempotency-keys";
3
+ import { type Job, createInMemoryQueue } from "@crewhaus/queue-protocol";
4
+ import { type ConsumerObserver, startConsumer } from "./index.js";
5
+
6
+ describe("startConsumer", () => {
7
// End-to-end happy path: 50 queued numbers are all handled and acked by a
// consumer that runs up to four handlers at a time.
test("processes 50 jobs at concurrency 4 (T3 end-to-end)", async () => {
  const jobQueue = createInMemoryQueue<number>();
  for (let n = 0; n < 50; n += 1) {
    await jobQueue.enqueue(n);
  }

  const handled: number[] = [];
  const running = startConsumer<number, number>({
    queue: jobQueue,
    handler: async (value) => {
      handled.push(value);
      return value * 2;
    },
    concurrency: 4,
    visibilityTimeoutMs: 5_000,
  });

  // Poll every 10ms until every job is acked; give up after 5s so a broken
  // consumer fails the assertions below instead of hanging the suite.
  const giveUpAt = Date.now() + 5_000;
  while (Date.now() < giveUpAt) {
    const snapshot = await jobQueue.stats();
    if (snapshot.acked >= 50 && snapshot.pending === 0) break;
    await new Promise((resolve) => setTimeout(resolve, 10));
  }
  await running.drain();

  const finalStats = await jobQueue.stats();
  expect(finalStats.acked).toBe(50);
  expect(finalStats.pending).toBe(0);

  // Completion order is not deterministic under concurrency, so compare sorted.
  const expectedInputs = Array.from({ length: 50 }, (_, i) => i);
  expect(handled.sort((a, b) => a - b)).toEqual(expectedInputs);
});
36
+
37
// A handler that throws on its first invocation only: the job must come back
// for a second attempt and end up acked, never dead-lettered.
test("transient failure → nack(transient) → retry succeeds (T3)", async () => {
  const jobQueue = createInMemoryQueue<{ id: number }>();
  await jobQueue.enqueue({ id: 7 });

  let invocations = 0;
  const running = startConsumer<{ id: number }, string>({
    queue: jobQueue,
    handler: async (payload) => {
      invocations += 1;
      if (invocations === 1) {
        throw new Error("transient");
      }
      return `ok-${payload.id}`;
    },
    concurrency: 1,
    visibilityTimeoutMs: 5_000,
    maxRetries: 3,
  });

  // Poll every 10ms until the single job is acked, for at most 3s.
  const giveUpAt = Date.now() + 3_000;
  while (Date.now() < giveUpAt) {
    const snapshot = await jobQueue.stats();
    if (snapshot.acked === 1) break;
    await new Promise((resolve) => setTimeout(resolve, 10));
  }
  await running.drain();

  // One failing call plus one successful retry.
  expect(invocations).toBe(2);
  const finalStats = await jobQueue.stats();
  expect(finalStats.acked).toBe(1);
  expect(finalStats.deadLetter).toBe(0);
});
68
+
69
// A handler that always throws: once the attempt count reaches `maxRetries`
// the job must land in the dead-letter queue and never be acked.
test("permanent failure (attempts >= maxRetries) → DLQ", async () => {
  const jobQueue = createInMemoryQueue<string>();
  await jobQueue.enqueue("doomed");

  const alwaysFails = async (): Promise<string> => {
    throw new Error("permanent");
  };
  const running = startConsumer<string, string>({
    queue: jobQueue,
    handler: alwaysFails,
    concurrency: 1,
    visibilityTimeoutMs: 5_000,
    maxRetries: 2,
  });

  // Poll every 10ms until the job is dead-lettered, for at most 3s.
  const giveUpAt = Date.now() + 3_000;
  while (Date.now() < giveUpAt) {
    const snapshot = await jobQueue.stats();
    if (snapshot.deadLetter === 1) break;
    await new Promise((resolve) => setTimeout(resolve, 10));
  }
  await running.drain();

  const finalStats = await jobQueue.stats();
  expect(finalStats.deadLetter).toBe(1);
  expect(finalStats.acked).toBe(0);
});
95
+
96
// NOTE(review): despite its title, this test never redelivers the same
// (jobId, attempt) pair, so the consumer's cache-hit branch is not exercised
// here. What it does verify:
//   1. two separately enqueued jobs sharing one idempotency store each invoke
//      their handler exactly once (no cross-job cache hit), and
//   2. the store returns a previously set value on repeated reads.
// The title is left unchanged so external references to "T9" keep matching.
test("idempotency-store cache hit on retry — handler invoked once across attempts (T9)", async () => {
  const queue = createInMemoryQueue<{ id: string }>();
  const store = createInMemoryIdempotencyStore<string>();
  await queue.enqueue({ id: "k1" });

  let calls = 0;
  let calls2 = 0;

  // First consumer acks the first job; with a store configured the consumer
  // routes the handler through the idempotency wrapper.
  const c1 = startConsumer<{ id: string }, string>({
    queue,
    handler: async (input) => {
      calls += 1;
      return `result-for-${input.id}`;
    },
    concurrency: 1,
    visibilityTimeoutMs: 5_000,
    idempotencyStore: store,
    idempotencyTtlMs: 60_000,
  });
  let deadline = Date.now() + 2_000;
  while (Date.now() < deadline) {
    if ((await queue.stats()).acked === 1) break;
    await new Promise((r) => setTimeout(r, 10));
  }
  await c1.drain();

  // Enqueue an identical payload as a separate job. The consumer keys the
  // cache on (jobId, attempt), so this should NOT hit the cache — it is a
  // different job.
  await queue.enqueue({ id: "k1" });
  const c2 = startConsumer<{ id: string }, string>({
    queue,
    handler: async (input) => {
      calls2 += 1;
      return `result-for-${input.id}`;
    },
    concurrency: 1,
    visibilityTimeoutMs: 5_000,
    idempotencyStore: store,
    idempotencyTtlMs: 60_000,
  });
  deadline = Date.now() + 2_000;
  while (Date.now() < deadline) {
    if ((await queue.stats()).acked === 2) break;
    await new Promise((r) => setTimeout(r, 10));
  }
  await c2.drain();

  expect(calls).toBe(1);
  expect(calls2).toBe(1);

  // Direct store check — the same key returns the same value on every read.
  await store.set("fixed-key", "cached", 60_000);
  expect(await store.get("fixed-key")).toBe("cached");
  expect(await store.get("fixed-key")).toBe("cached");
});
163
+
164
// SIGTERM contract: draining lets already-started handlers finish and ack,
// and leaves jobs that were never started pending on the queue.
test("drain() blocks new pulls and lets in-flight finish (SIGTERM contract)", async () => {
  const jobQueue = createInMemoryQueue<string>();
  for (let n = 0; n < 5; n += 1) {
    await jobQueue.enqueue(`j${n}`);
  }

  let startedJobs = 0;
  const countingObserver: ConsumerObserver<string, string> = {
    onJobStart: () => {
      startedJobs += 1;
    },
  };

  const running = startConsumer<string, string>({
    queue: jobQueue,
    handler: async (input) => {
      // 50ms of simulated work so handlers are still running at drain time.
      await new Promise((resolve) => setTimeout(resolve, 50));
      return `done-${input}`;
    },
    concurrency: 2,
    visibilityTimeoutMs: 5_000,
    observer: countingObserver,
  });

  // Give the consumer 30ms to pick up its first jobs.
  await new Promise((resolve) => setTimeout(resolve, 30));
  const busyBeforeDrain = running.inFlight();
  expect(busyBeforeDrain).toBeGreaterThan(0);

  await running.drain();
  expect(running.inFlight()).toBe(0);

  const finalStats = await jobQueue.stats();
  // Everything that was running when drain began must have been acked...
  expect(finalStats.acked).toBeGreaterThanOrEqual(busyBeforeDrain);
  // ...every started job was acked...
  expect(startedJobs).toBe(finalStats.acked);
  // ...and jobs that were never pulled are still pending.
  expect(finalStats.pending + finalStats.acked).toBe(5);
});
202
+ });
package/src/index.ts ADDED
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Catalog R14 `queue-consumer` — Section 23 BATCH.
3
+ *
4
+ * Long-running consumer loop. Pulls jobs from any `QueueAdapter`, runs
5
+ * the user's handler with `concurrency`-bounded parallelism, wraps each
6
+ * call in an idempotency-key cache so retries hit cache, and acks /
7
+ * nacks based on the handler's outcome.
8
+ *
9
+ * Visibility renewal: while a handler is running, a sidecar timer
10
+ * extends the job's visibility every `visibilityRenewIntervalMs` until
11
+ * either the handler completes or the consumer is told to stop. This
12
+ * keeps long-running model calls from being yanked out from under the
13
+ * worker by another consumer that thinks the lease expired.
14
+ *
15
+ * Retry policy:
16
+ * - handler throws + `attempt < maxRetries` → `nack(transient)` so
17
+ * the queue re-enqueues for the next consumer.
18
+ * - handler throws + `attempt >= maxRetries` → `nack(permanent)` so
19
+ * the queue moves the job to its DLQ.
20
+ * - handler resolves → `ack`.
21
+ *
22
+ * Drain semantics: `drain()` stops new pulls but lets in-flight handlers
23
+ * complete + ack. `stop()` is currently an alias for `drain()` — used by
24
+ * the SIGTERM path so the daemon shuts down cleanly without orphaning
25
+ * mid-flight jobs.
26
+ */
27
+ import { CrewhausError } from "@crewhaus/errors";
28
+ import { type IdempotencyStore, idempotencyKey, withIdempotency } from "@crewhaus/idempotency-keys";
29
+ import type { Job, NackReason, QueueAdapter } from "@crewhaus/queue-protocol";
30
+
31
/**
 * Error type for failures raised by the queue consumer.
 *
 * Forwards the literal `"runtime"` as the first `CrewhausError` constructor
 * argument (presumably the error category — confirm against
 * `@crewhaus/errors`), followed by `message` and the optional `cause`.
 */
export class QueueConsumerError extends CrewhausError {
  override readonly name = "QueueConsumerError";

  constructor(message: string, cause?: unknown) {
    super("runtime", message, cause);
  }
}
37
+
38
/**
 * Result of processing one job, as reported to `ConsumerObserver.onJobEnd`.
 *
 * - `"ok"`: the handler resolved with `value`; `fromCache` is `false`
 *   whenever no idempotency store is configured.
 * - `"fail"`: the handler threw `error`; `reason` is the value the consumer
 *   passes to the queue's `nack`.
 */
export type ConsumerHandlerOutcome<TResult> =
  | { kind: "ok"; value: TResult; fromCache: boolean }
  | { kind: "fail"; reason: NackReason; error: unknown };
41
+
42
/** Optional lifecycle hooks invoked by a running consumer. */
export type ConsumerObserver<TInput, TResult> = {
  /** Called once per pulled job, before its handler is invoked. */
  onJobStart?(job: Job<TInput>): void;
  /** Called once per job after the consumer has attempted its ack or nack. */
  onJobEnd?(job: Job<TInput>, outcome: ConsumerHandlerOutcome<TResult>): void;
  /** Called when a drain is requested. */
  onDrainStart?(): void;
  /** Called once the drain has finished and nothing is in flight any more. */
  onDrainEnd?(): void;
};
50
+
51
/** Configuration accepted by `startConsumer`. */
export type ConsumerOptions<TInput, TResult> = {
  /** Adapter that jobs are pulled from and acked / nacked against. */
  readonly queue: QueueAdapter<TInput>;
  /**
   * User callback run for each job. Receives the job's input plus a context
   * holding the idempotency key derived from the job id and attempt, and the
   * job itself.
   */
  readonly handler: (input: TInput, ctx: { key: string; job: Job<TInput> }) => Promise<TResult>;
  /** Upper bound on the number of jobs processed at the same time. */
  readonly concurrency: number;
  /** Visibility timeout (ms) requested on every pull. */
  readonly visibilityTimeoutMs: number;
  /**
   * Delay (ms) between visibility renewals while a handler is running.
   * `startConsumer` chooses a default when this is omitted.
   */
  readonly visibilityRenewIntervalMs?: number;
  /** When provided, handler calls are routed through the idempotency wrapper backed by this store. */
  readonly idempotencyStore?: IdempotencyStore<TResult>;
  /** TTL (ms) handed to the idempotency wrapper. Defaults to 60000. */
  readonly idempotencyTtlMs?: number;
  /**
   * A failing job whose `attempt` has reached this value is nacked as
   * `"permanent"`; earlier attempts are nacked as `"transient"`. Defaults to 3.
   */
  readonly maxRetries?: number;
  /** Cap on jobs requested per pull. Defaults to `concurrency`. */
  readonly pullBatchSize?: number;
  /** Pause (ms) after a pull that returned no jobs. Defaults to 100ms. */
  readonly emptyQueuePollMs?: number;
  /** Lifecycle hooks; see `ConsumerObserver`. */
  readonly observer?: ConsumerObserver<TInput, TResult>;
  /** Test seam — `setTimeout` override used for the visibility-renewal timer. */
  readonly _setTimeout?: typeof setTimeout;
  /** Test seam — `clearTimeout` override used for the visibility-renewal timer. */
  readonly _clearTimeout?: typeof clearTimeout;
};
69
+
70
/** Handle returned by `startConsumer` for controlling the background loop. */
export interface RunningConsumer {
  /**
   * Stop pulling and wait until every in-flight handler has finished.
   * Safe to call more than once — later calls wait on the drain that is
   * already in progress instead of starting a new one.
   */
  drain(): Promise<void>;
  /**
   * Begin draining and resolve when it is done; meant for SIGTERM paths.
   * Currently identical to `drain()`. It exists as its own verb so that
   * richer graceful-stop behaviour (e.g. closing adapter connections) can be
   * added later.
   */
  stop(): Promise<void>;
  /** Diagnostic: number of jobs being processed right now. */
  inFlight(): number;
}
86
+
87
/** Baseline delay (ms) between visibility renewals when the caller supplies none. */
const DEFAULT_VISIBILITY_RENEW_INTERVAL_MS = 5_000;
/** TTL (ms) for idempotency entries when `idempotencyTtlMs` is omitted. */
const DEFAULT_IDEMPOTENCY_TTL_MS = 60_000;
/** Value used for `maxRetries` when the caller supplies none. */
const DEFAULT_MAX_RETRIES = 3;
/** Pause (ms) after an empty pull when `emptyQueuePollMs` is omitted. */
const DEFAULT_EMPTY_QUEUE_POLL_MS = 100;
91
+
92
/**
 * Start a background loop that pulls jobs from `opts.queue` and runs
 * `opts.handler` on them with at most `opts.concurrency` jobs in flight.
 *
 * Per job:
 * - the job's visibility is renewed on a timer while the handler runs;
 * - when an idempotency store is configured, the handler is invoked through
 *   the idempotency wrapper using a key derived from `(job.id, job.attempt)`;
 * - a resolved handler leads to `ack`; a thrown handler leads to
 *   `nack("transient")`, or `nack("permanent")` once `job.attempt` has
 *   reached `maxRetries`.
 *
 * Failures of `ack` / `nack` and exceptions thrown by observer hooks are
 * written to stderr and otherwise ignored, so one misbehaving collaborator
 * cannot stall the loop or leave a job unsettled.
 *
 * @param opts Consumer configuration; see `ConsumerOptions`.
 * @returns A handle exposing `drain()`, `stop()` and `inFlight()`.
 * @throws QueueConsumerError when `opts.concurrency` is NaN or not greater
 *   than zero.
 */
export function startConsumer<TInput, TResult>(
  opts: ConsumerOptions<TInput, TResult>,
): RunningConsumer {
  // With concurrency <= 0 the slot gate below would await `Promise.race` over
  // an empty set, which never settles: the loop and every later `drain()`
  // would hang forever. NaN would produce a NaN batch size. Fail fast instead.
  if (!(opts.concurrency > 0)) {
    throw new QueueConsumerError(
      `concurrency must be a number greater than 0 (got ${opts.concurrency})`,
    );
  }

  // When the caller does not choose a renew interval, keep it at or below
  // half the lease so the first renewal lands before the lease can expire.
  const halfLeaseMs = Math.floor(opts.visibilityTimeoutMs / 2);
  const defaultRenewMs =
    halfLeaseMs >= 1
      ? Math.min(DEFAULT_VISIBILITY_RENEW_INTERVAL_MS, halfLeaseMs)
      : DEFAULT_VISIBILITY_RENEW_INTERVAL_MS;
  const visRenewMs = opts.visibilityRenewIntervalMs ?? defaultRenewMs;
  const idempotencyTtlMs = opts.idempotencyTtlMs ?? DEFAULT_IDEMPOTENCY_TTL_MS;
  const maxRetries = opts.maxRetries ?? DEFAULT_MAX_RETRIES;
  const pullBatch = opts.pullBatchSize ?? opts.concurrency;
  const emptyPollMs = opts.emptyQueuePollMs ?? DEFAULT_EMPTY_QUEUE_POLL_MS;
  const ts = opts._setTimeout ?? setTimeout;
  const tc = opts._clearTimeout ?? clearTimeout;
  const observer = opts.observer;

  /** Write a one-line diagnostic to stderr. */
  const report = (context: string, err: unknown): void => {
    const detail = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[queue-consumer] ${context}: ${detail}\n`);
  };

  /** Run an observer hook, isolating the consumer from anything it throws. */
  const notify = (hook: string, call: () => void): void => {
    try {
      call();
    } catch (err) {
      report(`observer ${hook} threw`, err);
    }
  };

  const wrappedHandler = opts.idempotencyStore
    ? withIdempotency<{ input: TInput; job: Job<TInput> }, TResult>(
        async ({ input, job }, key) => opts.handler(input, { key, job }),
        { store: opts.idempotencyStore, ttlMs: idempotencyTtlMs },
      )
    : undefined;

  let stopping = false;
  let drainPromise: Promise<void> | undefined;
  const inFlight = new Set<Promise<void>>();

  // Pull loop runs as a background async function. It waits for a free
  // concurrency slot before pulling the next batch.
  const loopPromise = (async () => {
    while (!stopping) {
      // Concurrency gate: wait until at least one slot is free.
      while (inFlight.size >= opts.concurrency && !stopping) {
        await Promise.race(inFlight);
      }
      if (stopping) break;

      const want = Math.max(1, Math.min(pullBatch, opts.concurrency - inFlight.size));
      let pulled: ReadonlyArray<Job<TInput>>;
      try {
        pulled = await opts.queue.pull({
          maxBatch: want,
          visibilityTimeoutMs: opts.visibilityTimeoutMs,
        });
      } catch {
        // Adapter blip — treat as an empty pull, back off a little, retry.
        await sleep(emptyPollMs * 2);
        continue;
      }

      if (pulled.length === 0) {
        await sleep(emptyPollMs);
        continue;
      }

      for (const job of pulled) {
        // Tracked promises must never reject: both the gate above and
        // `drain()` race over this set.
        const tracked: Promise<void> = handleOne(job)
          .catch((err: unknown) => report("job handling error", err))
          .finally(() => {
            inFlight.delete(tracked);
          });
        inFlight.add(tracked);
      }
    }
  })().catch((err: unknown) => {
    // Surface any unexpected error from the loop itself (rare — handler
    // exceptions are already trapped per job).
    report("loop error", err);
  });

  /** Process a single job: run the handler, then ack or nack by outcome. */
  async function handleOne(job: Job<TInput>): Promise<void> {
    notify("onJobStart", () => observer?.onJobStart?.(job));
    const key = idempotencyKey(job.id, job.attempt);
    const stopRenew = startVisibilityRenew(opts.queue, job.id, visRenewMs, ts, tc);
    let outcome: ConsumerHandlerOutcome<TResult>;
    try {
      const r = wrappedHandler
        ? await wrappedHandler({ input: job.input, job }, key)
        : { value: await opts.handler(job.input, { key, job }), fromCache: false };
      outcome = { kind: "ok", value: r.value, fromCache: r.fromCache };
    } catch (err) {
      const isLast = job.attempt >= maxRetries;
      outcome = {
        kind: "fail",
        reason: isLast ? "permanent" : "transient",
        error: err,
      };
    } finally {
      // Always stop the renewal timer, whatever the handler did.
      stopRenew();
    }

    if (outcome.kind === "ok") {
      try {
        await opts.queue.ack(job.id);
      } catch (err) {
        // Best-effort: log + continue. Adapter ack failures don't reach
        // userland; they'd surface as duplicate work on the next pull.
        report(`ack failed for job ${job.id}`, err);
      }
    } else {
      try {
        await opts.queue.nack(job.id, outcome.reason);
      } catch (err) {
        // Same rationale as for ack.
        report(`nack failed for job ${job.id}`, err);
      }
    }
    notify("onJobEnd", () => observer?.onJobEnd?.(job, outcome));
  }

  // Deliberately not `async`: an async function wraps its return value in a
  // fresh promise on every call, while repeated calls here hand back the one
  // memoized drain promise.
  function drain(): Promise<void> {
    if (drainPromise !== undefined) return drainPromise;
    stopping = true;
    notify("onDrainStart", () => observer?.onDrainStart?.());
    drainPromise = (async () => {
      // First, let the pull loop notice the stop flag.
      await loopPromise;
      // Then wait for in-flight jobs to finish.
      while (inFlight.size > 0) {
        await Promise.race(inFlight);
      }
      notify("onDrainEnd", () => observer?.onDrainEnd?.());
    })();
    return drainPromise;
  }

  return {
    drain,
    stop: drain,
    inFlight: () => inFlight.size,
  };
}
214
+
215
/**
 * Resolve after `ms` milliseconds, using the global `setTimeout`.
 *
 * @param ms Delay in milliseconds.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
218
+
219
/**
 * Start a sidecar that calls `queue.extendVisibility(jobId, intervalMs * 2)`
 * every `intervalMs` until the returned function is invoked. The timer
 * functions are passed in explicitly so tests can drive them
 * deterministically.
 *
 * Renewal is best-effort: both a rejected promise and a synchronous throw
 * from `extendVisibility` are swallowed, and the next renewal is still
 * scheduled.
 *
 * @param queue Adapter whose `extendVisibility` is called.
 * @param jobId Id of the job whose visibility is renewed.
 * @param intervalMs Delay between renewals; each renewal asks for twice this.
 * @param setTimeoutImpl Timer scheduler (`setTimeout` or a test double).
 * @param clearTimeoutImpl Timer canceller matching `setTimeoutImpl`.
 * @returns A function that stops the sidecar and cancels the pending timer.
 */
function startVisibilityRenew(
  queue: QueueAdapter<unknown>,
  jobId: string,
  intervalMs: number,
  setTimeoutImpl: typeof setTimeout,
  clearTimeoutImpl: typeof clearTimeout,
): () => void {
  let stopped = false;
  let handle: ReturnType<typeof setTimeoutImpl> | undefined;

  const renew = (): void => {
    try {
      // A renew failure after the job is already ack'd is expected
      // (extendVisibility throws unknown-jobId). Swallow the rejection.
      queue.extendVisibility(jobId, intervalMs * 2).catch(() => {});
    } catch {
      // An adapter that throws synchronously must not escape the timer
      // callback as an uncaught exception, nor prevent rescheduling.
    }
  };

  const schedule = (): void => {
    if (stopped) return;
    handle = setTimeoutImpl(() => {
      // stop() may have been called while this timer was pending.
      if (stopped) return;
      renew();
      schedule();
    }, intervalMs);
  };
  schedule();

  return () => {
    stopped = true;
    if (handle !== undefined) clearTimeoutImpl(handle);
  };
}