@pattern-stack/codegen 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/dist/src/cli/index.js +1616 -1070
  3. package/dist/src/cli/index.js.map +1 -1
  4. package/package.json +3 -1
  5. package/runtime/analytics/index.ts +31 -0
  6. package/runtime/analytics/metrics.ts +85 -0
  7. package/runtime/analytics/packs/crm-entity-measures.ts +20 -0
  8. package/runtime/analytics/packs/index.ts +5 -0
  9. package/runtime/analytics/packs/monetary-measures.ts +20 -0
  10. package/runtime/analytics/specs.ts +54 -0
  11. package/runtime/analytics/types.ts +105 -0
  12. package/runtime/base-classes/activity-entity-repository.ts +50 -0
  13. package/runtime/base-classes/activity-entity-service.ts +48 -0
  14. package/runtime/base-classes/base-read-use-cases.ts +88 -0
  15. package/runtime/base-classes/base-repository.ts +289 -0
  16. package/runtime/base-classes/base-service.ts +183 -0
  17. package/runtime/base-classes/index.ts +38 -0
  18. package/runtime/base-classes/knowledge-entity-repository.ts +12 -0
  19. package/runtime/base-classes/knowledge-entity-service.ts +14 -0
  20. package/runtime/base-classes/lifecycle-events.ts +152 -0
  21. package/runtime/base-classes/metadata-entity-repository.ts +80 -0
  22. package/runtime/base-classes/metadata-entity-service.ts +48 -0
  23. package/runtime/base-classes/synced-entity-repository.ts +57 -0
  24. package/runtime/base-classes/synced-entity-service.ts +50 -0
  25. package/runtime/base-classes/with-analytics.ts +22 -0
  26. package/runtime/constants/tokens.ts +29 -0
  27. package/runtime/eav-helpers.ts +74 -0
  28. package/runtime/pipes/zod-validation.pipe.ts +64 -0
  29. package/runtime/shared/openapi/error-response.dto.ts +24 -0
  30. package/runtime/shared/openapi/errors.ts +39 -0
  31. package/runtime/shared/openapi/index.ts +20 -0
  32. package/runtime/shared/openapi/registry.tokens.ts +13 -0
  33. package/runtime/shared/openapi/registry.ts +151 -0
  34. package/runtime/subsystems/analytics/analytics-query.protocol.ts +37 -0
  35. package/runtime/subsystems/analytics/analytics.module.ts +64 -0
  36. package/runtime/subsystems/analytics/analytics.tokens.ts +24 -0
  37. package/runtime/subsystems/analytics/cube-backend.ts +75 -0
  38. package/runtime/subsystems/analytics/index.ts +15 -0
  39. package/runtime/subsystems/analytics/noop-backend.ts +27 -0
  40. package/runtime/subsystems/auth/auth.module.ts +91 -0
  41. package/runtime/subsystems/auth/auth.tokens.ts +27 -0
  42. package/runtime/subsystems/auth/backends/encryption-key/env.ts +76 -0
  43. package/runtime/subsystems/auth/backends/oauth-state-store/in-memory.ts +42 -0
  44. package/runtime/subsystems/auth/index.ts +77 -0
  45. package/runtime/subsystems/auth/protocols/auth-strategy.ts +46 -0
  46. package/runtime/subsystems/auth/protocols/encryption-key.ts +21 -0
  47. package/runtime/subsystems/auth/protocols/integration-store.ts +66 -0
  48. package/runtime/subsystems/auth/protocols/oauth-state-store.ts +16 -0
  49. package/runtime/subsystems/auth/runtime/integration-broken.error.ts +21 -0
  50. package/runtime/subsystems/auth/runtime/oauth2-refresh.strategy.ts +189 -0
  51. package/runtime/subsystems/auth/runtime/session-expired.error.ts +39 -0
  52. package/runtime/subsystems/auth/runtime/with-auth-retry.ts +50 -0
  53. package/runtime/subsystems/bridge/assert-tenant-id.ts +57 -0
  54. package/runtime/subsystems/bridge/bridge-delivery-handler.ts +220 -0
  55. package/runtime/subsystems/bridge/bridge-delivery.drizzle-backend.ts +149 -0
  56. package/runtime/subsystems/bridge/bridge-delivery.memory-backend.ts +140 -0
  57. package/runtime/subsystems/bridge/bridge-delivery.schema.ts +142 -0
  58. package/runtime/subsystems/bridge/bridge-errors.ts +112 -0
  59. package/runtime/subsystems/bridge/bridge-outbox-drain-hook.ts +175 -0
  60. package/runtime/subsystems/bridge/bridge.module.ts +160 -0
  61. package/runtime/subsystems/bridge/bridge.protocol.ts +351 -0
  62. package/runtime/subsystems/bridge/bridge.tokens.ts +68 -0
  63. package/runtime/subsystems/bridge/event-flow.service.ts +175 -0
  64. package/runtime/subsystems/bridge/generated/.gitkeep +0 -0
  65. package/runtime/subsystems/bridge/generated/registry.ts +6 -0
  66. package/runtime/subsystems/bridge/index.ts +84 -0
  67. package/runtime/subsystems/bridge/reserved-pools.ts +36 -0
  68. package/runtime/subsystems/cache/cache.drizzle-backend.ts +150 -0
  69. package/runtime/subsystems/cache/cache.memory-backend.ts +116 -0
  70. package/runtime/subsystems/cache/cache.module.ts +115 -0
  71. package/runtime/subsystems/cache/cache.protocol.ts +45 -0
  72. package/runtime/subsystems/cache/cache.schema.ts +27 -0
  73. package/runtime/subsystems/cache/cache.tokens.ts +17 -0
  74. package/runtime/subsystems/cache/index.ts +22 -0
  75. package/runtime/subsystems/events/domain-events.schema.ts +77 -0
  76. package/runtime/subsystems/events/event-bus.drizzle-backend.ts +327 -0
  77. package/runtime/subsystems/events/event-bus.memory-backend.ts +142 -0
  78. package/runtime/subsystems/events/event-bus.protocol.ts +86 -0
  79. package/runtime/subsystems/events/event-bus.redis-backend.ts +304 -0
  80. package/runtime/subsystems/events/events-errors.ts +30 -0
  81. package/runtime/subsystems/events/events.module.ts +230 -0
  82. package/runtime/subsystems/events/events.tokens.ts +62 -0
  83. package/runtime/subsystems/events/generated/bus.ts +103 -0
  84. package/runtime/subsystems/events/generated/index.ts +7 -0
  85. package/runtime/subsystems/events/generated/registry.ts +84 -0
  86. package/runtime/subsystems/events/generated/schemas.ts +59 -0
  87. package/runtime/subsystems/events/generated/types.ts +94 -0
  88. package/runtime/subsystems/events/index.ts +21 -0
  89. package/runtime/subsystems/index.ts +63 -0
  90. package/runtime/subsystems/jobs/generated/job-orchestration.schema.multi-tenant.ts +217 -0
  91. package/runtime/subsystems/jobs/generated/job-orchestration.schema.single-tenant.ts +217 -0
  92. package/runtime/subsystems/jobs/generated/scope-entity-type.ts +10 -0
  93. package/runtime/subsystems/jobs/index.ts +120 -0
  94. package/runtime/subsystems/jobs/job-handler.base.ts +206 -0
  95. package/runtime/subsystems/jobs/job-orchestration.schema.ts +217 -0
  96. package/runtime/subsystems/jobs/job-orchestrator.drizzle-backend.ts +536 -0
  97. package/runtime/subsystems/jobs/job-orchestrator.memory-backend.ts +850 -0
  98. package/runtime/subsystems/jobs/job-orchestrator.protocol.ts +179 -0
  99. package/runtime/subsystems/jobs/job-run-service.drizzle-backend.ts +171 -0
  100. package/runtime/subsystems/jobs/job-run-service.memory-backend.ts +165 -0
  101. package/runtime/subsystems/jobs/job-run-service.protocol.ts +79 -0
  102. package/runtime/subsystems/jobs/job-step-service.drizzle-backend.ts +66 -0
  103. package/runtime/subsystems/jobs/job-step-service.memory-backend.ts +119 -0
  104. package/runtime/subsystems/jobs/job-step-service.protocol.ts +53 -0
  105. package/runtime/subsystems/jobs/job-worker.module.ts +302 -0
  106. package/runtime/subsystems/jobs/job-worker.ts +615 -0
  107. package/runtime/subsystems/jobs/jobs-domain.module.ts +119 -0
  108. package/runtime/subsystems/jobs/jobs-domain.tokens.ts +30 -0
  109. package/runtime/subsystems/jobs/jobs-errors.ts +150 -0
  110. package/runtime/subsystems/jobs/memory-job-store.ts +35 -0
  111. package/runtime/subsystems/jobs/pool-config.loader.ts +218 -0
  112. package/runtime/subsystems/storage/index.ts +18 -0
  113. package/runtime/subsystems/storage/storage.local-backend.ts +113 -0
  114. package/runtime/subsystems/storage/storage.memory-backend.ts +78 -0
  115. package/runtime/subsystems/storage/storage.module.ts +60 -0
  116. package/runtime/subsystems/storage/storage.protocol.ts +78 -0
  117. package/runtime/subsystems/storage/storage.tokens.ts +9 -0
  118. package/runtime/subsystems/storage/storage.utils.ts +20 -0
  119. package/runtime/subsystems/sync/deep-equal.differ.ts +198 -0
  120. package/runtime/subsystems/sync/execute-sync.use-case.ts +334 -0
  121. package/runtime/subsystems/sync/index.ts +98 -0
  122. package/runtime/subsystems/sync/sync-audit.schema.ts +300 -0
  123. package/runtime/subsystems/sync/sync-change-source.protocol.ts +99 -0
  124. package/runtime/subsystems/sync/sync-cursor-store.drizzle-backend.ts +104 -0
  125. package/runtime/subsystems/sync/sync-cursor-store.memory-backend.ts +64 -0
  126. package/runtime/subsystems/sync/sync-cursor-store.protocol.ts +53 -0
  127. package/runtime/subsystems/sync/sync-errors.ts +54 -0
  128. package/runtime/subsystems/sync/sync-field-diff.protocol.ts +61 -0
  129. package/runtime/subsystems/sync/sync-loopback.protocol.ts +33 -0
  130. package/runtime/subsystems/sync/sync-run-recorder.drizzle-backend.ts +123 -0
  131. package/runtime/subsystems/sync/sync-run-recorder.memory-backend.ts +143 -0
  132. package/runtime/subsystems/sync/sync-run-recorder.protocol.ts +86 -0
  133. package/runtime/subsystems/sync/sync-sink.protocol.ts +55 -0
  134. package/runtime/subsystems/sync/sync.module.ts +156 -0
  135. package/runtime/subsystems/sync/sync.tokens.ts +57 -0
  136. package/runtime/types/drizzle.ts +23 -0
@@ -0,0 +1,615 @@
1
+ /**
2
+ * JobWorker — backend-agnostic tick loop for the job orchestration domain
3
+ * (ADR-022, JOB-3).
4
+ *
5
+ * One worker instance per active pool. On `onModuleInit` it starts two
6
+ * intervals: the poll loop (claim → process → repeat) and the stale-claim
7
+ * sweeper. On `onModuleDestroy` / SIGTERM it drains in-flight work and
8
+ * releases still-`running` rows back to `pending` so a replacement worker
9
+ * can resume with step memoization intact.
10
+ *
11
+ * The claim query is the beating heart: `SELECT … FOR UPDATE SKIP LOCKED`
12
+ * inside a single transaction. Multiple worker processes share the table
13
+ * without serialising on row locks.
14
+ */
15
+ // TODO(logging-subsystem): swap to ILogger once ADR-028 lands
16
+ import { Inject, Injectable, Logger, type OnModuleDestroy, type OnModuleInit } from '@nestjs/common';
17
+ import { and, asc, desc, eq, inArray, lt, lte, sql } from 'drizzle-orm';
18
+ import type { DrizzleClient } from '../../types/drizzle';
19
+ import { DRIZZLE } from '../../constants/tokens';
20
+ import { jobRuns, type JobRunRow } from './job-orchestration.schema';
21
+ import type { IJobOrchestrator, JobRun } from './job-orchestrator.protocol';
22
+ import type { IJobRunService } from './job-run-service.protocol';
23
+ import type { IJobStepService } from './job-step-service.protocol';
24
+ import {
25
+ JOB_ORCHESTRATOR,
26
+ JOB_RUN_SERVICE,
27
+ JOB_STEP_SERVICE,
28
+ } from './jobs-domain.tokens';
29
+ import {
30
+ JOB_HANDLER_REGISTRY,
31
+ JobHandlerBase,
32
+ type JobContext,
33
+ type JobHandlerMeta,
34
+ type RetryPolicy,
35
+ type SpawnChildOptions,
36
+ type StepOptions,
37
+ } from './job-handler.base';
38
+
39
/**
 * Configuration consumed by `JobWorker`.
 *
 * JOB-5 threads these values through the module's `.forRoot()` config; they
 * are accepted here as a plain DI-constructor argument so the worker
 * compiles standalone. Every optional field falls back to the matching
 * `DEFAULT_*` constant in this file.
 */
export interface JobWorkerOptions {
  /** Name of the pool this worker claims runs from. Matches `job.pool`. */
  pool: string;

  /** Upper bound on simultaneously in-flight `processRun` calls. */
  concurrency: number;

  /** Milliseconds between poll ticks. Default 1000. */
  pollIntervalMs?: number;

  /** Milliseconds between stale-claim sweeps. Default 60_000. */
  staleSweeperIntervalMs?: number;

  /**
   * Age (ms) after which a `running` row is presumed stranded by a crashed
   * worker and becomes eligible for recovery. Default 5 min. Must be
   * >= 2× max handler duration.
   */
  staleThresholdMs?: number;

  /** Longest time (ms) to wait for in-flight work to drain on SIGTERM. Default 30_000. */
  shutdownTimeoutMs?: number;
}
61
+
62
/** DI token under which the worker's `JobWorkerOptions` object is injected. */
export const JOB_WORKER_OPTIONS = Symbol('JOB_WORKER_OPTIONS');

// Fallback timings applied when the corresponding option is omitted.
const DEFAULT_POLL_INTERVAL_MS = 1_000; // 1 s
const DEFAULT_STALE_SWEEPER_INTERVAL_MS = 60 * 1_000; // 1 min
const DEFAULT_STALE_THRESHOLD_MS = 5 * 60 * 1_000; // 5 min
const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30 * 1_000; // 30 s

/** Run statuses listed as terminal; re-exported at the bottom of the file. */
const TERMINAL_STATUSES: Array<JobRunRow['status']> = [
  'completed',
  'failed',
  'timed_out',
  'canceled',
];
75
+
76
+ // ─── Pure helpers (exported for unit tests) ────────────────────────────────
77
+
78
/**
 * Backoff delay in ms to wait before the Nth attempt (1-indexed).
 *
 * - `'fixed'` policies always wait `baseMs`.
 * - Any other policy is treated as exponential: `baseMs * 2^(attempts - 1)`,
 *   capped at `Number.MAX_SAFE_INTEGER` so pathological attempt counts
 *   don't overflow.
 *
 * A negative, NaN or missing `baseMs` is treated as 0, and a zero base
 * always yields a zero delay regardless of the attempt count.
 *
 * @param policy   Retry policy supplying `backoff` mode and `baseMs`.
 * @param attempts 1-indexed attempt number the delay is computed for.
 * @returns Delay in milliseconds, in `[0, Number.MAX_SAFE_INTEGER]` for
 *          exponential policies.
 */
export function computeBackoff(policy: RetryPolicy, attempts: number): number {
  const requestedBase = Number(policy.baseMs);
  // NaN would otherwise propagate into `new Date(Date.now() + delay)`.
  const base = Number.isNaN(requestedBase) ? 0 : Math.max(requestedBase, 0);

  // Zero stays zero in both modes; checking it first keeps the overflow
  // guard below from turning "no delay" into the maximum delay.
  if (base === 0) return 0;

  if (policy.backoff === 'fixed') {
    return base;
  }

  // exponential: baseMs * 2^(attempts-1)
  const exponent = Math.max(attempts - 1, 0);
  if (exponent >= 53) return Number.MAX_SAFE_INTEGER; // 2^53 overflow guard

  const raw = base * 2 ** exponent;
  return Number.isFinite(raw) && raw < Number.MAX_SAFE_INTEGER
    ? raw
    : Number.MAX_SAFE_INTEGER;
}
97
+
98
/**
 * Decide whether an error should be retried under the given policy.
 *
 * An error is non-retryable when its `.name` (string) or `.code` (string,
 * or number compared by its decimal string form — so `code: 404` matches
 * a `'404'` entry) appears in `policy.nonRetryableErrors`.
 *
 * @param err             The thrown value; may be anything, including
 *                        `null` or a primitive.
 * @param policy          Retry policy, or `undefined` when the handler
 *                        declares none.
 * @param currentAttempts Attempts already recorded before this failure.
 * @returns
 *  - `'retry'` if attempts remain and the error isn't blacklisted,
 *  - `'fail'` otherwise (terminal failure), including when no policy is set.
 */
export function classifyError(
  err: unknown,
  policy: RetryPolicy | undefined,
  currentAttempts: number,
): 'retry' | 'fail' {
  if (!policy) return 'fail';

  const nonRetryable = policy.nonRetryableErrors ?? [];
  if (nonRetryable.length > 0 && typeof err === 'object' && err !== null) {
    const { name, code } = err as { name?: unknown; code?: unknown };
    const identifiers: string[] = [];
    if (typeof name === 'string') identifiers.push(name);
    if (typeof code === 'string') identifiers.push(code);
    // Numeric codes (e.g. HTTP-style statuses) are matched by string form.
    else if (typeof code === 'number') identifiers.push(String(code));

    if (identifiers.some((id) => nonRetryable.includes(id))) return 'fail';
  }

  // The failing attempt itself counts toward the budget.
  return currentAttempts + 1 >= policy.attempts ? 'fail' : 'retry';
}
118
+
119
/**
 * Build the raw claim-candidate select: the single highest-priority,
 * earliest-due `pending` run in `pool` whose `runAt` is not in the future,
 * locked `FOR UPDATE SKIP LOCKED`.
 *
 * Exported so tests can inspect `.toSQL()` without spinning up the full
 * worker. Matches JOB-3 §4 and ADR-022 "Claim query (Drizzle backend)".
 *
 * @param db   Drizzle client the query is built against.
 * @param pool Pool name to claim from.
 */
export function buildClaimQuery(db: DrizzleClient, pool: string) {
  const isPending = eq(jobRuns.status, 'pending');
  const inPool = eq(jobRuns.pool, pool);
  const isDue = lte(jobRuns.runAt, new Date());

  const nextCandidate = db
    .select({ id: jobRuns.id })
    .from(jobRuns)
    .where(and(isPending, inPool, isDue))
    // Highest priority first; ties broken by the earliest scheduled time.
    .orderBy(desc(jobRuns.priority), asc(jobRuns.runAt))
    .limit(1);

  return nextCandidate.for('update', { skipLocked: true });
}
139
+
140
/**
 * Build the stale-claim sweep candidate select: every `running` row whose
 * `claimedAt` is older than `staleThresholdMs`, locked
 * `FOR UPDATE SKIP LOCKED`.
 *
 * Per OQ-2 resolution (2026-04-19): per-worker sweeper, safe without
 * leader election because the update is self-gating.
 *
 * @param db               Drizzle client the query is built against.
 * @param staleThresholdMs Maximum claim age in ms before a row counts as stale.
 */
export function buildStaleSweepQuery(
  db: DrizzleClient,
  staleThresholdMs: number,
) {
  const cutoff = new Date(Date.now() - staleThresholdMs);
  const isStale = and(
    eq(jobRuns.status, 'running'),
    lt(jobRuns.claimedAt, cutoff),
  );

  const staleRows = db.select({ id: jobRuns.id }).from(jobRuns).where(isStale);
  return staleRows.for('update', { skipLocked: true });
}
161
+
162
+ // ─── Error serialisation ───────────────────────────────────────────────────
163
+
164
/**
 * Convert an arbitrary thrown value into the plain object stored on a run
 * or step `error` field.
 *
 * `message` is always a string: the value's own `message` when it has one
 * (coerced with `String`), otherwise `String(err)`. `stack` is only kept
 * when it really is a string, so non-Error throwables cannot smuggle
 * arbitrary values into the persisted payload.
 *
 * @param err       The thrown value (Error, plain object, primitive, null…).
 * @param attempt   Attempt number the failure belongs to.
 * @param retryable Whether the failure will be retried.
 */
function serialiseError(err: unknown, attempt: number, retryable: boolean) {
  const source =
    typeof err === 'object' && err !== null
      ? (err as { message?: unknown; stack?: unknown })
      : undefined;

  const message =
    source?.message !== undefined && source.message !== null
      ? String(source.message)
      : String(err);
  const stack = typeof source?.stack === 'string' ? source.stack : undefined;

  return {
    message,
    stack,
    retryable,
    attempt,
  };
}
173
+
174
+ // ─── JobWorker ─────────────────────────────────────────────────────────────
175
+
176
/**
 * Polling worker for a single job pool.
 *
 * On `onModuleInit` it starts a poll interval (claim → process) and a
 * stale-claim sweep interval, and registers a SIGTERM handler. On
 * `onModuleDestroy` it stops both intervals, waits up to
 * `shutdownTimeoutMs` for in-flight runs, and releases the runs *this
 * worker* still holds back to `pending`.
 */
@Injectable()
export class JobWorker implements OnModuleInit, OnModuleDestroy {
  /**
   * Largest time value a JS `Date` can hold (±8.64e15 ms around the epoch).
   * Retry `runAt` timestamps are clamped to this so a capped backoff delay
   * cannot produce an Invalid Date.
   */
  private static readonly MAX_DATE_MS = 8.64e15;

  private readonly logger = new Logger(JobWorker.name);
  private shuttingDown = false;
  /** Promises of `processRun` calls that have not settled yet. */
  private readonly inFlight = new Set<Promise<void>>();
  /** Ids of the runs behind `inFlight`, i.e. rows this worker claimed and still owns. */
  private readonly inFlightRunIds = new Set<JobRunRow['id']>();
  private pollTimer: ReturnType<typeof setInterval> | null = null;
  private sweeperTimer: ReturnType<typeof setInterval> | null = null;
  private sigtermHandled = false;
  private readonly sigtermHandler: () => void;

  private readonly pollIntervalMs: number;
  private readonly staleSweeperIntervalMs: number;
  private readonly staleThresholdMs: number;
  private readonly shutdownTimeoutMs: number;

  /**
   * Resolves optional timings against the `DEFAULT_*` constants and builds
   * the (once-only) SIGTERM handler. No timers are started here.
   */
  constructor(
    @Inject(DRIZZLE) private readonly db: DrizzleClient,
    @Inject(JOB_ORCHESTRATOR) private readonly orchestrator: IJobOrchestrator,
    @Inject(JOB_RUN_SERVICE) private readonly runService: IJobRunService,
    @Inject(JOB_STEP_SERVICE) private readonly stepService: IJobStepService,
    @Inject(JOB_WORKER_OPTIONS) private readonly options: JobWorkerOptions,
  ) {
    this.pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
    this.staleSweeperIntervalMs =
      options.staleSweeperIntervalMs ?? DEFAULT_STALE_SWEEPER_INTERVAL_MS;
    this.staleThresholdMs = options.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
    this.shutdownTimeoutMs =
      options.shutdownTimeoutMs ?? DEFAULT_SHUTDOWN_TIMEOUT_MS;

    this.sigtermHandler = () => {
      if (this.sigtermHandled) return;
      this.sigtermHandled = true;
      void this.onModuleDestroy();
    };
    void this.runService; // reserved for future scope-aware cancellation paths
  }

  // ============================================================================
  // Lifecycle
  // ============================================================================

  /** Start the poll and stale-sweep intervals and hook SIGTERM. */
  onModuleInit(): void {
    this.pollTimer = setInterval(() => {
      void this.pollAndProcess();
    }, this.pollIntervalMs);
    this.sweeperTimer = setInterval(() => {
      void this.sweepStaleClaims();
    }, this.staleSweeperIntervalMs);
    process.on('SIGTERM', this.sigtermHandler);
  }

  /**
   * Stop polling, drain in-flight work, then release whatever this worker
   * still holds.
   *
   * Only rows claimed by this instance (tracked in `inFlightRunIds`) are
   * reset. Resetting every `running` row in the pool would also yank runs
   * that other worker processes on the same pool are actively executing.
   * A repeated call (e.g. SIGTERM followed by Nest's own shutdown) only
   * waits for the drain again.
   */
  async onModuleDestroy(): Promise<void> {
    if (this.shuttingDown) {
      // Still drain, but don't tear intervals down twice.
      await this.drainInFlight();
      return;
    }
    this.shuttingDown = true;
    if (this.pollTimer) {
      clearInterval(this.pollTimer);
      this.pollTimer = null;
    }
    if (this.sweeperTimer) {
      clearInterval(this.sweeperTimer);
      this.sweeperTimer = null;
    }
    process.removeListener('SIGTERM', this.sigtermHandler);

    await this.drainInFlight();

    // Runs that outlived the drain timeout → release back to pending.
    const stranded = [...this.inFlightRunIds];
    if (stranded.length === 0) return;
    try {
      await this.db
        .update(jobRuns)
        .set({
          status: 'pending',
          claimedAt: null,
          startedAt: null,
          updatedAt: new Date(),
        })
        .where(
          and(
            eq(jobRuns.status, 'running'),
            eq(jobRuns.pool, this.options.pool),
            inArray(jobRuns.id, stranded),
          ),
        );
    } catch (err) {
      this.logger.error(`shutdown reset failed: ${(err as Error).message}`);
    }
  }

  /**
   * Wait until every in-flight run settles or `shutdownTimeoutMs` elapses,
   * whichever comes first. The timeout timer is always cleared so it cannot
   * keep the event loop alive after a fast drain.
   */
  private async drainInFlight(): Promise<void> {
    if (this.inFlight.size === 0) return;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<void>((resolve) => {
      timer = setTimeout(resolve, this.shutdownTimeoutMs);
    });
    try {
      await Promise.race([
        Promise.allSettled([...this.inFlight]).then(() => undefined),
        timeout,
      ]);
    } finally {
      if (timer !== undefined) clearTimeout(timer);
    }
  }

  // ============================================================================
  // Poll loop
  // ============================================================================

  /**
   * One poll tick: if the worker is not shutting down and has spare
   * concurrency, claim at most one run and start processing it in the
   * background. Claim failures are logged and swallowed so the interval
   * keeps ticking.
   */
  async pollAndProcess(): Promise<void> {
    if (this.shuttingDown) return;
    if (this.inFlight.size >= this.options.concurrency) return;

    let claimed: JobRunRow | null;
    try {
      claimed = await this.claimNext(this.options.pool);
    } catch (err) {
      this.logger.error(`claimNext failed: ${(err as Error).message}`);
      return;
    }
    if (!claimed) return;

    const run = claimed;
    // Bookkeeping is part of the tracked promise, so once a drain has seen
    // it settle, both sets are already up to date.
    const tracked: Promise<void> = this.processRun(run)
      .catch((err) => {
        this.logger.error(
          `processRun(${run.id}) unhandled: ${(err as Error).message}`,
        );
      })
      .finally(() => {
        this.inFlightRunIds.delete(run.id);
        this.inFlight.delete(tracked);
      });
    this.inFlightRunIds.add(run.id);
    this.inFlight.add(tracked);
  }

  /**
   * Claim the next runnable row from the pool. Transaction ensures the
   * select-candidate + update-to-running pair is atomic; FOR UPDATE SKIP
   * LOCKED lets multiple workers share the table without serialising.
   *
   * @returns The claimed row (now `running`), or `null` when nothing is due.
   */
  async claimNext(pool: string): Promise<JobRunRow | null> {
    return this.db.transaction(async (tx) => {
      const candidates = await tx
        .select({ id: jobRuns.id })
        .from(jobRuns)
        .where(
          and(
            eq(jobRuns.status, 'pending'),
            eq(jobRuns.pool, pool),
            lte(jobRuns.runAt, new Date()),
          ),
        )
        .orderBy(desc(jobRuns.priority), asc(jobRuns.runAt))
        .limit(1)
        .for('update', { skipLocked: true });
      const candidate = candidates[0];
      if (!candidate) return null;

      const [claimed] = await tx
        .update(jobRuns)
        .set({
          status: 'running',
          claimedAt: new Date(),
          startedAt: new Date(),
          updatedAt: new Date(),
        })
        .where(eq(jobRuns.id, candidate.id))
        .returning();
      return (claimed ?? null) as JobRunRow | null;
    });
  }

  // ============================================================================
  // Stale claim sweeper
  // ============================================================================

  /**
   * Release rows whose `claimed_at` is older than the threshold. Safe to
   * run concurrently across workers — the two-phase tx (select-for-update
   * then update) guarantees each stranded row is only reset once.
   *
   * Note: the query is not filtered by pool, so any worker recovers stale
   * rows of every pool. Failures are logged, never thrown.
   */
  async sweepStaleClaims(): Promise<void> {
    if (this.shuttingDown) return;
    try {
      await this.db.transaction(async (tx) => {
        const threshold = new Date(Date.now() - this.staleThresholdMs);
        const stale = await tx
          .select({ id: jobRuns.id })
          .from(jobRuns)
          .where(
            and(eq(jobRuns.status, 'running'), lt(jobRuns.claimedAt, threshold)),
          )
          .for('update', { skipLocked: true });
        if (stale.length === 0) return;
        const ids = stale.map((r) => r.id);
        await tx
          .update(jobRuns)
          .set({
            status: 'pending',
            claimedAt: null,
            startedAt: null,
            updatedAt: new Date(),
          })
          .where(inArray(jobRuns.id, ids));
        for (const id of ids) {
          this.logger.warn(`Recovered stale claim on run ${id}`);
        }
      });
    } catch (err) {
      this.logger.error(`sweepStaleClaims failed: ${(err as Error).message}`);
    }
  }

  // ============================================================================
  // processRun
  // ============================================================================

  /**
   * Execute one claimed run end to end:
   *  (a) fail it when no handler is registered for its `jobType`;
   *  (b) hand it back to `pending` when another run with the same
   *      `concurrencyKey` is already `running`;
   *  (c–e) build the handler + context, run it, and record `completed`;
   *  (f) on any error, reschedule (retry) or mark `failed` according to
   *      the handler's retry policy.
   */
  private async processRun(claimed: JobRunRow): Promise<void> {
    const registryEntry = JOB_HANDLER_REGISTRY.get(claimed.jobType);

    // (a) Missing handler — defensive; JOB-5 boot validator should have caught.
    if (!registryEntry) {
      this.logger.error(
        `No handler registered for jobType='${claimed.jobType}' (run ${claimed.id})`,
      );
      await this.markFailed(
        claimed,
        new Error(`No handler registered for jobType='${claimed.jobType}'`),
        /*finalAttempts*/ (claimed.attempts ?? 0) + 1,
      );
      return;
    }

    // (b) Concurrency-queue release gate — defer if another run with the
    //     same key is already `running`.
    if (claimed.concurrencyKey) {
      const inflight = await this.db
        .select({ id: jobRuns.id })
        .from(jobRuns)
        .where(
          and(
            eq(jobRuns.concurrencyKey, claimed.concurrencyKey),
            eq(jobRuns.status, 'running'),
          ),
        );
      const other = inflight.find((r) => r.id !== claimed.id);
      if (other) {
        await this.db
          .update(jobRuns)
          .set({
            status: 'pending',
            claimedAt: null,
            startedAt: null,
            updatedAt: new Date(),
          })
          .where(eq(jobRuns.id, claimed.id));
        return;
      }
    }

    const meta = registryEntry.meta as JobHandlerMeta<unknown>;
    const HandlerClass = registryEntry.handlerClass;

    const attemptsBefore = claimed.attempts ?? 0;
    try {
      // (c) Build JobContext. Phase 1: instantiate handler with no args.
      //     DI-for-handlers lands with JOB-5's boot wiring.
      //     Done inside the try so a throwing constructor is recorded as a
      //     failed attempt instead of leaving the row stuck in `running`.
      const handler = new HandlerClass() as JobHandlerBase<unknown>;
      const ctx: JobContext<unknown> = {
        input: claimed.input,
        run: claimed as JobRun,
        step: this.makeStepFn(claimed),
        spawnChild: this.makeSpawnFn(claimed),
        logger: new Logger(`JobRun:${claimed.id}`),
      };

      // (d) Run the handler.
      const output = (await handler.run(ctx)) as Record<string, unknown> | undefined;
      // (e) Success.
      await this.db
        .update(jobRuns)
        .set({
          status: 'completed',
          output: (output ?? {}) as Record<string, unknown>,
          finishedAt: new Date(),
          updatedAt: new Date(),
          attempts: attemptsBefore + 1,
        })
        .where(eq(jobRuns.id, claimed.id));
    } catch (err) {
      // (f) Error classification + retry/fail.
      const policy = meta.retry;
      const decision = classifyError(err, policy, attemptsBefore);
      const nextAttempts = attemptsBefore + 1;
      if (decision === 'retry' && policy) {
        const delay = computeBackoff(policy, nextAttempts);
        const now = Date.now();
        // Clamp: a capped backoff (MAX_SAFE_INTEGER) added to `now` exceeds
        // the Date range and would otherwise yield an Invalid Date.
        const runAtMs = Number.isFinite(delay)
          ? Math.min(now + delay, JobWorker.MAX_DATE_MS)
          : now;
        await this.db
          .update(jobRuns)
          .set({
            status: 'pending',
            attempts: nextAttempts,
            runAt: new Date(runAtMs),
            startedAt: null,
            claimedAt: null,
            error: serialiseError(err, nextAttempts, true),
            updatedAt: new Date(),
          })
          .where(eq(jobRuns.id, claimed.id));
      } else {
        await this.markFailed(claimed, err, nextAttempts);
      }
    }
  }

  /**
   * Persist a terminal `failed` state for the run and, when the run's own
   * `parentClosePolicy` is `'terminate'`, ask the orchestrator to cancel
   * it with `cascade: true`. Cascade errors are logged, not rethrown.
   */
  private async markFailed(
    claimed: JobRunRow,
    err: unknown,
    finalAttempts: number,
  ): Promise<void> {
    await this.db
      .update(jobRuns)
      .set({
        status: 'failed',
        attempts: finalAttempts,
        finishedAt: new Date(),
        error: serialiseError(err, finalAttempts, false),
        updatedAt: new Date(),
      })
      .where(eq(jobRuns.id, claimed.id));

    // Parent-close-policy cascade: if this run has children under the same
    // root_run_id and this run's own parentClosePolicy is 'terminate', cascade.
    if (claimed.parentClosePolicy === 'terminate') {
      try {
        // JOB-8 — thread the run's own tenantId so the orchestrator's
        // multi-tenant gate passes. Without this, every terminate-policy
        // cascade throws MissingTenantIdError under multiTenant=true and
        // the outer catch silently swallows it — children never cancel.
        await this.orchestrator.cancel(claimed.id, {
          cascade: true,
          reason: 'parent-failed',
          tenantId: claimed.tenantId,
        });
      } catch (cascadeErr) {
        // cancel is idempotent; failure here is unusual but not fatal.
        this.logger.warn(
          `cascade on failed run ${claimed.id}: ${(cascadeErr as Error).message}`,
        );
      }
    }
  }

  // ============================================================================
  // ctx.step / ctx.spawnChild builders
  // ============================================================================

  /**
   * Build the `ctx.step` function for a run.
   *
   * A step already recorded as `completed` is not re-executed — its stored
   * output is returned. Otherwise the step is recorded as `running`,
   * executed, and recorded as `completed` or `failed` (the error is then
   * rethrown to the handler). `_opts` is accepted but currently unused.
   */
  private makeStepFn(run: JobRunRow) {
    return async <TOutput>(
      stepId: string,
      fn: () => Promise<TOutput>,
      _opts?: StepOptions,
    ): Promise<TOutput> => {
      void _opts;
      const existing = await this.stepService.findStep(run.id, stepId);
      if (existing?.status === 'completed') {
        return existing.output as TOutput;
      }

      const seq = await this.nextStepSeq(run.id);
      const startedAt = new Date();
      const nextAttempts = (existing?.attempts ?? 0) + 1;
      await this.stepService.recordStep({
        jobRunId: run.id,
        stepId,
        kind: 'task',
        seq,
        status: 'running',
        startedAt,
        attempts: nextAttempts,
      });
      try {
        const output = await fn();
        await this.stepService.recordStep({
          jobRunId: run.id,
          stepId,
          kind: 'task',
          seq,
          status: 'completed',
          output: output as Record<string, unknown> | undefined,
          finishedAt: new Date(),
          attempts: nextAttempts,
        });
        return output;
      } catch (err) {
        await this.stepService.recordStep({
          jobRunId: run.id,
          stepId,
          kind: 'task',
          seq,
          status: 'failed',
          error: serialiseError(err, nextAttempts, false),
          finishedAt: new Date(),
          attempts: nextAttempts,
        });
        throw err;
      }
    };
  }

  /**
   * Build the `ctx.spawnChild` function for a run: starts a child run via
   * the orchestrator with this run as parent and trigger reference.
   *
   * NOTE(review): no `tenantId` is forwarded here, unlike the cancel call in
   * `markFailed` — confirm whether `orchestrator.start` needs it under
   * multi-tenant mode.
   */
  private makeSpawnFn(run: JobRunRow) {
    return async (
      type: string,
      input: unknown,
      opts?: SpawnChildOptions,
    ): Promise<JobRun> => {
      return this.orchestrator.start(type, input, {
        parentRunId: run.id,
        parentClosePolicy: opts?.closePolicy,
        runAt: opts?.runAt,
        priority: opts?.priority,
        tags: opts?.tags,
        triggerSource: 'parent',
        triggerRef: run.id,
      });
    };
  }

  /**
   * Allocate the next `seq` for a given run. SELECT-max approach — runs
   * typically have <100 steps so the scan is cheap, and correctness across
   * retries is more important than the microseconds saved by an in-memory
   * counter (which would drift if the worker crashes mid-run and another
   * worker resumes via stale-claim sweep).
   *
   * Accepts both raw-execute result shapes: a plain array of rows, or an
   * object carrying the rows under `.rows`. The result is inspected before
   * any destructuring, because array-destructuring the object shape throws.
   *
   * @returns The next sequence number, or 1 when no usable row came back.
   */
  private async nextStepSeq(runId: string): Promise<number> {
    const result: unknown = await this.db.execute(
      sql`SELECT COALESCE(MAX(seq), 0) + 1 AS next FROM job_step WHERE job_run_id = ${runId}`,
    );
    const rows: unknown = Array.isArray(result)
      ? result
      : (result as { rows?: unknown } | null | undefined)?.rows;
    const first = Array.isArray(rows)
      ? (rows[0] as { next?: unknown } | null | undefined)
      : undefined;
    if (first?.next === undefined || first.next === null) return 1;

    const next = Number(first.next);
    return Number.isFinite(next) ? next : 1;
  }
}
613
+
614
+ // Terminal statuses re-exported for JOB-4 parity imports.
615
+ export { TERMINAL_STATUSES };