@pattern-stack/codegen 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/runtime/subsystems/bridge/bridge.module.d.ts +1 -0
  3. package/dist/runtime/subsystems/bridge/bridge.module.js +38 -21
  4. package/dist/runtime/subsystems/bridge/bridge.module.js.map +1 -1
  5. package/dist/runtime/subsystems/bridge/index.d.ts +1 -0
  6. package/dist/runtime/subsystems/bridge/index.js +29 -12
  7. package/dist/runtime/subsystems/bridge/index.js.map +1 -1
  8. package/dist/runtime/subsystems/index.js +31 -14
  9. package/dist/runtime/subsystems/index.js.map +1 -1
  10. package/dist/runtime/subsystems/jobs/index.d.ts +1 -0
  11. package/dist/runtime/subsystems/jobs/index.js +27 -10
  12. package/dist/runtime/subsystems/jobs/index.js.map +1 -1
  13. package/dist/runtime/subsystems/jobs/job-orchestrator.memory-backend.d.ts +3 -1
  14. package/dist/runtime/subsystems/jobs/job-orchestrator.memory-backend.js +9 -4
  15. package/dist/runtime/subsystems/jobs/job-orchestrator.memory-backend.js.map +1 -1
  16. package/dist/runtime/subsystems/jobs/job-worker.d.ts +3 -1
  17. package/dist/runtime/subsystems/jobs/job-worker.js +6 -2
  18. package/dist/runtime/subsystems/jobs/job-worker.js.map +1 -1
  19. package/dist/runtime/subsystems/jobs/job-worker.module.d.ts +3 -1
  20. package/dist/runtime/subsystems/jobs/job-worker.module.js +27 -10
  21. package/dist/runtime/subsystems/jobs/job-worker.module.js.map +1 -1
  22. package/dist/runtime/subsystems/jobs/jobs-domain.module.js +9 -4
  23. package/dist/runtime/subsystems/jobs/jobs-domain.module.js.map +1 -1
  24. package/dist/src/cli/index.js +29 -2
  25. package/dist/src/cli/index.js.map +1 -1
  26. package/package.json +2 -1
  27. package/runtime/analytics/index.ts +31 -0
  28. package/runtime/analytics/metrics.ts +85 -0
  29. package/runtime/analytics/packs/crm-entity-measures.ts +20 -0
  30. package/runtime/analytics/packs/index.ts +5 -0
  31. package/runtime/analytics/packs/monetary-measures.ts +20 -0
  32. package/runtime/analytics/specs.ts +54 -0
  33. package/runtime/analytics/types.ts +105 -0
  34. package/runtime/base-classes/activity-entity-repository.ts +50 -0
  35. package/runtime/base-classes/activity-entity-service.ts +48 -0
  36. package/runtime/base-classes/base-read-use-cases.ts +88 -0
  37. package/runtime/base-classes/base-repository.ts +289 -0
  38. package/runtime/base-classes/base-service.ts +183 -0
  39. package/runtime/base-classes/index.ts +38 -0
  40. package/runtime/base-classes/knowledge-entity-repository.ts +12 -0
  41. package/runtime/base-classes/knowledge-entity-service.ts +14 -0
  42. package/runtime/base-classes/lifecycle-events.ts +152 -0
  43. package/runtime/base-classes/metadata-entity-repository.ts +80 -0
  44. package/runtime/base-classes/metadata-entity-service.ts +48 -0
  45. package/runtime/base-classes/synced-entity-repository.ts +57 -0
  46. package/runtime/base-classes/synced-entity-service.ts +50 -0
  47. package/runtime/base-classes/with-analytics.ts +22 -0
  48. package/runtime/constants/tokens.ts +29 -0
  49. package/runtime/eav-helpers.ts +74 -0
  50. package/runtime/pipes/zod-validation.pipe.ts +64 -0
  51. package/runtime/shared/openapi/error-response.dto.ts +24 -0
  52. package/runtime/shared/openapi/errors.ts +39 -0
  53. package/runtime/shared/openapi/index.ts +20 -0
  54. package/runtime/shared/openapi/registry.tokens.ts +13 -0
  55. package/runtime/shared/openapi/registry.ts +151 -0
  56. package/runtime/subsystems/analytics/analytics-query.protocol.ts +37 -0
  57. package/runtime/subsystems/analytics/analytics.module.ts +64 -0
  58. package/runtime/subsystems/analytics/analytics.tokens.ts +24 -0
  59. package/runtime/subsystems/analytics/cube-backend.ts +75 -0
  60. package/runtime/subsystems/analytics/index.ts +15 -0
  61. package/runtime/subsystems/analytics/noop-backend.ts +27 -0
  62. package/runtime/subsystems/auth/auth.module.ts +91 -0
  63. package/runtime/subsystems/auth/auth.tokens.ts +27 -0
  64. package/runtime/subsystems/auth/backends/encryption-key/env.ts +76 -0
  65. package/runtime/subsystems/auth/backends/oauth-state-store/in-memory.ts +42 -0
  66. package/runtime/subsystems/auth/index.ts +77 -0
  67. package/runtime/subsystems/auth/protocols/auth-strategy.ts +46 -0
  68. package/runtime/subsystems/auth/protocols/encryption-key.ts +21 -0
  69. package/runtime/subsystems/auth/protocols/integration-store.ts +66 -0
  70. package/runtime/subsystems/auth/protocols/oauth-state-store.ts +16 -0
  71. package/runtime/subsystems/auth/runtime/integration-broken.error.ts +21 -0
  72. package/runtime/subsystems/auth/runtime/oauth2-refresh.strategy.ts +189 -0
  73. package/runtime/subsystems/auth/runtime/session-expired.error.ts +39 -0
  74. package/runtime/subsystems/auth/runtime/with-auth-retry.ts +50 -0
  75. package/runtime/subsystems/bridge/assert-tenant-id.ts +57 -0
  76. package/runtime/subsystems/bridge/bridge-delivery-handler.ts +220 -0
  77. package/runtime/subsystems/bridge/bridge-delivery.drizzle-backend.ts +149 -0
  78. package/runtime/subsystems/bridge/bridge-delivery.memory-backend.ts +140 -0
  79. package/runtime/subsystems/bridge/bridge-delivery.schema.ts +142 -0
  80. package/runtime/subsystems/bridge/bridge-errors.ts +112 -0
  81. package/runtime/subsystems/bridge/bridge-outbox-drain-hook.ts +175 -0
  82. package/runtime/subsystems/bridge/bridge.module.ts +160 -0
  83. package/runtime/subsystems/bridge/bridge.protocol.ts +351 -0
  84. package/runtime/subsystems/bridge/bridge.tokens.ts +68 -0
  85. package/runtime/subsystems/bridge/event-flow.service.ts +175 -0
  86. package/runtime/subsystems/bridge/generated/.gitkeep +0 -0
  87. package/runtime/subsystems/bridge/generated/registry.ts +6 -0
  88. package/runtime/subsystems/bridge/index.ts +84 -0
  89. package/runtime/subsystems/bridge/reserved-pools.ts +36 -0
  90. package/runtime/subsystems/cache/cache.drizzle-backend.ts +150 -0
  91. package/runtime/subsystems/cache/cache.memory-backend.ts +116 -0
  92. package/runtime/subsystems/cache/cache.module.ts +115 -0
  93. package/runtime/subsystems/cache/cache.protocol.ts +45 -0
  94. package/runtime/subsystems/cache/cache.schema.ts +27 -0
  95. package/runtime/subsystems/cache/cache.tokens.ts +17 -0
  96. package/runtime/subsystems/cache/index.ts +22 -0
  97. package/runtime/subsystems/events/domain-events.schema.ts +77 -0
  98. package/runtime/subsystems/events/event-bus.drizzle-backend.ts +327 -0
  99. package/runtime/subsystems/events/event-bus.memory-backend.ts +142 -0
  100. package/runtime/subsystems/events/event-bus.protocol.ts +86 -0
  101. package/runtime/subsystems/events/event-bus.redis-backend.ts +304 -0
  102. package/runtime/subsystems/events/events-errors.ts +30 -0
  103. package/runtime/subsystems/events/events.module.ts +230 -0
  104. package/runtime/subsystems/events/events.tokens.ts +62 -0
  105. package/runtime/subsystems/events/generated/bus.ts +103 -0
  106. package/runtime/subsystems/events/generated/index.ts +7 -0
  107. package/runtime/subsystems/events/generated/registry.ts +84 -0
  108. package/runtime/subsystems/events/generated/schemas.ts +59 -0
  109. package/runtime/subsystems/events/generated/types.ts +94 -0
  110. package/runtime/subsystems/events/index.ts +21 -0
  111. package/runtime/subsystems/index.ts +63 -0
  112. package/runtime/subsystems/jobs/generated/job-orchestration.schema.multi-tenant.ts +217 -0
  113. package/runtime/subsystems/jobs/generated/job-orchestration.schema.single-tenant.ts +217 -0
  114. package/runtime/subsystems/jobs/generated/scope-entity-type.ts +10 -0
  115. package/runtime/subsystems/jobs/index.ts +120 -0
  116. package/runtime/subsystems/jobs/job-handler.base.ts +206 -0
  117. package/runtime/subsystems/jobs/job-orchestration.schema.ts +217 -0
  118. package/runtime/subsystems/jobs/job-orchestrator.drizzle-backend.ts +536 -0
  119. package/runtime/subsystems/jobs/job-orchestrator.memory-backend.ts +860 -0
  120. package/runtime/subsystems/jobs/job-orchestrator.protocol.ts +179 -0
  121. package/runtime/subsystems/jobs/job-run-service.drizzle-backend.ts +171 -0
  122. package/runtime/subsystems/jobs/job-run-service.memory-backend.ts +165 -0
  123. package/runtime/subsystems/jobs/job-run-service.protocol.ts +79 -0
  124. package/runtime/subsystems/jobs/job-step-service.drizzle-backend.ts +66 -0
  125. package/runtime/subsystems/jobs/job-step-service.memory-backend.ts +119 -0
  126. package/runtime/subsystems/jobs/job-step-service.protocol.ts +53 -0
  127. package/runtime/subsystems/jobs/job-worker.module.ts +312 -0
  128. package/runtime/subsystems/jobs/job-worker.ts +624 -0
  129. package/runtime/subsystems/jobs/jobs-domain.module.ts +119 -0
  130. package/runtime/subsystems/jobs/jobs-domain.tokens.ts +30 -0
  131. package/runtime/subsystems/jobs/jobs-errors.ts +150 -0
  132. package/runtime/subsystems/jobs/memory-job-store.ts +35 -0
  133. package/runtime/subsystems/jobs/pool-config.loader.ts +218 -0
  134. package/runtime/subsystems/storage/index.ts +18 -0
  135. package/runtime/subsystems/storage/storage.local-backend.ts +113 -0
  136. package/runtime/subsystems/storage/storage.memory-backend.ts +78 -0
  137. package/runtime/subsystems/storage/storage.module.ts +60 -0
  138. package/runtime/subsystems/storage/storage.protocol.ts +78 -0
  139. package/runtime/subsystems/storage/storage.tokens.ts +9 -0
  140. package/runtime/subsystems/storage/storage.utils.ts +20 -0
  141. package/runtime/subsystems/sync/deep-equal.differ.ts +198 -0
  142. package/runtime/subsystems/sync/execute-sync.use-case.ts +334 -0
  143. package/runtime/subsystems/sync/index.ts +98 -0
  144. package/runtime/subsystems/sync/sync-audit.schema.ts +300 -0
  145. package/runtime/subsystems/sync/sync-change-source.protocol.ts +99 -0
  146. package/runtime/subsystems/sync/sync-cursor-store.drizzle-backend.ts +104 -0
  147. package/runtime/subsystems/sync/sync-cursor-store.memory-backend.ts +64 -0
  148. package/runtime/subsystems/sync/sync-cursor-store.protocol.ts +53 -0
  149. package/runtime/subsystems/sync/sync-errors.ts +54 -0
  150. package/runtime/subsystems/sync/sync-field-diff.protocol.ts +61 -0
  151. package/runtime/subsystems/sync/sync-loopback.protocol.ts +33 -0
  152. package/runtime/subsystems/sync/sync-run-recorder.drizzle-backend.ts +123 -0
  153. package/runtime/subsystems/sync/sync-run-recorder.memory-backend.ts +143 -0
  154. package/runtime/subsystems/sync/sync-run-recorder.protocol.ts +86 -0
  155. package/runtime/subsystems/sync/sync-sink.protocol.ts +55 -0
  156. package/runtime/subsystems/sync/sync.module.ts +156 -0
  157. package/runtime/subsystems/sync/sync.tokens.ts +57 -0
  158. package/runtime/types/drizzle.ts +23 -0
@@ -0,0 +1,624 @@
1
+ /**
2
+ * JobWorker — backend-agnostic tick loop for the job orchestration domain
3
+ * (ADR-022, JOB-3).
4
+ *
5
+ * One worker instance per active pool. On `onModuleInit` it starts two
6
+ * intervals: the poll loop (claim → process → repeat) and the stale-claim
7
+ * sweeper. On `onModuleDestroy` / SIGTERM it drains in-flight work and
8
+ * releases still-`running` rows back to `pending` so a replacement worker
9
+ * can resume with step memoization intact.
10
+ *
11
+ * The claim query is the beating heart: `SELECT … FOR UPDATE SKIP LOCKED`
12
+ * inside a single transaction. Multiple worker processes share the table
13
+ * without serialising on row locks.
14
+ */
15
+ // TODO(logging-subsystem): swap to ILogger once ADR-028 lands
16
+ import { Inject, Injectable, Logger, type OnModuleDestroy, type OnModuleInit } from '@nestjs/common';
17
+ import { ModuleRef } from '@nestjs/core';
18
+ import { and, asc, desc, eq, inArray, lt, lte, sql } from 'drizzle-orm';
19
+ import type { DrizzleClient } from '../../types/drizzle';
20
+ import { DRIZZLE } from '../../constants/tokens';
21
+ import { jobRuns, type JobRunRow } from './job-orchestration.schema';
22
+ import type { IJobOrchestrator, JobRun } from './job-orchestrator.protocol';
23
+ import type { IJobRunService } from './job-run-service.protocol';
24
+ import type { IJobStepService } from './job-step-service.protocol';
25
+ import {
26
+ JOB_ORCHESTRATOR,
27
+ JOB_RUN_SERVICE,
28
+ JOB_STEP_SERVICE,
29
+ } from './jobs-domain.tokens';
30
+ import {
31
+ JOB_HANDLER_REGISTRY,
32
+ JobHandlerBase,
33
+ type JobContext,
34
+ type JobHandlerMeta,
35
+ type RetryPolicy,
36
+ type SpawnChildOptions,
37
+ type StepOptions,
38
+ } from './job-handler.base';
39
+
40
/**
 * Configuration consumed by `JobWorker`.
 *
 * JOB-5 feeds these values in from the module's `.forRoot()` config; the
 * worker itself only needs them as a plain constructor-injected object, which
 * keeps this file compilable on its own.
 */
export interface JobWorkerOptions {
  /** Name of the pool this worker claims runs from (compared to `job.pool`). */
  pool: string;

  /** Upper bound on simultaneously in-flight `processRun` calls. */
  concurrency: number;

  /** Milliseconds between poll ticks. Defaults to 1000 when omitted. */
  pollIntervalMs?: number;

  /** Milliseconds between stale-claim sweeps. Defaults to 60_000 when omitted. */
  staleSweeperIntervalMs?: number;

  /**
   * Age (ms) after which a `running` row is treated as abandoned by a crashed
   * worker. Defaults to 5 minutes. Must be at least twice the longest handler
   * duration.
   */
  staleThresholdMs?: number;

  /**
   * Longest time (ms) to wait for in-flight runs to drain on SIGTERM.
   * Defaults to 30_000 when omitted.
   */
  shutdownTimeoutMs?: number;
}
62
+
63
/** DI token under which the `JobWorkerOptions` object is injected. */
export const JOB_WORKER_OPTIONS = Symbol('JOB_WORKER_OPTIONS');

// Fallbacks used when the matching `JobWorkerOptions` field is omitted.
const DEFAULT_POLL_INTERVAL_MS = 1_000; // 1 second
const DEFAULT_STALE_SWEEPER_INTERVAL_MS = 60 * 1_000; // 1 minute
const DEFAULT_STALE_THRESHOLD_MS = 5 * 60 * 1_000; // 5 minutes
const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30 * 1_000; // 30 seconds

/** Run statuses listed here as terminal. */
const TERMINAL_STATUSES: Array<JobRunRow['status']> = [
  'completed',
  'failed',
  'timed_out',
  'canceled',
];
76
+
77
+ // ─── Pure helpers (exported for unit tests) ────────────────────────────────
78
+
79
/**
 * Backoff delay in ms for the Nth attempt (1-indexed).
 *
 * - `'fixed'` policies always return `baseMs` (negative values clamp to 0).
 * - Any other policy is treated as exponential: `baseMs * 2^(attempts - 1)`,
 *   capped at `Number.MAX_SAFE_INTEGER` so pathological attempt counts
 *   don't overflow.
 *
 * A zero base always yields a zero delay, regardless of the attempt count.
 *
 * @param policy   Retry policy supplying `backoff` mode and `baseMs`.
 * @param attempts 1-indexed attempt number; values below 1 behave like 1.
 * @returns Delay in milliseconds, in `[0, Number.MAX_SAFE_INTEGER]` for
 *          finite inputs.
 */
export function computeBackoff(policy: RetryPolicy, attempts: number): number {
  const base = Math.max(policy.baseMs, 0);
  if (policy.backoff === 'fixed') {
    return base;
  }
  // 0 * 2^n is 0 for every n. Short-circuit before the overflow guard below,
  // which would otherwise turn "no delay" into the maximum delay once the
  // exponent reaches 53.
  if (base === 0) {
    return 0;
  }
  // exponential: baseMs * 2^(attempts-1)
  const exponent = Math.max(attempts - 1, 0);
  if (exponent >= 53) return Number.MAX_SAFE_INTEGER; // 2^53 overflow guard
  const raw = base * 2 ** exponent;
  if (!Number.isFinite(raw) || raw >= Number.MAX_SAFE_INTEGER) {
    return Number.MAX_SAFE_INTEGER;
  }
  return raw;
}
98
+
99
/**
 * Retry-or-fail decision for a thrown value under a retry policy.
 *
 * The error is terminal (`'fail'`) when:
 *  - no policy is supplied,
 *  - its `.name` or `.code` appears in `policy.nonRetryableErrors`, or
 *  - counting the current attempt, the attempt budget is used up.
 * Anything else is `'retry'`.
 *
 * @param err             The thrown value; may be a non-Error.
 * @param policy          Retry policy, or `undefined` when none is configured.
 * @param currentAttempts Attempts recorded before the one that just failed.
 */
export function classifyError(
  err: unknown,
  policy: RetryPolicy | undefined,
  currentAttempts: number,
): 'retry' | 'fail' {
  if (policy == null) {
    return 'fail';
  }

  const { name, code } = (err ?? {}) as { name?: string; code?: string };

  for (const blocked of policy.nonRetryableErrors ?? []) {
    if (blocked === name || blocked === code) {
      return 'fail';
    }
  }

  const attemptsIncludingThisOne = currentAttempts + 1;
  return attemptsIncludingThisOne >= policy.attempts ? 'fail' : 'retry';
}
119
+
120
/**
 * Assemble the claim-candidate select on its own, so tests can call
 * `.toSQL()` on it without constructing a worker. Mirrors JOB-3 §4 and
 * ADR-022 "Claim query (Drizzle backend)".
 *
 * Selects the id of at most one `pending` row in `pool` whose `runAt` is not
 * in the future, ordered by priority (descending) then `runAt` (ascending),
 * with `FOR UPDATE SKIP LOCKED`.
 */
export function buildClaimQuery(db: DrizzleClient, pool: string) {
  const now = new Date();
  const isRunnable = and(
    eq(jobRuns.status, 'pending'),
    eq(jobRuns.pool, pool),
    lte(jobRuns.runAt, now),
  );

  const candidate = db.select({ id: jobRuns.id }).from(jobRuns).where(isRunnable);

  return candidate
    .orderBy(desc(jobRuns.priority), asc(jobRuns.runAt))
    .limit(1)
    .for('update', { skipLocked: true });
}
140
+
141
/**
 * Assemble the stale-claim sweep candidate select: ids of `running` rows
 * whose `claimedAt` is older than `staleThresholdMs` ago, taken with
 * `FOR UPDATE SKIP LOCKED`.
 *
 * Per the OQ-2 resolution (2026-04-19) the sweeper runs per worker and is
 * safe without leader election because the update is self-gating.
 */
export function buildStaleSweepQuery(
  db: DrizzleClient,
  staleThresholdMs: number,
) {
  const cutoff = new Date(Date.now() - staleThresholdMs);
  const isStale = and(
    eq(jobRuns.status, 'running'),
    lt(jobRuns.claimedAt, cutoff),
  );

  return db
    .select({ id: jobRuns.id })
    .from(jobRuns)
    .where(isStale)
    .for('update', { skipLocked: true });
}
162
+
163
+ // ─── Error serialisation ───────────────────────────────────────────────────
164
+
165
/**
 * Flatten a thrown value into the plain object stored in `error` columns.
 *
 * `message` falls back to `String(err)` when the value carries no `.message`
 * (e.g. a thrown string); `stack` is passed through when present.
 *
 * @param err       The thrown value; may be a non-Error.
 * @param attempt   Attempt number the failure belongs to.
 * @param retryable Whether the failure will be retried.
 */
function serialiseError(err: unknown, attempt: number, retryable: boolean) {
  const source = err as { message?: string; stack?: string } | undefined;
  const message: string = source?.message ?? String(err);
  const stack = source?.stack;
  return { message, stack, retryable, attempt };
}
174
+
175
+ // ─── JobWorker ─────────────────────────────────────────────────────────────
176
+
177
/**
 * Tick-loop worker for a single job pool.
 *
 * Lifecycle:
 *  - `onModuleInit` starts the poll interval and the stale-claim sweeper
 *    interval, and registers a SIGTERM listener.
 *  - `onModuleDestroy` (also triggered by SIGTERM) stops both intervals,
 *    waits up to `shutdownTimeoutMs` for in-flight runs, then releases the
 *    runs THIS instance still holds back to `pending`.
 *
 * Each poll tick claims at most one run and processes it in the background;
 * the number of concurrently processed runs is bounded by
 * `options.concurrency`.
 */
@Injectable()
export class JobWorker implements OnModuleInit, OnModuleDestroy {
  /** Largest epoch-millisecond value a JS `Date` can represent. */
  private static readonly MAX_DATE_MS = 8.64e15;

  private readonly logger = new Logger(JobWorker.name);
  private shuttingDown = false;
  /** Memoised shutdown so concurrent `onModuleDestroy` callers share one run. */
  private shutdownPromise: Promise<void> | null = null;
  private readonly inFlight = new Set<Promise<void>>();
  /** Ids of the runs behind `inFlight`; used to scope the shutdown release. */
  private readonly inFlightRunIds = new Set<JobRunRow['id']>();
  /** Claims currently awaiting the database; counted against `concurrency`. */
  private pendingClaims = 0;
  private pollTimer: ReturnType<typeof setInterval> | null = null;
  private sweeperTimer: ReturnType<typeof setInterval> | null = null;
  private sigtermHandled = false;
  private readonly sigtermHandler: () => void;

  private readonly pollIntervalMs: number;
  private readonly staleSweeperIntervalMs: number;
  private readonly staleThresholdMs: number;
  private readonly shutdownTimeoutMs: number;

  constructor(
    @Inject(DRIZZLE) private readonly db: DrizzleClient,
    @Inject(JOB_ORCHESTRATOR) private readonly orchestrator: IJobOrchestrator,
    @Inject(JOB_RUN_SERVICE) private readonly runService: IJobRunService,
    @Inject(JOB_STEP_SERVICE) private readonly stepService: IJobStepService,
    @Inject(JOB_WORKER_OPTIONS) private readonly options: JobWorkerOptions,
    private readonly moduleRef: ModuleRef,
  ) {
    this.pollIntervalMs = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
    this.staleSweeperIntervalMs =
      options.staleSweeperIntervalMs ?? DEFAULT_STALE_SWEEPER_INTERVAL_MS;
    this.staleThresholdMs = options.staleThresholdMs ?? DEFAULT_STALE_THRESHOLD_MS;
    this.shutdownTimeoutMs =
      options.shutdownTimeoutMs ?? DEFAULT_SHUTDOWN_TIMEOUT_MS;

    // Bound once so the same function reference can be removed on shutdown.
    this.sigtermHandler = () => {
      if (this.sigtermHandled) return;
      this.sigtermHandled = true;
      void this.onModuleDestroy();
    };
    void this.runService; // reserved for future scope-aware cancellation paths
  }

  // ============================================================================
  // Lifecycle
  // ============================================================================

  /** Start the poll loop, the stale-claim sweeper, and the SIGTERM hook. */
  onModuleInit(): void {
    this.pollTimer = setInterval(() => {
      void this.pollAndProcess();
    }, this.pollIntervalMs);
    this.sweeperTimer = setInterval(() => {
      void this.sweepStaleClaims();
    }, this.staleSweeperIntervalMs);
    process.on('SIGTERM', this.sigtermHandler);
  }

  /**
   * Stop polling, drain in-flight work, and release whatever is still held.
   *
   * Safe to call more than once (SIGTERM handler plus the framework hook):
   * every caller awaits the same shutdown, so none of them returns before the
   * release step has finished.
   */
  async onModuleDestroy(): Promise<void> {
    if (!this.shutdownPromise) {
      this.shutdownPromise = this.shutdown();
    }
    await this.shutdownPromise;
  }

  /** One-time shutdown sequence behind `onModuleDestroy`. */
  private async shutdown(): Promise<void> {
    // Set synchronously so any tick that fires from here on is a no-op.
    this.shuttingDown = true;
    if (this.pollTimer) {
      clearInterval(this.pollTimer);
      this.pollTimer = null;
    }
    if (this.sweeperTimer) {
      clearInterval(this.sweeperTimer);
      this.sweeperTimer = null;
    }
    process.removeListener('SIGTERM', this.sigtermHandler);

    await this.drainInFlight();

    // Runs that outlived the drain timeout → release back to pending. Only
    // the runs this instance claimed are touched: other workers sharing the
    // pool may legitimately have `running` rows of their own.
    const stranded = [...this.inFlightRunIds];
    if (stranded.length === 0) return;
    try {
      await this.releaseRuns(stranded);
    } catch (err) {
      this.logger.error(`shutdown reset failed: ${this.errorMessage(err)}`);
    }
  }

  /**
   * Wait for all in-flight runs to settle, giving up after
   * `shutdownTimeoutMs`. The timeout timer is always cleared so it cannot
   * keep the process alive once the drain has finished.
   */
  private async drainInFlight(): Promise<void> {
    if (this.inFlight.size === 0) return;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<void>((resolve) => {
      timer = setTimeout(resolve, this.shutdownTimeoutMs);
    });
    try {
      await Promise.race([
        Promise.allSettled([...this.inFlight]).then(() => undefined),
        timeout,
      ]);
    } finally {
      if (timer !== undefined) clearTimeout(timer);
    }
  }

  /**
   * Flip the given runs from `running` back to `pending`, clearing their
   * claim timestamps. Rows that are no longer `running` are left untouched.
   */
  private async releaseRuns(ids: ReadonlyArray<JobRunRow['id']>): Promise<void> {
    if (ids.length === 0) return;
    await this.db
      .update(jobRuns)
      .set({
        status: 'pending',
        claimedAt: null,
        startedAt: null,
        updatedAt: new Date(),
      })
      .where(and(inArray(jobRuns.id, [...ids]), eq(jobRuns.status, 'running')));
  }

  /** Best-effort message extraction for log lines; tolerates non-Error throws. */
  private errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  // ============================================================================
  // Poll loop
  // ============================================================================

  /**
   * One poll tick: claim at most one run and start processing it in the
   * background. Never rejects — claim and processing errors are logged.
   */
  async pollAndProcess(): Promise<void> {
    if (this.shuttingDown) return;
    // Count claims that are still awaiting the database: ticks can overlap
    // when a claim takes longer than the poll interval, and each overlapping
    // tick would otherwise pass this check and overshoot `concurrency`.
    if (this.inFlight.size + this.pendingClaims >= this.options.concurrency) return;

    let claimed: JobRunRow | null = null;
    this.pendingClaims += 1;
    try {
      claimed = await this.claimNext(this.options.pool);
    } catch (err) {
      this.logger.error(`claimNext failed: ${this.errorMessage(err)}`);
      return;
    } finally {
      this.pendingClaims -= 1;
    }
    if (!claimed) return;

    const run = claimed;

    // Shutdown began while the claim was in flight: hand the row straight
    // back instead of starting work nobody will wait for.
    if (this.shuttingDown) {
      try {
        await this.releaseRuns([run.id]);
      } catch (err) {
        this.logger.error(
          `release of run ${run.id} claimed during shutdown failed: ${this.errorMessage(err)}`,
        );
      }
      return;
    }

    const promise = this.processRun(run).catch((err) => {
      this.logger.error(
        `processRun(${run.id}) unhandled: ${this.errorMessage(err)}`,
      );
    });
    this.inFlight.add(promise);
    this.inFlightRunIds.add(run.id);
    // `promise` never rejects (see the catch above), so neither does this chain.
    void promise.finally(() => {
      this.inFlight.delete(promise);
      this.inFlightRunIds.delete(run.id);
    });
  }

  /**
   * Claim the next runnable row from the pool. Transaction ensures the
   * select-candidate + update-to-running pair is atomic; FOR UPDATE SKIP
   * LOCKED lets multiple workers share the table without serialising.
   *
   * @returns The claimed row (now `running`), or `null` when nothing is runnable.
   */
  async claimNext(pool: string): Promise<JobRunRow | null> {
    return this.db.transaction(async (tx) => {
      const candidates = await tx
        .select({ id: jobRuns.id })
        .from(jobRuns)
        .where(
          and(
            eq(jobRuns.status, 'pending'),
            eq(jobRuns.pool, pool),
            lte(jobRuns.runAt, new Date()),
          ),
        )
        .orderBy(desc(jobRuns.priority), asc(jobRuns.runAt))
        .limit(1)
        .for('update', { skipLocked: true });
      const candidate = candidates[0];
      if (!candidate) return null;

      const now = new Date();
      const [claimed] = await tx
        .update(jobRuns)
        .set({
          status: 'running',
          claimedAt: now,
          startedAt: now,
          updatedAt: now,
        })
        .where(eq(jobRuns.id, candidate.id))
        .returning();
      return (claimed ?? null) as JobRunRow | null;
    });
  }

  // ============================================================================
  // Stale claim sweeper
  // ============================================================================

  /**
   * Release rows whose `claimed_at` is older than the threshold. Safe to
   * run concurrently across workers — the two-phase tx (select-for-update
   * then update) guarantees each stranded row is only reset once.
   *
   * Never rejects; failures are logged.
   */
  async sweepStaleClaims(): Promise<void> {
    if (this.shuttingDown) return;
    try {
      await this.db.transaction(async (tx) => {
        const threshold = new Date(Date.now() - this.staleThresholdMs);
        const stale = await tx
          .select({ id: jobRuns.id })
          .from(jobRuns)
          .where(
            and(eq(jobRuns.status, 'running'), lt(jobRuns.claimedAt, threshold)),
          )
          .for('update', { skipLocked: true });
        if (stale.length === 0) return;
        const ids = stale.map((r) => r.id);
        await tx
          .update(jobRuns)
          .set({
            status: 'pending',
            claimedAt: null,
            startedAt: null,
            updatedAt: new Date(),
          })
          .where(inArray(jobRuns.id, ids));
        for (const id of ids) {
          this.logger.warn(`Recovered stale claim on run ${id}`);
        }
      });
    } catch (err) {
      this.logger.error(`sweepStaleClaims failed: ${this.errorMessage(err)}`);
    }
  }

  // ============================================================================
  // processRun
  // ============================================================================

  /**
   * Execute one claimed run end to end: resolve its handler, apply the
   * concurrency-key gate, run the handler, then record success, a scheduled
   * retry, or terminal failure.
   */
  private async processRun(claimed: JobRunRow): Promise<void> {
    const registryEntry = JOB_HANDLER_REGISTRY.get(claimed.jobType);

    // (a) Missing handler — defensive; JOB-5 boot validator should have caught.
    if (!registryEntry) {
      this.logger.error(
        `No handler registered for jobType='${claimed.jobType}' (run ${claimed.id})`,
      );
      await this.markFailed(
        claimed,
        new Error(`No handler registered for jobType='${claimed.jobType}'`),
        /*finalAttempts*/ (claimed.attempts ?? 0) + 1,
      );
      return;
    }

    // (b) Concurrency-queue release gate — defer if another run with the
    // same key is already `running`.
    // NOTE(review): this check and the reset below are separate statements,
    // not one transaction — confirm that is acceptable for same-key runs
    // claimed at the same moment by different workers.
    if (claimed.concurrencyKey) {
      const inflight = await this.db
        .select({ id: jobRuns.id })
        .from(jobRuns)
        .where(
          and(
            eq(jobRuns.concurrencyKey, claimed.concurrencyKey),
            eq(jobRuns.status, 'running'),
          ),
        );
      const other = inflight.find((r) => r.id !== claimed.id);
      if (other) {
        await this.db
          .update(jobRuns)
          .set({
            status: 'pending',
            claimedAt: null,
            startedAt: null,
            updatedAt: new Date(),
          })
          .where(eq(jobRuns.id, claimed.id));
        return;
      }
    }

    const meta = registryEntry.meta as JobHandlerMeta<unknown>;
    const HandlerClass = registryEntry.handlerClass;

    const attemptsBefore = claimed.attempts ?? 0;
    try {
      // (c) Build JobContext. The handler is instantiated through Nest's
      // ModuleRef so `@Inject` constructor params resolve, with a fresh
      // instance per run so run-scoped state cannot leak across claims.
      // This happens INSIDE the try: if construction throws, the failure is
      // recorded through the normal retry/fail path instead of leaving the
      // row stuck in `running` until the stale sweeper finds it.
      const handler = (await this.moduleRef.create(
        HandlerClass as unknown as new (...args: unknown[]) => unknown,
      )) as JobHandlerBase<unknown>;
      const ctx: JobContext<unknown> = {
        input: claimed.input,
        run: claimed as JobRun,
        step: this.makeStepFn(claimed),
        spawnChild: this.makeSpawnFn(claimed),
        logger: new Logger(`JobRun:${claimed.id}`),
      };

      // (d) Run the handler.
      const output = (await handler.run(ctx)) as Record<string, unknown> | undefined;
      // (e) Success.
      await this.db
        .update(jobRuns)
        .set({
          status: 'completed',
          output: (output ?? {}) as Record<string, unknown>,
          finishedAt: new Date(),
          updatedAt: new Date(),
          attempts: attemptsBefore + 1,
        })
        .where(eq(jobRuns.id, claimed.id));
    } catch (err) {
      // (f) Error classification + retry/fail.
      const policy = meta.retry;
      const decision = classifyError(err, policy, attemptsBefore);
      const nextAttempts = attemptsBefore + 1;
      if (decision === 'retry' && policy) {
        const delay = computeBackoff(policy, nextAttempts);
        // computeBackoff saturates at Number.MAX_SAFE_INTEGER, which is past
        // the largest representable Date; clamp so `runAt` is never an
        // Invalid Date.
        const runAtMs = Math.min(Date.now() + delay, JobWorker.MAX_DATE_MS);
        await this.db
          .update(jobRuns)
          .set({
            status: 'pending',
            attempts: nextAttempts,
            runAt: new Date(runAtMs),
            startedAt: null,
            claimedAt: null,
            error: serialiseError(err, nextAttempts, true),
            updatedAt: new Date(),
          })
          .where(eq(jobRuns.id, claimed.id));
      } else {
        await this.markFailed(claimed, err, nextAttempts);
      }
    }
  }

  /**
   * Record a terminal failure on the run and, when the run's own
   * `parentClosePolicy` is `'terminate'`, ask the orchestrator to cancel it
   * with `cascade: true`. A failing cascade is logged, not rethrown.
   */
  private async markFailed(
    claimed: JobRunRow,
    err: unknown,
    finalAttempts: number,
  ): Promise<void> {
    await this.db
      .update(jobRuns)
      .set({
        status: 'failed',
        attempts: finalAttempts,
        finishedAt: new Date(),
        error: serialiseError(err, finalAttempts, false),
        updatedAt: new Date(),
      })
      .where(eq(jobRuns.id, claimed.id));

    // Parent-close-policy cascade: if this run has children under the same
    // root_run_id and this run's own parentClosePolicy is 'terminate', cascade.
    if (claimed.parentClosePolicy === 'terminate') {
      try {
        // JOB-8 — thread the run's own tenantId so the orchestrator's
        // multi-tenant gate passes. Without this, every terminate-policy
        // cascade throws MissingTenantIdError under multiTenant=true and
        // the outer catch silently swallows it — children never cancel.
        await this.orchestrator.cancel(claimed.id, {
          cascade: true,
          reason: 'parent-failed',
          tenantId: claimed.tenantId,
        });
      } catch (cascadeErr) {
        // cancel is idempotent; failure here is unusual but not fatal.
        this.logger.warn(
          `cascade on failed run ${claimed.id}: ${this.errorMessage(cascadeErr)}`,
        );
      }
    }
  }

  // ============================================================================
  // ctx.step / ctx.spawnChild builders
  // ============================================================================

  /**
   * Build `ctx.step` for a run. A step already recorded as `completed` is
   * not re-executed — its stored output is returned. Otherwise the step is
   * recorded as `running`, executed, and recorded as `completed` or
   * `failed`; a failure is rethrown to the handler.
   */
  private makeStepFn(run: JobRunRow) {
    return async <TOutput>(
      stepId: string,
      fn: () => Promise<TOutput>,
      _opts?: StepOptions,
    ): Promise<TOutput> => {
      void _opts; // accepted for interface compatibility; not used yet
      const existing = await this.stepService.findStep(run.id, stepId);
      if (existing?.status === 'completed') {
        return existing.output as TOutput;
      }

      const seq = await this.nextStepSeq(run.id);
      const startedAt = new Date();
      const nextAttempts = (existing?.attempts ?? 0) + 1;
      await this.stepService.recordStep({
        jobRunId: run.id,
        stepId,
        kind: 'task',
        seq,
        status: 'running',
        startedAt,
        attempts: nextAttempts,
      });
      try {
        const output = await fn();
        await this.stepService.recordStep({
          jobRunId: run.id,
          stepId,
          kind: 'task',
          seq,
          status: 'completed',
          output: output as Record<string, unknown> | undefined,
          finishedAt: new Date(),
          attempts: nextAttempts,
        });
        return output;
      } catch (err) {
        await this.stepService.recordStep({
          jobRunId: run.id,
          stepId,
          kind: 'task',
          seq,
          status: 'failed',
          error: serialiseError(err, nextAttempts, false),
          finishedAt: new Date(),
          attempts: nextAttempts,
        });
        throw err;
      }
    };
  }

  /**
   * Build `ctx.spawnChild` for a run: starts a child via the orchestrator
   * with this run as parent and as trigger reference.
   */
  private makeSpawnFn(run: JobRunRow) {
    return async (
      type: string,
      input: unknown,
      opts?: SpawnChildOptions,
    ): Promise<JobRun> => {
      // NOTE(review): `cancel` in markFailed threads `tenantId` explicitly;
      // `start` here does not. Confirm the orchestrator derives the tenant
      // from `parentRunId` when multi-tenancy is enabled.
      return this.orchestrator.start(type, input, {
        parentRunId: run.id,
        parentClosePolicy: opts?.closePolicy,
        runAt: opts?.runAt,
        priority: opts?.priority,
        tags: opts?.tags,
        triggerSource: 'parent',
        triggerRef: run.id,
      });
    };
  }

  /**
   * Allocate the next `seq` for a given run. SELECT-max approach — runs
   * typically have <100 steps so the scan is cheap, and correctness across
   * retries is more important than the microseconds saved by an in-memory
   * counter (which would drift if the worker crashes mid-run and another
   * worker resumes via stale-claim sweep).
   *
   * @returns The next sequence number, or 1 when the result is empty or unreadable.
   */
  private async nextStepSeq(runId: string): Promise<number> {
    const result: unknown = await this.db.execute(
      sql`SELECT COALESCE(MAX(seq), 0) + 1 AS next FROM job_step WHERE job_run_id = ${runId}`,
    );

    // Raw `execute` results differ by driver: some return the row array
    // itself, others an object wrapping it as `{ rows: [...] }`. Inspect the
    // shape BEFORE indexing — destructuring the wrapper object as if it were
    // an array throws "not iterable".
    const rows: unknown = Array.isArray(result)
      ? result
      : (result as { rows?: unknown } | null | undefined)?.rows;
    let first: { next?: unknown; rows?: unknown } | undefined = Array.isArray(rows)
      ? (rows[0] as { next?: unknown; rows?: unknown } | undefined)
      : undefined;
    // Also tolerate an array whose first element is itself a `{ rows }` wrapper.
    if (first && Array.isArray(first.rows)) {
      first = first.rows[0] as { next?: unknown } | undefined;
    }

    // Drivers may hand numeric columns back as strings; coerce and validate.
    const next = Number(first?.next);
    return Number.isFinite(next) && next >= 1 ? next : 1;
  }
}
622
+
623
+ // Terminal statuses re-exported for JOB-4 parity imports.
624
+ export { TERMINAL_STATUSES };