@malloy-publisher/server 0.0.198-dev4 → 0.0.198-dev6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,920 @@
1
+ /**
2
+ * Main-thread half of the package-load worker pool.
3
+ *
4
+ * Why this exists: Malloy compile is pure JavaScript that runs on the
5
+ * V8 main event loop. A large package can hold the loop for many
6
+ * seconds — long enough to time out the Kubernetes `/health/liveness`
7
+ * probe (timeout=5s, see worker/main.tf) and trigger a pod restart in
8
+ * the middle of an otherwise routine package load. This pool moves the
9
+ * CPU-bound bulk of `Package.create` (manifest read, every
10
+ * `.malloy`/`.malloynb` compile) into `worker_threads` workers, leaving
11
+ * the main loop free to serve `/health/liveness` and other API traffic.
12
+ *
13
+ * Public surface
14
+ * --------------
15
+ * getPackageLoadPool() — lazy singleton; respects PACKAGE_LOAD_WORKERS
16
+ * pool.loadPackage({...}) — submit one package for off-thread load
17
+ * pool.shutdown() — graceful drain (called from health.ts)
18
+ *
19
+ * The pool is REQUIRED. There is no in-process compile fallback —
20
+ * `PACKAGE_LOAD_WORKERS=0` throws at boot. Running Malloy compile on
21
+ * the main event loop is what tripped the K8s liveness probe in the
22
+ * first place; we don't want a knob that re-enables that. Pool-
23
+ * infrastructure failures (worker crash, job timeout, pool shutting
24
+ * down) reject `loadPackage()`; `Package.loadViaWorker` then rewraps
25
+ * them as `ServiceUnavailableError` so the HTTP layer responds 503.
26
+ *
27
+ * Concurrency model
28
+ * -----------------
29
+ * - Workers are spawned lazily on first use, up to N (configurable
30
+ * via PACKAGE_LOAD_WORKERS).
31
+ * - Each worker emits {type:'ready'} once its module has initialized;
32
+ * the pool waits for that before dispatching to a newly-spawned
33
+ * worker.
34
+ * - **Per-worker active job cap = 1.** Compile is CPU-bound and
35
+ * doesn't multiplex meaningfully inside a single event loop, so
36
+ * loading two large packages on the same worker just doubles each
37
+ * of their latencies rather than overlapping their work. Excess
38
+ * jobs queue at the pool layer and pick a worker the moment one
39
+ * becomes idle.
40
+ * - Schema/URL/connection-metadata RPCs from a worker back to the
41
+ * main thread (over MessagePort) are not counted against the
42
+ * active-job cap — they're sub-RPCs of the worker's currently
43
+ * active load-package job.
44
+ *
45
+ * Failure modes
46
+ * -------------
47
+ * - **Worker spawn fails.** `pw.ready` rejects on `worker.error`,
48
+ * `worker.exit`, or after `WORKER_SPAWN_TIMEOUT_MS`. Jobs already
48
+ * handed to that worker are rejected (they are not retried); jobs
49
+ * still in the queue go to whichever worker becomes available next.
51
+ * - **Worker dies mid-job.** `worker.exit` fails every in-flight job
52
+ * on that worker; the pool prunes the dead worker, and the next
53
+ * `loadPackage()` lazily respawns up to `maxWorkers`.
54
+ * - **Job timeout.** `PACKAGE_LOAD_JOB_TIMEOUT_MS` (default 120s) caps a
55
+ * single package load. On timeout we **terminate the worker** and
56
+ * reject the caller — we do not silently fall back, and we do not
57
+ * leave a zombie compile burning CPU on the worker (that's the
58
+ * exact scenario PR-#767's job-timeout-without-terminate had).
59
+ *
60
+ * Schema-fetch RPC routing
61
+ * ------------------------
62
+ * When a worker proxies `fetchSchemaForTables` (or other connection
63
+ * metadata calls) back to the main thread, the message includes a
64
+ * `jobId`. The pool keeps a `jobId → JobContext` map so it knows
65
+ * which live `MalloyConfig` to dispatch the request against. The
66
+ * mapping is installed when the job is dispatched to a worker and
67
+ * removed when it settles (result, error, timeout, or worker exit).
68
+ */
69
+ import type {
70
+ Annotation,
71
+ FetchSchemaOptions,
72
+ InfoConnection,
73
+ LookupConnection,
74
+ MalloyConfig,
75
+ ModelDef,
76
+ SQLSourceDef,
77
+ TableSourceDef,
78
+ } from "@malloydata/malloy";
79
+ import * as Malloy from "@malloydata/malloy-interfaces";
80
+ import { Worker } from "node:worker_threads";
81
+ import { dirname, join } from "path";
82
+ import { fileURLToPath, pathToFileURL } from "url";
83
+
84
+ import { ModelCompilationError } from "../errors";
85
+ import { logger } from "../logger";
86
+ import type {
87
+ ConnectionMetadataRequest,
88
+ ConnectionMetadataResponse,
89
+ LoadPackageError,
90
+ LoadPackageRequest,
91
+ LoadPackageResult,
92
+ MainToWorkerMessage,
93
+ ReadUrlRequest,
94
+ ReadUrlResponse,
95
+ RpcErrorResponse,
96
+ SchemaForSqlRequest,
97
+ SchemaForSqlResponse,
98
+ SchemaForTablesRequest,
99
+ SchemaForTablesResponse,
100
+ SerializedError,
101
+ SerializedModel,
102
+ WorkerToMainMessage,
103
+ } from "./protocol";
104
+
105
+ // ──────────────────────────────────────────────────────────────────────
106
+ // Configuration
107
+ // ──────────────────────────────────────────────────────────────────────
108
+
109
/**
 * Resolve the package-load worker pool size from `PACKAGE_LOAD_WORKERS`.
 *
 * The pool is mandatory: there is no in-process compile fallback, so a
 * value that would disable it (0, negative) or that is not a plain
 * base-10 integer is rejected with an exception instead of being
 * silently coerced. Surrounding whitespace is tolerated.
 *
 * Validation is strict on purpose. `Number.parseInt` alone would accept
 * inputs such as `"2abc"`, `"1.5"` or `"1e3"` by reading only the leading
 * digits, which turns an operator typo into an unintended pool size.
 *
 * @returns 1 when the variable is unset, otherwise the configured
 *   positive integer.
 * @throws Error when the variable is set to anything other than a
 *   positive safe integer.
 */
export function getPackageLoadWorkerCount(): number {
  const raw = process.env.PACKAGE_LOAD_WORKERS;
  if (raw === undefined) return 1;

  const trimmed = raw.trim();
  // Digits only (optional leading '+'): rejects "", "abc", "2abc", "1.5", "1e3", "-2".
  const parsed = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new Error(
      `PACKAGE_LOAD_WORKERS must be a positive integer (got ${JSON.stringify(raw)}). ` +
        `Refusing to start without a package-load worker pool.`,
    );
  }
  return parsed;
}
133
+
134
/**
 * Wall-clock budget, in milliseconds, for one load-package job.
 *
 * Read once at module load from `PACKAGE_LOAD_JOB_TIMEOUT_MS`. When the
 * variable is unset, or does not parse to a finite number greater than
 * zero, the default of 120 000 ms is used (no exception is raised).
 *
 * NOTE(review): the previous comment justified the default as "twice the
 * K8s liveness threshold (5s × 15 fails = 75s)"; twice 75s is 150s, not
 * the 120s used here — confirm which figure is intended.
 */
const PACKAGE_LOAD_JOB_TIMEOUT_MS = ((): number => {
  const fallbackMs = 120_000;
  const configured = process.env.PACKAGE_LOAD_JOB_TIMEOUT_MS;
  if (configured === undefined) return fallbackMs;
  const ms = Number.parseInt(configured, 10);
  return Number.isFinite(ms) && ms > 0 ? ms : fallbackMs;
})();
148
+
149
/**
 * Deadline, in milliseconds, for a newly spawned worker to send its
 * `ready` handshake. Kept separate from the per-job timeout so that a
 * worker which is slow to start does not eat into a job's own budget.
 */
const WORKER_SPAWN_TIMEOUT_MS = 30 * 1_000;
158
+
159
+ // ──────────────────────────────────────────────────────────────────────
160
+ // Internal types
161
+ // ──────────────────────────────────────────────────────────────────────
162
+
163
/**
 * Main-thread bookkeeping for one job that has been handed to a worker.
 * Looked up by `jobId` when the worker sends results or proxy RPCs.
 */
interface JobContext {
  /** Pool-assigned identifier; also used as the request id on the wire. */
  jobId: string;
  /** Worker the job was handed to. */
  pw: PoolWorker;
  /** Timer that fires the job timeout; cleared when the job settles. */
  timeout: ReturnType<typeof setTimeout>;
  /** Connection lookup used to answer the worker's schema/metadata RPCs. */
  connections: LookupConnection<InfoConnection>;
  /** Reader used to answer the worker's `read-url` RPCs. */
  urlReader: PackageLoadUrlReader;
  /** Settles the caller's promise with the worker's raw result. */
  resolve: (result: LoadPackageResult) => void;
  /** Settles the caller's promise with a failure. */
  reject: (err: Error) => void;
}
172
+
173
/** A submitted job waiting in the pool queue for a worker. */
interface QueuedJob {
  /** What the caller asked to load. */
  request: LoadPackageJob;
  /** Rejects the promise returned by `loadPackage`. */
  reject: (err: Error) => void;
  /** Fulfils the promise returned by `loadPackage`. */
  resolve: (result: LoadPackageOutcome) => void;
}
178
+
179
/** One worker thread plus the pool's view of its state. */
interface PoolWorker {
  /** Sequential id assigned by the pool; used in log and error messages. */
  id: number;
  /** Underlying `worker_threads` handle. */
  worker: Worker;
  /** Settles once the worker reports `ready`, or rejects if it never does. */
  ready: Promise<void>;
  /** Set to true by the worker's `exit` handler. */
  exited: boolean;
  /** Job ids — capped at 1 by the per-worker active job rule. */
  inFlight: Set<string>;
}
186
+
187
/**
 * URL-reader shape the pool dispatches `read-url` requests to.
 *
 * A reader may return either the file contents as a string or an object
 * carrying `contents` (optionally with an `invalidationKey`). The pool
 * forwards only the string payload to the worker; any `invalidationKey`
 * is ignored.
 */
export interface PackageLoadUrlReader {
  readURL(
    url: URL,
  ): Promise<
    | string
    | {
        contents: string;
        invalidationKey?: unknown;
      }
  >;
}
198
+
199
/** Everything a caller supplies to have one package loaded off-thread. */
export interface LoadPackageJob {
  /** Name of the package; forwarded to the worker unchanged. */
  packageName: string;
  /** Location of the package; forwarded to the worker unchanged. */
  packagePath: string;
  /** Default connection name (passed verbatim to the worker). */
  defaultConnectionName: string | null;
  /**
   * The live MalloyConfig. It is never sent across the worker boundary:
   * the pool keeps it on the main thread and answers the worker's proxy
   * RPCs against `malloyConfig.connections`.
   */
  malloyConfig: MalloyConfig;
  /**
   * Optional reader used to answer the worker's `read-url` requests.
   * When omitted the pool falls back to its filesystem-based default.
   */
  urlReader?: PackageLoadUrlReader;
  /** Optional buildManifest, forwarded to the worker as-is. */
  buildManifest?: unknown;
}
217
+
218
/**
 * What `PackageLoadPool.loadPackage` resolves with.
 *
 * Mirrors the wire-level `LoadPackageResult`, except that each model's
 * `modelDef` and `sourceInfos` are typed with their Malloy types rather
 * than the loose types the protocol module uses, so callers do not have
 * to cast at every use. All other per-model fields keep their wire
 * shape and are passed through untouched.
 */
export interface LoadPackageOutcome {
  /** Time the load took, as reported by the worker (milliseconds). */
  loadDurationMs: number;
  /** Package-level metadata reported by the worker. */
  packageMetadata: { name?: string; description?: string };
  /** One entry per model, with the heavy fields re-typed. */
  models: Array<
    Omit<SerializedModel, "modelDef" | "sourceInfos"> & {
      modelDef?: ModelDef;
      sourceInfos?: Malloy.SourceInfo[];
    }
  >;
}
241
+
242
+ // ──────────────────────────────────────────────────────────────────────
243
+ // Path to the bundled worker script
244
+ // ──────────────────────────────────────────────────────────────────────
245
+
246
/**
 * Work out which file to start worker threads from.
 *
 * The entrypoint is expected to sit next to this module. If this module
 * is itself running from a `.mjs` or `.js` file (a built bundle) the
 * sibling `package_load_worker.mjs` is used; otherwise the TypeScript
 * source `package_load_worker.ts` is used, which keeps dev and test runs
 * working without a build step.
 *
 * The result is built with `pathToFileURL` instead of string
 * concatenation so that Windows drive-letter paths become valid
 * `file:` URLs.
 *
 * @returns A `file:` URL for the worker entrypoint.
 */
function resolveWorkerScript(): URL {
  const selfPath = fileURLToPath(import.meta.url);
  const runningFromBuild = [".mjs", ".js"].some((ext) =>
    selfPath.endsWith(ext),
  );
  const entryName = runningFromBuild
    ? "package_load_worker.mjs"
    : "package_load_worker.ts";
  return pathToFileURL(join(dirname(selfPath), entryName));
}
268
+
269
+ // ──────────────────────────────────────────────────────────────────────
270
+ // The pool
271
+ // ──────────────────────────────────────────────────────────────────────
272
+
273
/**
 * Main-thread owner of the package-load worker threads.
 *
 * Callers submit work with {@link PackageLoadPool.loadPackage}; the pool
 * queues it, lazily spawns up to `maxWorkers` workers, and runs at most
 * one job per worker at a time. While a job runs, the worker may send
 * proxy RPCs (connection metadata, schema fetches, URL reads); those are
 * answered here using the job's own connections and URL reader.
 */
export class PackageLoadPool {
  /** How long `shutdown()` waits for a worker to exit before terminating it. */
  private static readonly SHUTDOWN_GRACE_MS = 10_000;

  private readonly workers: PoolWorker[] = [];
  private readonly queue: QueuedJob[] = [];
  private readonly jobs = new Map<string, JobContext>();
  private readonly maxWorkers: number;
  private nextWorkerId = 0;
  private nextJobId = 0;
  private shuttingDown = false;
  /** Shared by every `shutdown()` call so late callers await the same drain. */
  private shutdownPromise: Promise<void> | null = null;
  private readonly workerScript: URL;

  /**
   * @param maxWorkers Upper bound on concurrently alive workers; must be
   *   a finite number >= 1.
   * @param workerScript Worker entrypoint URL; defaults to the sibling
   *   script located by `resolveWorkerScript()`.
   * @throws Error when `maxWorkers` is not a finite number >= 1.
   */
  constructor(maxWorkers: number, workerScript?: URL) {
    if (!Number.isFinite(maxWorkers) || maxWorkers < 1) {
      throw new Error(
        `PackageLoadPool requires maxWorkers >= 1 (got ${maxWorkers}). ` +
          `The in-process compile fallback was removed in favour of a hard ` +
          `dependency on the worker pool; see getPackageLoadWorkerCount().`,
      );
    }
    this.maxWorkers = maxWorkers;
    this.workerScript = workerScript ?? resolveWorkerScript();
  }

  /** Number of workers that have not exited. */
  get size(): number {
    return this.workers.filter((w) => !w.exited).length;
  }

  /**
   * Submit a package for off-thread load.
   *
   * The returned promise fulfils with the worker's result once it
   * arrives. It rejects when the worker reports a load-package error,
   * when the worker fails to start or exits, when the job exceeds
   * `PACKAGE_LOAD_JOB_TIMEOUT_MS`, or when the pool is shutting down.
   *
   * @param request The package to load and the main-thread resources
   *   (connections, URL reader) used to answer the worker's RPCs.
   */
  async loadPackage(request: LoadPackageJob): Promise<LoadPackageOutcome> {
    if (this.shuttingDown) {
      throw new Error("PackageLoadPool is shutting down");
    }
    return new Promise<LoadPackageOutcome>((resolve, reject) => {
      this.queue.push({ request, resolve, reject });
      this.tryDispatch();
    });
  }

  /**
   * Stop the pool: reject every job still in the queue, ask each worker
   * to shut down, and wait for all of them to exit. A worker that has
   * not exited after the grace period is terminated.
   *
   * Safe to call multiple times; every call returns the same promise,
   * so a second caller also waits for the workers to be gone.
   */
  shutdown(): Promise<void> {
    if (this.shutdownPromise === null) {
      this.shuttingDown = true;
      this.shutdownPromise = this.stopAll();
    }
    return this.shutdownPromise;
  }

  /** Reject undispatched jobs, then stop every worker in parallel. */
  private async stopAll(): Promise<void> {
    // Fail any queued jobs that never got dispatched.
    const queued = this.queue.splice(0, this.queue.length);
    for (const qj of queued) {
      qj.reject(new Error("PackageLoadPool: shutdown before dispatch"));
    }
    await Promise.all(this.workers.map((pw) => this.stopWorker(pw)));
  }

  /**
   * Ask one worker to shut down and resolve once it has exited.
   * Never rejects.
   */
  private stopWorker(pw: PoolWorker): Promise<void> {
    if (pw.exited) return Promise.resolve();
    return new Promise<void>((resolve) => {
      // Hard ceiling — if a worker won't exit, terminate it.
      const forceTimer = setTimeout(() => {
        if (pw.exited) return;
        const done = (): void => resolve();
        pw.worker.terminate().then(done, done);
      }, PackageLoadPool.SHUTDOWN_GRACE_MS);

      pw.worker.once("exit", () => {
        // Clear the ceiling timer so it cannot keep the process alive
        // for the rest of the grace period after a clean exit.
        clearTimeout(forceTimer);
        resolve();
      });

      const msg: { type: "shutdown" } = { type: "shutdown" };
      pw.worker.postMessage(msg);
    });
  }

  // ────────────────────────────────────────────────────────────────
  // Internals: dispatch / queue
  // ────────────────────────────────────────────────────────────────

  /**
   * Greedy dispatcher: keep draining the queue while there is both a
   * queued job and an available worker. Called on submit, on job
   * completion, and on worker exit.
   */
  private tryDispatch(): void {
    if (this.shuttingDown) return;
    while (this.queue.length > 0) {
      const worker = this.findIdleOrSpawnable();
      if (!worker) return; // saturated; wait for a job to finish
      const qj = this.queue.shift();
      if (!qj) return;
      void this.dispatchToWorker(worker, qj);
    }
  }

  /**
   * Return a worker with nothing in flight, or spawn a new one when
   * fewer than `maxWorkers` are alive. Returns null when saturated.
   */
  private findIdleOrSpawnable(): PoolWorker | null {
    const alive = this.workers.filter((w) => !w.exited);
    const idle = alive.find((w) => w.inFlight.size === 0);
    if (idle) return idle;
    if (alive.length < this.maxWorkers) {
      const pw = this.spawnWorker();
      this.workers.push(pw);
      return pw;
    }
    return null;
  }

  /**
   * Hand one queued job to `pw`: wait for the worker to be ready,
   * register the job context and timeout, then post the request.
   */
  private async dispatchToWorker(
    pw: PoolWorker,
    qj: QueuedJob,
  ): Promise<void> {
    this.nextJobId += 1;
    const jobId = `job-${this.nextJobId}`;

    // Reserve the worker synchronously, BEFORE awaiting `ready`. The
    // dispatcher decides "idle" from `inFlight.size`; if the slot were
    // only taken after the await, every job submitted while the worker
    // is still starting would see it as idle and pile onto it, breaking
    // the one-job-per-worker cap.
    pw.inFlight.add(jobId);

    try {
      await pw.ready;
    } catch (err) {
      // Spawn failed; this worker is dead. Reject this job and give the
      // rest of the queue another chance at a worker.
      pw.inFlight.delete(jobId);
      qj.reject(err as Error);
      this.tryDispatch();
      return;
    }

    // State may have changed while we were waiting. Posting to an
    // exited worker would leave the caller hanging until the job
    // timeout, so fail fast instead.
    if (this.shuttingDown || pw.exited) {
      pw.inFlight.delete(jobId);
      qj.reject(
        new Error(
          this.shuttingDown
            ? "PackageLoadPool is shutting down"
            : `Package-load worker #${pw.id} exited unexpectedly before job ${jobId} could start`,
        ),
      );
      this.tryDispatch();
      return;
    }

    const timeout = setTimeout(() => {
      this.handleJobTimeout(pw, jobId, qj.request.packagePath);
    }, PACKAGE_LOAD_JOB_TIMEOUT_MS);

    this.jobs.set(jobId, {
      jobId,
      pw,
      connections: qj.request.malloyConfig.connections,
      urlReader: qj.request.urlReader ?? defaultUrlReader,
      resolve: (result) => {
        clearTimeout(timeout);
        try {
          qj.resolve(adaptResult(result));
        } catch (err) {
          // A malformed result must reject the caller, not escape into
          // the worker's message handler and strand the promise.
          qj.reject(err instanceof Error ? err : new Error(String(err)));
        }
      },
      reject: (err) => {
        clearTimeout(timeout);
        qj.reject(err);
      },
      timeout,
    });

    const message: LoadPackageRequest = {
      type: "load-package",
      requestId: jobId,
      packagePath: qj.request.packagePath,
      packageName: qj.request.packageName,
      defaultConnectionName: qj.request.defaultConnectionName,
      buildManifest: qj.request.buildManifest,
    };
    pw.worker.postMessage(message);
  }

  /**
   * Job timeout: log, reject the caller, and terminate the worker so the
   * abandoned compile does not keep running. The worker's exit handler
   * does the remaining cleanup and re-triggers dispatch.
   */
  private handleJobTimeout(
    pw: PoolWorker,
    jobId: string,
    packagePath: string,
  ): void {
    logger.error(
      `PackageLoadPool: job ${jobId} (${packagePath}) timed out after ${PACKAGE_LOAD_JOB_TIMEOUT_MS}ms; terminating worker #${pw.id}`,
    );
    this.failJob(
      jobId,
      new Error(
        `Package-load worker timed out after ${PACKAGE_LOAD_JOB_TIMEOUT_MS}ms (package=${packagePath})`,
      ),
    );
    void pw.worker.terminate();
    // The exit handler will mark the worker exited, fail any
    // residual jobs on it, and trigger tryDispatch.
  }

  /**
   * Start a worker thread and wire up its message/error/exit handlers.
   * The returned record's `ready` promise fulfils on the worker's
   * `ready` message and rejects on error, exit, or spawn timeout.
   */
  private spawnWorker(): PoolWorker {
    this.nextWorkerId += 1;
    const id = this.nextWorkerId;
    logger.info(
      `PackageLoadPool: spawning worker #${id} (script=${this.workerScript.toString()})`,
    );
    const worker = new Worker(this.workerScript, {
      name: `malloy-package-load-worker-${id}`,
    });

    // `ready` can be settled from several places (message, error, exit,
    // timer); the flag makes every attempt after the first a no-op.
    let readyResolve!: () => void;
    let readyReject!: (err: Error) => void;
    let readySettled = false;
    const ready = new Promise<void>((resolve, reject) => {
      readyResolve = () => {
        if (readySettled) return;
        readySettled = true;
        resolve();
      };
      readyReject = (err: Error) => {
        if (readySettled) return;
        readySettled = true;
        reject(err);
      };
    });
    // Silence unhandled-rejection on `ready` itself — consumers handle
    // rejection explicitly, but the original promise is still tracked.
    ready.catch(() => {});

    const spawnTimer = setTimeout(() => {
      readyReject(
        new Error(
          `Package-load worker #${id} failed to become ready within ${WORKER_SPAWN_TIMEOUT_MS}ms`,
        ),
      );
      void worker.terminate();
    }, WORKER_SPAWN_TIMEOUT_MS);

    const pw: PoolWorker = {
      id,
      worker,
      ready,
      inFlight: new Set(),
      exited: false,
    };

    worker.on("message", (msg: WorkerToMainMessage) => {
      this.handleWorkerMessage(pw, msg, readyResolve, spawnTimer);
    });

    worker.on("error", (err) => {
      logger.error(`PackageLoadPool: worker #${id} errored`, {
        error: err,
      });
      clearTimeout(spawnTimer);
      readyReject(err);
    });

    worker.on("exit", (code) => {
      pw.exited = true;
      clearTimeout(spawnTimer);
      // No-op if the worker had already become ready.
      readyReject(
        new Error(
          `Package-load worker #${id} exited before becoming ready (code=${code})`,
        ),
      );
      logger.warn(
        `PackageLoadPool: worker #${id} exited (code=${code}, inFlight=${pw.inFlight.size})`,
      );
      // Fail any jobs that were running on this worker so callers
      // don't strand waiting for a result that will never come.
      for (const jobId of Array.from(pw.inFlight)) {
        this.failJob(
          jobId,
          new Error(
            `Package-load worker #${id} exited unexpectedly (code=${code})`,
          ),
        );
      }
      pw.inFlight.clear();
      // Prune the dead worker so size accounting is honest and
      // the next dispatch will see a free slot to respawn into.
      const idx = this.workers.indexOf(pw);
      if (idx >= 0) this.workers.splice(idx, 1);
      // Give queued jobs a chance to dispatch elsewhere.
      this.tryDispatch();
    });

    return pw;
  }

  // ────────────────────────────────────────────────────────────────
  // Internals: message dispatch
  // ────────────────────────────────────────────────────────────────

  /** Route one message from a worker to the matching handler. */
  private handleWorkerMessage(
    pw: PoolWorker,
    msg: WorkerToMainMessage,
    markReady: () => void,
    spawnTimer: ReturnType<typeof setTimeout>,
  ): void {
    switch (msg.type) {
      case "ready":
        clearTimeout(spawnTimer);
        markReady();
        return;
      case "load-package-result":
        this.completeJob(pw, msg);
        return;
      case "load-package-error":
        this.errorJob(pw, msg);
        return;
      case "connection-metadata":
        void this.handleConnectionMetadata(pw, msg);
        return;
      case "schema-for-tables":
        void this.handleSchemaForTables(pw, msg);
        return;
      case "schema-for-sql":
        void this.handleSchemaForSql(pw, msg);
        return;
      case "read-url":
        void this.handleReadUrl(pw, msg);
        return;
      default: {
        // Compile-time exhaustiveness check; unknown messages are ignored.
        const exhaustive: never = msg;
        void exhaustive;
        return;
      }
    }
  }

  /** Settle a job with the worker's result and free the worker. */
  private completeJob(pw: PoolWorker, msg: LoadPackageResult): void {
    const ctx = this.jobs.get(msg.requestId);
    if (!ctx) return; // already failed out-of-band (e.g. timeout)
    this.jobs.delete(msg.requestId);
    pw.inFlight.delete(msg.requestId);
    ctx.resolve(msg);
    // Worker is idle now — give it the next queued job (if any).
    this.tryDispatch();
  }

  /** Settle a job with the worker-reported error and free the worker. */
  private errorJob(pw: PoolWorker, msg: LoadPackageError): void {
    const ctx = this.jobs.get(msg.requestId);
    if (!ctx) return;
    this.jobs.delete(msg.requestId);
    pw.inFlight.delete(msg.requestId);
    ctx.reject(deserializeError(msg.error));
    this.tryDispatch();
  }

  /** Fail a job out-of-band (timeout, worker exit). */
  private failJob(jobId: string, error: Error): void {
    const ctx = this.jobs.get(jobId);
    if (!ctx) return;
    this.jobs.delete(jobId);
    ctx.pw.inFlight.delete(jobId);
    ctx.reject(error);
  }

  /**
   * Answer a worker's request for a connection's name, dialect and
   * digest, using the connections of the job named in the message.
   * Replies with `rpc-error` for an unknown job or a failed lookup.
   */
  private async handleConnectionMetadata(
    pw: PoolWorker,
    msg: ConnectionMetadataRequest,
  ): Promise<void> {
    const ctx = this.jobs.get(msg.jobId);
    const reply = (
      response: ConnectionMetadataResponse | RpcErrorResponse,
    ): void => {
      pw.worker.postMessage(response as MainToWorkerMessage);
    };
    if (!ctx) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: { name: "Error", message: `Unknown jobId ${msg.jobId}` },
      });
      return;
    }
    try {
      const conn = await ctx.connections.lookupConnection(
        msg.connectionName,
      );
      reply({
        type: "connection-metadata-response",
        requestId: msg.requestId,
        ok: true,
        metadata: {
          name: msg.connectionName,
          dialectName: conn.dialectName,
          // Connections without `getDigest` fall back to their name.
          digest:
            typeof conn.getDigest === "function"
              ? conn.getDigest()
              : msg.connectionName,
        },
      });
    } catch (error) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: serializeError(error),
      });
    }
  }

  /**
   * Answer a worker's `fetchSchemaForTables` proxy call against the
   * job's connection. Replies with `rpc-error` for an unknown job or
   * when the lookup/fetch throws.
   */
  private async handleSchemaForTables(
    pw: PoolWorker,
    msg: SchemaForTablesRequest,
  ): Promise<void> {
    const ctx = this.jobs.get(msg.jobId);
    const reply = (
      response: SchemaForTablesResponse | RpcErrorResponse,
    ): void => {
      pw.worker.postMessage(response as MainToWorkerMessage);
    };
    if (!ctx) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: { name: "Error", message: `Unknown jobId ${msg.jobId}` },
      });
      return;
    }
    try {
      const conn = await ctx.connections.lookupConnection(
        msg.connectionName,
      );
      const result = await conn.fetchSchemaForTables(
        msg.tables,
        buildFetchOptions(msg.options),
      );
      reply({
        type: "schema-for-tables-response",
        requestId: msg.requestId,
        ok: true,
        schemas: result.schemas as Record<string, TableSourceDef>,
        errors: result.errors,
      });
    } catch (error) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: serializeError(error),
      });
    }
  }

  /**
   * Answer a worker's `fetchSchemaForSQLStruct` proxy call against the
   * job's connection. An error reported inside the fetch result is
   * relayed in an `ok: true` response; only an unknown job or a thrown
   * exception produces `rpc-error`.
   */
  private async handleSchemaForSql(
    pw: PoolWorker,
    msg: SchemaForSqlRequest,
  ): Promise<void> {
    const ctx = this.jobs.get(msg.jobId);
    const reply = (
      response: SchemaForSqlResponse | RpcErrorResponse,
    ): void => {
      pw.worker.postMessage(response as MainToWorkerMessage);
    };
    if (!ctx) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: { name: "Error", message: `Unknown jobId ${msg.jobId}` },
      });
      return;
    }
    try {
      const conn = await ctx.connections.lookupConnection(
        msg.connectionName,
      );
      const result = await conn.fetchSchemaForSQLStruct(
        msg.sentence as Parameters<
          InfoConnection["fetchSchemaForSQLStruct"]
        >[0],
        buildFetchOptions(msg.options),
      );
      if (result.error !== undefined) {
        reply({
          type: "schema-for-sql-response",
          requestId: msg.requestId,
          ok: true,
          error: result.error,
        });
      } else {
        reply({
          type: "schema-for-sql-response",
          requestId: msg.requestId,
          ok: true,
          structDef: result.structDef as SQLSourceDef,
        });
      }
    } catch (error) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: serializeError(error),
      });
    }
  }

  /**
   * Answer a worker's URL read using the job's URL reader and send back
   * the string contents. Replies with `rpc-error` for an unknown job,
   * an unparsable URL, or a failed read.
   */
  private async handleReadUrl(
    pw: PoolWorker,
    msg: ReadUrlRequest,
  ): Promise<void> {
    const ctx = this.jobs.get(msg.jobId);
    const reply = (response: ReadUrlResponse | RpcErrorResponse): void => {
      pw.worker.postMessage(response as MainToWorkerMessage);
    };
    if (!ctx) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: { name: "Error", message: `Unknown jobId ${msg.jobId}` },
      });
      return;
    }
    try {
      const raw = await ctx.urlReader.readURL(new URL(msg.url));
      // Readers may return the text directly or wrapped in `{ contents }`.
      const contents = typeof raw === "string" ? raw : raw.contents;
      reply({
        type: "read-url-response",
        requestId: msg.requestId,
        ok: true,
        contents,
      });
    } catch (error) {
      reply({
        type: "rpc-error",
        requestId: msg.requestId,
        ok: false,
        error: serializeError(error),
      });
    }
  }
}
811
+
812
/**
 * Copy the schema-fetch options received from a worker into a fresh
 * `FetchSchemaOptions`, leaving out any field whose value is
 * `undefined` so the result only carries keys that were actually set.
 *
 * @param options Options as they arrived in the worker's request.
 * @returns A new object holding `refreshTimestamp` and/or
 *   `modelAnnotation` when defined.
 */
function buildFetchOptions(options: {
  refreshTimestamp?: number;
  modelAnnotation?: Annotation;
}): FetchSchemaOptions {
  const { refreshTimestamp, modelAnnotation } = options;
  return {
    ...(refreshTimestamp !== undefined ? { refreshTimestamp } : {}),
    ...(modelAnnotation !== undefined ? { modelAnnotation } : {}),
  };
}
825
+
826
/**
 * Convert a worker's wire-level result into the shape handed to
 * callers. Each model is shallow-copied with `modelDef` and
 * `sourceInfos` re-typed to their Malloy types; no values are
 * transformed, and every other field is passed through as received.
 *
 * @param result The `load-package-result` message from the worker.
 * @returns The same data typed as a `LoadPackageOutcome`.
 */
function adaptResult(result: LoadPackageResult): LoadPackageOutcome {
  const { packageMetadata, models, loadDurationMs } = result;
  const retype = (
    wireModel: LoadPackageResult["models"][number],
  ): LoadPackageOutcome["models"][number] => ({
    ...wireModel,
    modelDef: wireModel.modelDef as ModelDef | undefined,
    sourceInfos: wireModel.sourceInfos as Malloy.SourceInfo[] | undefined,
  });
  return {
    packageMetadata,
    models: models.map(retype),
    loadDurationMs,
  };
}
843
+
844
/**
 * Flatten a thrown value into a plain object that can be posted to a
 * worker. `Error` instances keep their name, message and stack; any
 * other value is stringified into the message of a generic "Error".
 */
function serializeError(error: unknown): SerializedError {
  if (!(error instanceof Error)) {
    return { name: "Error", message: String(error) };
  }
  const { name, message, stack } = error;
  return { name, message, stack };
}
854
+
855
/**
 * Rebuild an `Error` from the serialized form sent by a worker.
 *
 * The name, and when present the stack and `malloyProblems` (exposed as
 * `problems`), are copied onto a fresh `Error`. If the payload is
 * flagged `isCompilationError`, that error is wrapped in a
 * `ModelCompilationError` so `instanceof` checks on the main thread
 * still recognise compile failures that happened in a worker.
 *
 * @param serialized Error payload received from the worker.
 * @returns A plain `Error`, or a `ModelCompilationError` for compile
 *   failures.
 */
export function deserializeError(serialized: SerializedError): Error {
  const { message, name, stack, malloyProblems, isCompilationError } =
    serialized;

  const plain = new Error(message);
  plain.name = name;
  if (stack) plain.stack = stack;
  if (malloyProblems) {
    (plain as unknown as { problems: unknown }).problems = malloyProblems;
  }

  if (!isCompilationError) return plain;

  // The constructor's parameter type is nominally narrower than a bare
  // Error, so cast through `unknown` to hand over the rebuilt error.
  type CompilationCause = ConstructorParameters<
    typeof ModelCompilationError
  >[0];
  const compilationError = new ModelCompilationError(
    plain as unknown as CompilationCause,
  );
  if (stack) compilationError.stack = stack;
  return compilationError;
}
883
+
884
/**
 * Reader used when a job does not bring its own: reads the target from
 * the local filesystem as UTF-8 text. `file:` URLs are converted to a
 * filesystem path first; any other URL is passed to `readFile` in its
 * string form.
 */
const defaultUrlReader = {
  async readURL(url: URL): Promise<string> {
    const fsPromises = (await import("fs")).promises;
    const urlModule = await import("url");
    let target: string;
    if (url.protocol === "file:") {
      target = urlModule.fileURLToPath(url);
    } else {
      target = url.toString();
    }
    return fsPromises.readFile(target, "utf8");
  },
};
893
+
894
+ // ──────────────────────────────────────────────────────────────────────
895
+ // Singleton accessor
896
+ // ──────────────────────────────────────────────────────────────────────
897
+
898
/** Process-wide pool instance; created on first use. */
let singleton: PackageLoadPool | null = null;

/**
 * Return the shared pool, creating it on the first call with the size
 * reported by `getPackageLoadWorkerCount()` and logging that size.
 * Later calls return the same instance.
 */
export function getPackageLoadPool(): PackageLoadPool {
  if (singleton !== null) return singleton;

  const workerCount = getPackageLoadWorkerCount();
  const pool = new PackageLoadPool(workerCount);
  singleton = pool;
  logger.info(
    `Malloy package-load worker pool enabled (size=${workerCount}). ` +
      `Override with PACKAGE_LOAD_WORKERS=N (N >= 1).`,
  );
  return pool;
}
911
+
912
/**
 * Test-only: install `pool` as the shared instance (or clear it with
 * `null`). If a different pool is currently installed, it is shut down
 * first and that shutdown is awaited before the swap.
 */
export async function __setPackageLoadPoolForTests(
  pool: PackageLoadPool | null,
): Promise<void> {
  const previous = singleton;
  if (previous !== null && previous !== pool) {
    await previous.shutdown();
  }
  singleton = pool;
}