@malloy-publisher/server 0.0.198-dev → 0.0.198-dev1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +42 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/compile_worker.mjs +628 -0
  19. package/dist/default-publisher.config.json +23 -0
  20. package/dist/instrumentation.mjs +36 -38
  21. package/dist/server.mjs +2060 -913
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/compile/compile_pool.spec.ts +227 -0
  27. package/src/compile/compile_pool.ts +729 -0
  28. package/src/compile/compile_worker.ts +683 -0
  29. package/src/compile/protocol.ts +251 -0
  30. package/src/config.spec.ts +306 -0
  31. package/src/config.ts +222 -2
  32. package/src/controller/compile.controller.ts +3 -1
  33. package/src/controller/connection.controller.ts +1 -1
  34. package/src/controller/model.controller.ts +8 -1
  35. package/src/controller/package.controller.ts +70 -29
  36. package/src/controller/query.controller.ts +3 -0
  37. package/src/default-publisher.config.json +23 -0
  38. package/src/errors.spec.ts +42 -0
  39. package/src/errors.ts +21 -0
  40. package/src/health.spec.ts +90 -0
  41. package/src/health.ts +86 -45
  42. package/src/logger.ts +1 -3
  43. package/src/mcp/tools/discovery_tools.ts +6 -2
  44. package/src/mcp/tools/execute_query_tool.ts +12 -0
  45. package/src/path_safety.spec.ts +158 -0
  46. package/src/path_safety.ts +140 -0
  47. package/src/pg_helpers.spec.ts +226 -0
  48. package/src/pg_helpers.ts +129 -0
  49. package/src/server-old.ts +3 -23
  50. package/src/server.ts +49 -0
  51. package/src/service/connection.spec.ts +6 -4
  52. package/src/service/connection.ts +8 -3
  53. package/src/service/connection_config.ts +2 -2
  54. package/src/service/environment.ts +621 -176
  55. package/src/service/environment_admission.spec.ts +180 -0
  56. package/src/service/environment_store.ts +22 -0
  57. package/src/service/filter_integration.spec.ts +110 -0
  58. package/src/service/givens_integration.spec.ts +192 -0
  59. package/src/service/manifest_service.spec.ts +7 -2
  60. package/src/service/manifest_service.ts +8 -2
  61. package/src/service/materialization_service.ts +14 -3
  62. package/src/service/model.spec.ts +105 -0
  63. package/src/service/model.ts +317 -10
  64. package/src/service/model_worker_path.spec.ts +125 -0
  65. package/src/service/package.ts +4 -3
  66. package/src/service/package_memory_governor.spec.ts +173 -0
  67. package/src/service/package_memory_governor.ts +233 -0
  68. package/src/service/package_race.spec.ts +208 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  75. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  76. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  77. package/tests/unit/storage/StorageManager.test.ts +166 -0
  78. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  79. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  80. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  81. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  82. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  83. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  84. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  85. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  86. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -0,0 +1,683 @@
1
+ /**
2
+ * Compile worker entry point.
3
+ *
4
+ * Runs inside a worker_threads `Worker`. Owns no DuckDB / native
5
+ * connection state — every schema lookup is proxied back to the
6
+ * main thread via {@link SchemaForTablesRequest} / {@link
7
+ * SchemaForSqlRequest}. The point of this file is to take the
8
+ * dominant CPU cost of a Malloy compile (parser, type checker, IR
9
+ * builder, sourceInfo extraction) off the main event loop so the
10
+ * Kubernetes liveness probe on `/health/liveness` never gets parked
11
+ * behind a multi-second compile.
12
+ *
13
+ * Contract:
14
+ * - Receives {@link CompileJobRequest} messages from the parent
15
+ * port. Dispatches one compile per message.
16
+ * - Proxies schema and URL-reader operations back to the parent
17
+ * via correlated RPC requests; awaits matching responses.
18
+ * - Sends back exactly one {@link CompileJobResult} or {@link
19
+ * CompileJobError} per job.
20
+ * - Honours a graceful {@link ShutdownRequest} so the pool can
21
+ * drain on SIGTERM.
22
+ *
23
+ * This file is bundled separately by build.ts and shipped as
24
+ * `dist/compile_worker.mjs`.
25
+ */
26
+ import {
27
+ contextOverlay,
28
+ MalloyConfig,
29
+ MalloyError,
30
+ Runtime,
31
+ isSourceDef,
32
+ modelDefToModelInfo,
33
+ type Annotation,
34
+ type Connection,
35
+ type FetchSchemaOptions,
36
+ type LookupConnection,
37
+ type ModelDef,
38
+ type ModelMaterializer,
39
+ type NamedModelObject,
40
+ type NamedQueryDef,
41
+ type SQLSourceDef,
42
+ type StructDef,
43
+ type TableSourceDef,
44
+ type TurtleDef,
45
+ } from "@malloydata/malloy";
46
+ import type { SQLSourceRequest } from "@malloydata/malloy/connection";
47
+ import * as Malloy from "@malloydata/malloy-interfaces";
48
+ import * as fs from "fs";
49
+ import { parentPort, threadId } from "node:worker_threads";
50
+ import { fileURLToPath } from "url";
51
+ import { parseFilters, type FilterDefinition } from "../service/filter";
52
+ import type {
53
+ CompileJobError,
54
+ CompileJobRequest,
55
+ CompileJobResult,
56
+ ConnectionMetadata,
57
+ ConnectionMetadataRequest,
58
+ ConnectionMetadataResponse,
59
+ MainToWorkerMessage,
60
+ ReadUrlRequest,
61
+ ReadUrlResponse,
62
+ SchemaForSqlRequest,
63
+ SchemaForSqlResponse,
64
+ SchemaForTablesRequest,
65
+ SchemaForTablesResponse,
66
+ SerializedError,
67
+ } from "./protocol";
68
+
69
// This module only makes sense as the entry point of a worker_threads
// Worker: without a parent port there is nobody to receive jobs from
// or to reply to, so fail loudly at load time.
if (parentPort === null) {
   throw new Error(
      "compile_worker.ts must be loaded inside a worker_threads Worker",
   );
}

// Non-null handle to the parent port, used for every message this
// worker posts or receives.
const port = parentPort;
76
+
77
+ // ──────────────────────────────────────────────────────────────────────
78
+ // RPC plumbing for worker → main calls
79
+ // ──────────────────────────────────────────────────────────────────────
80
+
81
/** Counter behind {@link newRpcId}; incremented once per request. */
let nextRpcId = 0;

/**
 * Worker → main requests that have been sent but not yet answered,
 * keyed by request id. Entries are added by {@link callMain} and are
 * removed when the matching response (or `rpc-error`) is dispatched.
 * `maybeExit` refuses to exit while this map is non-empty, so every
 * entry that can never be answered MUST be removed.
 */
const pendingRpc = new Map<
   string,
   { resolve: (value: unknown) => void; reject: (err: Error) => void }
>();

/**
 * Produces the next request id. The id embeds this worker's
 * `threadId` plus a per-worker sequence number.
 */
function newRpcId(): string {
   nextRpcId += 1;
   return `w${threadId}-rpc-${nextRpcId}`;
}

/**
 * Issues one correlated request to the main thread.
 *
 * @param send Callback that posts the request; it receives the freshly
 *   allocated request id and must put it on the outgoing message.
 * @returns A promise settled when a response carrying the same request
 *   id is dispatched: resolved with the response message (asserted to
 *   `T`, not validated), or rejected for an `rpc-error`.
 *
 * If `send` itself throws, nothing was posted and no response can ever
 * arrive. The pending entry is therefore dropped before rejecting, so
 * a failed send cannot leave a permanent entry in `pendingRpc` (which
 * would block the shutdown check in `maybeExit` forever). The promise
 * is rejected with exactly the value `send` threw.
 */
function callMain<T>(send: (requestId: string) => void): Promise<T> {
   const requestId = newRpcId();
   return new Promise<T>((resolve, reject) => {
      // Register before sending so a response can never arrive ahead
      // of its waiter.
      pendingRpc.set(requestId, {
         resolve: (value) => resolve(value as T),
         reject,
      });
      try {
         send(requestId);
      } catch (error) {
         pendingRpc.delete(requestId);
         reject(error);
      }
   });
}
102
+
103
/**
 * Routes a message from the main thread to the RPC waiter registered
 * under its `requestId`.
 *
 * The four `*-response` message types resolve the waiter with the whole
 * message; `rpc-error` rejects it with the deserialized error. In both
 * cases the waiter is removed from `pendingRpc` first. Messages whose
 * id has no waiter, and message types not listed here, are ignored.
 */
function dispatchMainResponse(message: MainToWorkerMessage): void {
   // Look up the waiter for an id and unregister it in one step.
   const claim = (requestId: string) => {
      const waiter = pendingRpc.get(requestId);
      if (waiter !== undefined) {
         pendingRpc.delete(requestId);
      }
      return waiter;
   };

   switch (message.type) {
      case "schema-for-tables-response":
      case "schema-for-sql-response":
      case "read-url-response":
      case "connection-metadata-response":
         claim(message.requestId)?.resolve(message);
         return;
      case "rpc-error":
         // The optional call skips deserialization when nobody waits.
         claim(message.requestId)?.reject(deserializeError(message.error));
         return;
      default:
         return;
   }
}
124
+
125
+ // ──────────────────────────────────────────────────────────────────────
126
+ // Stub InfoConnection that proxies schema fetches to the main thread
127
+ // ──────────────────────────────────────────────────────────────────────
128
+
129
/**
 * Compile-only stand-in for a Malloy `Connection`.
 *
 * The two schema-fetch methods forward to the main thread through
 * `callMain`; the SQL-execution style methods throw, and the remaining
 * lifecycle / capability methods are inert. Each instance remembers
 * the `jobId` it was created for and sends it with every schema
 * request so the main thread can associate the request with its job.
 */
class ProxyConnection {
   public readonly name: string;
   public readonly dialectName: string;
   private readonly digest: string;
   private readonly jobId: string;

   /**
    * @param metadata Name, dialect and digest reported by the main
    *   thread for this connection.
    * @param jobId Id of the compile job this proxy serves.
    */
   constructor(metadata: ConnectionMetadata, jobId: string) {
      const { name, dialectName, digest } = metadata;
      this.name = name;
      this.dialectName = dialectName;
      this.digest = digest;
      this.jobId = jobId;
   }

   /** Returns the digest supplied in the connection metadata. */
   getDigest(): string {
      return this.digest;
   }

   /**
    * Asks the main thread for the schemas of `tables` and returns the
    * `schemas` / `errors` maps from its response.
    */
   async fetchSchemaForTables(
      tables: Record<string, string>,
      options: FetchSchemaOptions,
   ): Promise<{
      schemas: Record<string, TableSourceDef>;
      errors: Record<string, string>;
   }> {
      const buildRequest = (requestId: string): SchemaForTablesRequest => ({
         type: "schema-for-tables",
         requestId,
         jobId: this.jobId,
         connectionName: this.name,
         tables,
         options: serializeFetchOptions(options),
      });
      const { schemas, errors } = await callMain<SchemaForTablesResponse>(
         (requestId) => {
            port.postMessage(buildRequest(requestId));
         },
      );
      return { schemas, errors };
   }

   /**
    * Asks the main thread for the schema of a SQL block. A response
    * carrying `error` wins; a response with neither `error` nor
    * `structDef` is reported as an error as well.
    */
   async fetchSchemaForSQLStruct(
      sentence: SQLSourceRequest,
      options: FetchSchemaOptions,
   ): Promise<
      | { structDef: SQLSourceDef; error?: undefined }
      | { error: string; structDef?: undefined }
   > {
      const buildRequest = (requestId: string): SchemaForSqlRequest => ({
         type: "schema-for-sql",
         requestId,
         jobId: this.jobId,
         connectionName: this.name,
         sentence: sentence as unknown,
         options: serializeFetchOptions(options),
      });
      const { error, structDef } = await callMain<SchemaForSqlResponse>(
         (requestId) => {
            port.postMessage(buildRequest(requestId));
         },
      );
      if (error !== undefined) {
         return { error };
      }
      if (structDef === undefined) {
         return { error: "Empty SQL schema response from main thread" };
      }
      return { structDef };
   }

   // Query execution is not expected on the compile path. Throwing
   // (instead of returning an empty result) makes a misrouted query
   // fail loudly rather than yield a silently wrong answer.
   async runSQL(): Promise<never> {
      throw new Error(
         `ProxyConnection(${this.name}): runSQL is not available in compile workers`,
      );
   }

   async estimateQueryCost(): Promise<never> {
      throw new Error(
         `ProxyConnection(${this.name}): estimateQueryCost not available in compile workers`,
      );
   }

   // Capability flags: this proxy is not a pool, cannot persist and
   // cannot stream.
   isPool(): false {
      return false;
   }

   canPersist(): false {
      return false;
   }

   canStream(): false {
      return false;
   }

   // Lifecycle hooks: nothing is held here, so there is nothing to
   // release or to idle.
   async close(): Promise<void> {
      /* no-op */
   }

   async idle(): Promise<void> {
      /* no-op */
   }

   // Metadata lookups answer with an empty record.
   async fetchMetadata(): Promise<Record<string, unknown>> {
      return {};
   }

   async fetchTableMetadata(): Promise<Record<string, unknown>> {
      return {};
   }
}
237
+
238
/**
 * Copies the two fetch options that are sent to the main thread —
 * `refreshTimestamp` and `modelAnnotation` — into a fresh object.
 * A key is present in the result only when its value is not
 * `undefined`; every other property of `options` is dropped.
 */
function serializeFetchOptions(options: FetchSchemaOptions): {
   refreshTimestamp?: number;
   modelAnnotation?: Annotation;
} {
   const { refreshTimestamp, modelAnnotation } = options;
   return {
      ...(refreshTimestamp !== undefined ? { refreshTimestamp } : {}),
      ...(modelAnnotation !== undefined ? { modelAnnotation } : {}),
   };
}
251
+
252
+ // ──────────────────────────────────────────────────────────────────────
253
+ // URLReader: try fs first, fall back to main-thread RPC for non-file URLs
254
+ // ──────────────────────────────────────────────────────────────────────
255
+
256
/**
 * Builds the URL reader used by a single compile job.
 *
 * `file:` URLs are read straight from disk as UTF-8. Any other
 * protocol is delegated to the main thread with a `read-url` request
 * tagged with `jobId`, and the `contents` of the response is returned.
 *
 * @param jobId Id of the compile job, attached to delegated reads.
 */
function makeWorkerUrlReader(jobId: string) {
   // Delegated read: the main thread decides how to fetch the URL.
   const readViaMain = async (url: URL): Promise<string> => {
      const { contents } = await callMain<ReadUrlResponse>((requestId) => {
         const request: ReadUrlRequest = {
            type: "read-url",
            requestId,
            jobId,
            url: url.toString(),
         };
         port.postMessage(request);
      });
      return contents;
   };

   return {
      readURL: async (url: URL): Promise<string> =>
         url.protocol === "file:"
            ? fs.promises.readFile(fileURLToPath(url), "utf8")
            : readViaMain(url),
   };
}
279
+
280
+ // ──────────────────────────────────────────────────────────────────────
281
+ // MalloyConfig assembly inside the worker
282
+ // ──────────────────────────────────────────────────────────────────────
283
+
284
/**
 * Builds the `MalloyConfig` used for one compile job.
 *
 * The config starts with no connections; `wrapConnections` installs a
 * lookup that creates a {@link ProxyConnection} the first time a name
 * is requested, using a `connection-metadata` request to the main
 * thread. The connection list is never enumerated up front.
 *
 * Lookup rules:
 * - The effective name is `name`, else `job.defaultConnectionName`,
 *   else `"duckdb"`.
 * - A proxy that was already created is returned from `proxies`.
 * - Concurrent lookups of the same name share one request through
 *   `inflight`, so parallel schema fetches do not multiply the RPC.
 * - The `inflight` entry is removed when the request settles, whether
 *   it succeeded or failed. A failed metadata request is therefore not
 *   remembered: the next lookup of that name issues a fresh request
 *   instead of receiving the old rejection again.
 *
 * @param job The compile request; supplies `packagePath` (root
 *   directory), `defaultConnectionName` and `requestId` (job id).
 */
function buildWorkerMalloyConfig(job: CompileJobRequest): MalloyConfig {
   const proxies = new Map<string, ProxyConnection>();
   const inflight = new Map<string, Promise<ProxyConnection>>();
   const config = new MalloyConfig(
      { connections: {} },
      {
         config: contextOverlay({ rootDirectory: job.packagePath }),
      },
   );
   config.wrapConnections(
      (_base: LookupConnection<Connection>): LookupConnection<Connection> => ({
         lookupConnection: async (name?: string): Promise<Connection> => {
            const effectiveName = name ?? job.defaultConnectionName ?? "duckdb";
            const cached = proxies.get(effectiveName);
            if (cached) return cached as unknown as Connection;
            let pending = inflight.get(effectiveName);
            if (!pending) {
               pending = (async () => {
                  try {
                     const response =
                        await callMain<ConnectionMetadataResponse>(
                           (requestId) => {
                              const req: ConnectionMetadataRequest = {
                                 type: "connection-metadata",
                                 requestId,
                                 jobId: job.requestId,
                                 connectionName: effectiveName,
                              };
                              port.postMessage(req);
                           },
                        );
                     const proxy = new ProxyConnection(
                        response.metadata,
                        job.requestId,
                     );
                     proxies.set(effectiveName, proxy);
                     return proxy;
                  } finally {
                     // Runs after the `await` above, i.e. after the
                     // `inflight.set` below has registered this promise.
                     inflight.delete(effectiveName);
                  }
               })();
               inflight.set(effectiveName, pending);
            }
            const proxy = await pending;
            return proxy as unknown as Connection;
         },
      }),
   );
   return config;
}
341
+
342
+ // ──────────────────────────────────────────────────────────────────────
343
+ // The actual compile — mirrors Model.create's in-process flow but
344
+ // only the parts that produce data shippable across postMessage.
345
+ // ──────────────────────────────────────────────────────────────────────
346
+
347
/**
 * Compiles the model named by `job` and packages everything the main
 * thread needs into a `compile-result` message.
 *
 * Steps, in order:
 * 1. Build a per-job `MalloyConfig` and URL reader and create a
 *    `Runtime` (with a non-strict build manifest when the job has one).
 * 2. Load and compile `job.modelPath` relative to `job.packagePath`.
 * 3. Convert the model's givens to their wire shape (`undefined` when
 *    there are none).
 * 4. Collect source infos: first from each import (best effort — an
 *    import that fails to load or compile is skipped), then local
 *    sources whose name was not already contributed by an import.
 * 5. Extract API sources, queries and the filter map.
 *
 * `dataStyles` is always sent empty from here. `compileDurationMs`
 * covers the whole function, measured with `performance.now()`.
 *
 * @throws Whatever the Malloy compile of the root model throws; the
 *   caller is responsible for turning that into a `compile-error`.
 */
async function compile(job: CompileJobRequest): Promise<CompileJobResult> {
   const startedAt = performance.now();

   // Reduces a model definition to the `source` entries of its info.
   const sourceEntriesOf = (def: ModelDef): Malloy.SourceInfo[] =>
      modelDefToModelInfo(def).entries.filter(
         (entry) => entry.kind === "source",
      ) as Malloy.SourceInfo[];

   const malloyConfig = buildWorkerMalloyConfig(job);
   const urlReader = makeWorkerUrlReader(job.requestId);

   let buildManifest:
      | { entries: NonNullable<CompileJobRequest["buildManifest"]>; strict: false }
      | undefined;
   if (job.buildManifest !== undefined) {
      buildManifest = { entries: job.buildManifest, strict: false };
   }
   const runtime = new Runtime({
      urlReader,
      config: malloyConfig,
      buildManifest,
   });

   const modelURL = new URL(`file://${job.packagePath}/${job.modelPath}`);
   const importBaseURL = new URL(".", modelURL);

   const materializer: ModelMaterializer = runtime.loadModel(modelURL, {
      importBaseURL,
   });
   const compiledModel = await materializer.getModel();
   const modelDef = compiledModel._modelDef as ModelDef;

   // Givens are converted to plain data here because the result has
   // to cross postMessage.
   const malloyGivens = Array.from(compiledModel.givens.values());
   let givens: ApiGivenWire[] | undefined;
   if (malloyGivens.length > 0) {
      givens = malloyGivens.map((g) => malloyGivenToWire(g));
   }

   // Imported sources go first and claim their names; imports are
   // processed one at a time, in declaration order.
   const sourceInfos: Malloy.SourceInfo[] = [];
   const importedSourceNames = new Set<string>();
   for (const importLocation of modelDef.imports ?? []) {
      try {
         const importedText = await urlReader.readURL(
            new URL(importLocation.importURL),
         );
         const importedModel = await runtime
            .loadModel(importedText, { importBaseURL })
            .getModel();
         for (const source of sourceEntriesOf(importedModel._modelDef)) {
            if (importedSourceNames.has(source.name)) continue;
            sourceInfos.push(source);
            importedSourceNames.add(source.name);
         }
      } catch {
         // Best effort: an import that cannot be read or compiled
         // contributes no source infos and does not fail the job.
      }
   }

   // Local sources are appended unless an import already provided a
   // source with the same name.
   for (const source of sourceEntriesOf(modelDef)) {
      if (importedSourceNames.has(source.name)) continue;
      sourceInfos.push(source);
   }

   const { sources, filterMap } = extractSources(job.modelPath, modelDef);
   const queries = extractQueries(job.modelPath, modelDef);
   // A Map is sent as an array of [name, filters] pairs.
   const filterMapEntries: Array<[string, FilterDefinition[]]> = [
      ...filterMap.entries(),
   ];

   return {
      type: "compile-result",
      requestId: job.requestId,
      modelDef,
      sourceInfos,
      sources,
      queries,
      filterMap: filterMapEntries,
      givens,
      // Data styles are not accumulated in the worker.
      dataStyles: {},
      compileDurationMs: performance.now() - startedAt,
   };
}
442
+
443
/**
 * Plain-data description of a given as it is sent to the main thread:
 * its name, a rendered type string and its annotation lines.
 * Declared locally so the worker does not have to import the
 * OpenAPI-generated API types.
 */
interface ApiGivenWire {
   name: string;
   type: string;
   annotations?: string[];
}

/**
 * The subset of a Malloy given that {@link malloyGivenToWire} reads:
 * its name, its type descriptor and a tag-line accessor.
 */
interface MalloyGivenLike {
   name: string;
   type: { type: string; filterType?: string };
   getTaglines(regex: RegExp): string[];
}

/**
 * Converts a given to its wire shape.
 *
 * The type is rendered as `filter<FILTER_TYPE>` when the descriptor's
 * `type` is `"filter expression"`, and as the descriptor's `type`
 * otherwise. Annotations are the tag lines that start with `#(`.
 */
function malloyGivenToWire(given: MalloyGivenLike): ApiGivenWire {
   const { name, type: descriptor } = given;

   let renderedType: string;
   if (descriptor.type === "filter expression") {
      renderedType = `filter<${descriptor.filterType}>`;
   } else {
      renderedType = descriptor.type;
   }

   const annotations = given.getTaglines(/^#\(/);
   return { name, type: renderedType, annotations };
}
472
+
473
+ // ──────────────────────────────────────────────────────────────────────
474
+ // extractSources / extractQueries — direct copies of the static
475
+ // helpers in Model.ts. Inlined here to keep the worker independent
476
+ // of the main-thread module graph (smaller bundle, fewer imports of
477
+ // things that pull in DuckDB or AWS SDK by transitive include).
478
+ // ──────────────────────────────────────────────────────────────────────
479
+
480
/** API shape of one source: name, annotations, views and filters. */
interface ApiSource {
   name: string;
   annotations?: string[];
   views?: { name: string; annotations?: string[] }[];
   filters?: unknown[];
}

/** API shape of one named query. */
interface ApiQuery {
   name: string;
   sourceName?: string;
   annotations?: string[];
}

/**
 * Describes every source in `modelDef.contents`.
 *
 * For each source (named by `as`, falling back to `name`):
 * - `annotations`: block notes on the source whose location URL
 *   contains `modelPath`.
 * - `filters`: parsed from the block-note texts of the source's whole
 *   annotation `inherits` chain, outermost ancestor first. When at
 *   least one filter is parsed, the parsed list is also stored in the
 *   returned `filterMap` under the source name. A parse failure leaves
 *   `filters` undefined and is otherwise ignored.
 * - `views`: turtle fields whose pipeline consists only of `reduce`
 *   stages, with their own `modelPath`-local block notes.
 *
 * @returns The source descriptions plus the per-source filter map.
 */
function extractSources(
   modelPath: string,
   modelDef: ModelDef,
): { sources: ApiSource[]; filterMap: Map<string, FilterDefinition[]> } {
   const filterMap = new Map<string, FilterDefinition[]>();

   // Block-note texts that were written in the model at `modelPath`.
   const localNoteTexts = (annotation: Annotation | undefined) =>
      annotation?.blockNotes
         ?.filter((note) => note.at.url.includes(modelPath))
         .map((note) => note.text);

   // Block-note texts of the whole `inherits` chain, ancestors first.
   const inheritedNoteTexts = (start: Annotation | undefined): string[] => {
      const layers: string[][] = [];
      for (let cur = start; cur; cur = cur.inherits) {
         if (cur.blockNotes) {
            layers.unshift(cur.blockNotes.map((note) => note.text));
         }
      }
      return layers.flat();
   };

   // Parses filter annotations; records them in `filterMap` and
   // returns their API description, or undefined when there are none.
   const describeFilters = (
      struct: StructDef,
      sourceName: string,
   ): unknown[] | undefined => {
      const noteTexts = inheritedNoteTexts(struct.annotation);
      if (noteTexts.length === 0) return undefined;
      try {
         const parsed = parseFilters(noteTexts);
         if (parsed.length === 0) return undefined;
         filterMap.set(sourceName, parsed);
         const { fields } = struct;
         return parsed.map((f) => {
            const field = fields.find(
               (fd) => (fd.as || fd.name) === f.dimension,
            );
            return {
               name: f.name,
               dimension: f.dimension,
               type: f.type,
               implicit: f.implicit,
               required: f.required,
               dimensionType: field?.type as string | undefined,
            };
         });
      } catch {
         // Filter annotations that fail to parse do not fail the
         // compile; the source is simply reported without filters.
         return undefined;
      }
   };

   // Turtle fields made up exclusively of `reduce` stages.
   const describeViews = (struct: StructDef) =>
      struct.fields
         .filter(
            (f) =>
               f.type === "turtle" &&
               (f as TurtleDef).pipeline.every(
                  (stage) => stage.type === "reduce",
               ),
         )
         .map((turtle) => ({
            name: turtle.as || turtle.name,
            annotations: localNoteTexts(turtle.annotation),
         }));

   const sources: ApiSource[] = Object.values(modelDef.contents)
      .filter((obj) => isSourceDef(obj))
      .map((sourceObj) => {
         const struct = sourceObj as StructDef;
         const sourceName = struct.as || struct.name;
         const annotations = localNoteTexts(struct.annotation);
         const filters = describeFilters(struct, sourceName);
         const views = describeViews(struct);
         return {
            name: sourceName,
            annotations,
            views,
            filters,
         } as ApiSource;
      });

   return { sources, filterMap };
}
566
+
567
/**
 * Describes every named query in `modelDef.contents`.
 *
 * Each query is named by `as` (falling back to `name`). `sourceName`
 * is set only when the query's `structRef` is a string. `annotations`
 * are the block notes whose location URL contains `modelPath`.
 */
function extractQueries(modelPath: string, modelDef: ModelDef): ApiQuery[] {
   const isNamedQuery = (obj: NamedModelObject): obj is NamedQueryDef =>
      obj.type === "query";

   const queries: ApiQuery[] = [];
   for (const entry of Object.values(modelDef.contents)) {
      if (!isNamedQuery(entry)) continue;

      const sourceName =
         typeof entry.structRef === "string" ? entry.structRef : undefined;
      const annotations = entry.annotation?.blockNotes
         ?.filter((note: { at: { url: string } }) =>
            note.at.url.includes(modelPath),
         )
         .map((note: { text: string }) => note.text);

      queries.push({
         name: entry.as || entry.name,
         sourceName,
         annotations,
      });
   }
   return queries;
}
582
+
583
+ // ──────────────────────────────────────────────────────────────────────
584
+ // Error serialization
585
+ // ──────────────────────────────────────────────────────────────────────
586
+
587
/**
 * Flattens a thrown value into plain data for postMessage.
 *
 * - `MalloyError`: name, message and stack plus its `problems`, and
 *   `isCompilationError: true` so the receiver can tell a compile
 *   failure apart from any other error.
 * - Any other `Error`: name, message and stack.
 * - Anything else: name `"Error"` with `String(value)` as the message.
 */
function serializeError(error: unknown): SerializedError {
   if (error instanceof MalloyError) {
      const { name, message, stack, problems } = error;
      return {
         name,
         message,
         stack,
         malloyProblems: problems as unknown[],
         isCompilationError: true,
      };
   }

   if (error instanceof Error) {
      const { name, message, stack } = error;
      return { name, message, stack };
   }

   return { name: "Error", message: String(error) };
}
611
+
612
/**
 * Rebuilds an `Error` from its serialized form: the message and name
 * are always copied, and the stack replaces the freshly captured one
 * only when the serialized stack is non-empty. Other serialized
 * fields are not carried over.
 */
function deserializeError(serialized: SerializedError): Error {
   const { name, message, stack } = serialized;
   const rebuilt = new Error(message);
   rebuilt.name = name;
   if (stack) {
      rebuilt.stack = stack;
   }
   return rebuilt;
}
618
+
619
+ // ──────────────────────────────────────────────────────────────────────
620
+ // Message dispatcher
621
+ // ──────────────────────────────────────────────────────────────────────
622
+
623
/** Set once a `shutdown` message has been received; never reset. */
let shuttingDown = false;

/** Request ids of compile jobs that were started and not yet finished. */
const inFlightJobs = new Set<string>();

// Single entry point for everything the main thread sends:
//  - "shutdown": remember it and exit as soon as the worker is idle;
//    while jobs are still running the handler keeps servicing
//    responses so they can finish.
//  - "compile": refused with a ShuttingDown error once shutdown has
//    begun, otherwise tracked and started without awaiting.
//  - anything else: treated as a reply to a worker → main request.
port.on("message", (message: MainToWorkerMessage) => {
   switch (message.type) {
      case "shutdown":
         shuttingDown = true;
         maybeExit();
         return;

      case "compile": {
         if (shuttingDown) {
            const refusal: CompileJobError = {
               type: "compile-error",
               requestId: message.requestId,
               error: {
                  name: "ShuttingDown",
                  message: "Compile worker is shutting down",
               },
            };
            port.postMessage(refusal);
            return;
         }
         inFlightJobs.add(message.requestId);
         void runJob(message);
         return;
      }

      default:
         dispatchMainResponse(message);
   }
});
654
+
655
/**
 * Runs one compile job and reports its outcome to the parent: the
 * compile result on success, or a `compile-error` message carrying the
 * serialized error on failure. In both cases the job is then removed
 * from `inFlightJobs` and the shutdown condition is re-checked.
 */
async function runJob(job: CompileJobRequest): Promise<void> {
   const { requestId } = job;
   try {
      port.postMessage(await compile(job));
   } catch (error) {
      const failure: CompileJobError = {
         type: "compile-error",
         requestId,
         error: serializeError(error),
      };
      port.postMessage(failure);
   } finally {
      inFlightJobs.delete(requestId);
      maybeExit();
   }
}
671
+
672
/**
 * Exits the worker once shutdown was requested and nothing is left to
 * do: no job in flight and no worker → main request outstanding.
 * The exit is deferred with `setImmediate` so that messages posted
 * just before this call get a chance to be flushed to the parent.
 */
function maybeExit(): void {
   if (!shuttingDown) return;
   if (inFlightJobs.size > 0 || pendingRpc.size > 0) return;
   setImmediate(() => process.exit(0));
}
679
+
680
// Last statement of module initialisation: tell the parent that this
// worker has finished loading. The pool waits for this message before
// it dispatches jobs, so a job never races the module-init time.
port.postMessage({ type: "ready" } as const);