@malloy-publisher/server 0.0.198 → 0.0.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +51 -0
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-Dpee_Kn6.js} +1 -1
  4. package/dist/app/assets/{HomePage-DwkH7OrS.js → HomePage-DLRWTNoL.js} +1 -1
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-DsVt5QGM.js} +1 -1
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-AwAugZ37.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-XQ-EWGTC.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-3Mv8JQw7.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DHYYpcYc.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-DfcpQGVP.es-DQggNOdX.js} +1 -1
  11. package/dist/app/assets/{index-DNofXMxi.js → index-BUp81Qdm.js} +1 -1
  12. package/dist/app/assets/{index-DL6BZTuw.js → index-D1pdwrUW.js} +1 -1
  13. package/dist/app/assets/{index-U38AyjJL.js → index-Dv5bF4Ii.js} +4 -4
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-CQH4LZU8.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +2807 -2729
  19. package/package.json +2 -3
  20. package/src/controller/compile.controller.ts +3 -1
  21. package/src/controller/model.controller.ts +8 -1
  22. package/src/controller/query.controller.ts +3 -0
  23. package/src/health.spec.ts +90 -0
  24. package/src/health.ts +88 -45
  25. package/src/instrumentation.ts +50 -0
  26. package/src/mcp/tools/execute_query_tool.ts +12 -0
  27. package/src/package_load/package_load_pool.spec.ts +252 -0
  28. package/src/package_load/package_load_pool.ts +920 -0
  29. package/src/package_load/package_load_worker.ts +980 -0
  30. package/src/package_load/protocol.ts +336 -0
  31. package/src/query_param_utils.ts +18 -0
  32. package/src/server-old.ts +1 -1
  33. package/src/server.ts +36 -10
  34. package/src/service/db_utils.spec.ts +1 -1
  35. package/src/service/environment.ts +3 -2
  36. package/src/service/environment_store.ts +24 -3
  37. package/src/service/filter_integration.spec.ts +110 -0
  38. package/src/service/given.ts +80 -0
  39. package/src/service/givens_integration.spec.ts +192 -0
  40. package/src/service/model.spec.ts +105 -0
  41. package/src/service/model.ts +287 -16
  42. package/src/service/package.spec.ts +10 -0
  43. package/src/service/package.ts +257 -145
  44. package/src/service/package_worker_path.spec.ts +196 -0
  45. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
@@ -0,0 +1,980 @@
1
+ /**
2
+ * Package-load worker entry point.
3
+ *
4
+ * Runs inside a worker_threads `Worker`. Owns **no native connection
5
+ * state of any kind** — every connection lookup, schema fetch, SQL
6
+ * block schema, and non-file URL read is proxied back to the main
7
+ * thread via correlated RPC messages. The worker is intentionally
8
+ * pure-CPU: only Malloy parser / type-checker / IR-builder runs here.
9
+ *
10
+ * Why no DuckDB in the worker?
11
+ * ----------------------------
12
+ * DuckDB's native bindings cannot be safely loaded into more than one
13
+ * isolate of the same Node/Bun process (we hit Bun crash 0x20131 when
14
+ * the worker isolate and the main isolate both `dlopen` duckdb-native).
15
+ * Even if Bun fixes that, holding a duckdb handle in the worker
16
+ * duplicates the in-memory DB state and adds native-module load
17
+ * latency to every worker spawn. Database probing (`readDatabases`)
18
+ * stays on the main thread where it can reuse the package's existing
19
+ * DuckDB connection (see PR #772).
20
+ *
21
+ * Boundary
22
+ * --------
23
+ * 1. Worker: read `publisher.json` (package manifest).
24
+ * 2. Worker: compile every `.malloy` and `.malloynb` via Malloy.
25
+ * All `lookupConnection(name)` / schema-fetch calls Malloy makes
26
+ * during compile are proxied to the main thread's live
27
+ * connection pool over RPC.
28
+ * 3. Worker: for each model, capture the structured-clonable fields
29
+ * the main-thread `Model` constructor needs: `modelDef`,
30
+ * `sourceInfos`, `sources`, `queries`, `filterMap`, `givens`,
31
+ * dataStyles, plus per-cell `modelDef` + `queryDef` for
32
+ * notebooks (so per-cell materializers / runnables can be
33
+ * hydrated on the main thread via `Runtime._loadModelFromModelDef`
34
+ * / `ModelMaterializer._loadQueryFromQueryDef` — no recompile).
35
+ * 4. Main thread: probe embedded `.parquet` / `.csv` databases
36
+ * against the package's existing DuckDB connection.
37
+ *
38
+ * Per-model compile failures are returned in-band on
39
+ * `SerializedModel.compilationError`; the rest of the package keeps
40
+ * loading. Whole-package failures (e.g. manifest missing) come back
41
+ * as `LoadPackageError`.
42
+ *
43
+ * Bundled separately by `build.ts` as `dist/package_load_worker.mjs`
44
+ * so `new Worker(...)` can load it without dragging in the entire
45
+ * server module graph.
46
+ */
47
+ import {
48
+ contextOverlay,
49
+ type Annotation,
50
+ type BuildManifestEntry,
51
+ type Connection,
52
+ type FetchSchemaOptions,
53
+ type LookupConnection,
54
+ MalloyConfig,
55
+ MalloyError,
56
+ type ModelDef,
57
+ type ModelMaterializer,
58
+ modelDefToModelInfo,
59
+ type NamedModelObject,
60
+ type NamedQueryDef,
61
+ type Query,
62
+ Runtime,
63
+ type SQLSourceDef,
64
+ type SQLSourceRequest,
65
+ type StructDef,
66
+ type TableSourceDef,
67
+ type TurtleDef,
68
+ isSourceDef,
69
+ } from "@malloydata/malloy";
70
+ import * as Malloy from "@malloydata/malloy-interfaces";
71
+ import {
72
+ MalloySQLParser,
73
+ MalloySQLStatementType,
74
+ } from "@malloydata/malloy-sql";
75
+ import * as fs from "fs";
76
+ import * as path from "path";
77
+ import { parentPort, threadId } from "node:worker_threads";
78
+ import recursive from "recursive-readdir";
79
+ import { fileURLToPath, pathToFileURL } from "url";
80
+
81
+ import {
82
+ MODEL_FILE_SUFFIX,
83
+ NOTEBOOK_FILE_SUFFIX,
84
+ PACKAGE_MANIFEST_NAME,
85
+ } from "../constants";
86
+ import { HackyDataStylesAccumulator } from "../data_styles";
87
+ import { parseFilters, type FilterDefinition } from "../service/filter";
88
+ import {
89
+ malloyGivenToApi,
90
+ type MalloyGiven,
91
+ type MalloyGivenApi,
92
+ } from "../service/given";
93
+ import { ignoreDotfiles } from "../utils";
94
+ import type {
95
+ ConnectionMetadata,
96
+ ConnectionMetadataRequest,
97
+ ConnectionMetadataResponse,
98
+ LoadPackageError,
99
+ LoadPackageRequest,
100
+ LoadPackageResult,
101
+ MainToWorkerMessage,
102
+ ReadUrlRequest,
103
+ ReadUrlResponse,
104
+ SchemaForSqlRequest,
105
+ SchemaForSqlResponse,
106
+ SchemaForTablesRequest,
107
+ SchemaForTablesResponse,
108
+ SerializedError,
109
+ SerializedModel,
110
+ SerializedNotebookCell,
111
+ } from "./protocol";
112
+
113
// This module only makes sense inside a worker thread. Resolve the
// channel to the parent once, failing at load time when there is none,
// and keep it in a non-nullable binding for the closures further down.
const port = (() => {
  if (parentPort == null) {
    throw new Error(
      "package_load_worker.ts must be loaded inside a worker_threads Worker",
    );
  }
  return parentPort;
})();
120
+
121
+ // ──────────────────────────────────────────────────────────────────────
122
+ // RPC plumbing for worker → main calls
123
+ // ──────────────────────────────────────────────────────────────────────
124
+
125
/** Counter behind {@link newRpcId}; incremented once per issued id. */
let nextRpcId = 0;

/**
 * Worker → main calls that have been sent but not yet answered, keyed
 * by request id. The entry's `resolve` / `reject` settle the promise
 * that {@link callMain} handed back to the caller.
 */
const pendingRpc = new Map<
  string,
  { resolve: (value: unknown) => void; reject: (err: Error) => void }
>();

/**
 * Mint a fresh request id of the form `w<threadId>-rpc-<n>`.
 *
 * @returns an id that has not been returned before by this module instance
 */
function newRpcId(): string {
  nextRpcId += 1;
  return `w${threadId}-rpc-${nextRpcId}`;
}

/**
 * Register a pending call and let `send` post the request that carries
 * its id.
 *
 * The pending entry is registered *before* `send` runs so a reply can
 * never arrive for an unknown id. If `send` itself throws, no reply
 * will ever come for this id, so the entry is removed again before the
 * promise is rejected — otherwise it would sit in `pendingRpc` forever.
 *
 * @param send callback that posts the request using the supplied id
 * @returns a promise settled by whoever resolves / rejects the pending
 *   entry; rejected with the thrown value when `send` throws
 */
function callMain<T>(send: (requestId: string) => void): Promise<T> {
  const requestId = newRpcId();
  return new Promise<T>((resolve, reject) => {
    pendingRpc.set(requestId, {
      resolve: (value) => resolve(value as T),
      reject,
    });
    try {
      send(requestId);
    } catch (error) {
      // Nothing was (successfully) sent, so nothing will answer.
      pendingRpc.delete(requestId);
      reject(error);
    }
  });
}
146
+
147
/**
 * Route a message from the main thread to the RPC call waiting on it.
 *
 * Response messages resolve the matching pending call with the whole
 * message; `rpc-error` rejects it with the deserialized error. Messages
 * of any other type, and replies whose `requestId` has no pending
 * entry, are ignored.
 */
function dispatchMainResponse(message: MainToWorkerMessage): void {
  switch (message.type) {
    case "schema-for-tables-response":
    case "schema-for-sql-response":
    case "read-url-response":
    case "connection-metadata-response": {
      const waiter = pendingRpc.get(message.requestId);
      if (waiter === undefined) return;
      pendingRpc.delete(message.requestId);
      waiter.resolve(message);
      return;
    }
    case "rpc-error": {
      const waiter = pendingRpc.get(message.requestId);
      if (waiter === undefined) return;
      pendingRpc.delete(message.requestId);
      waiter.reject(deserializeError(message.error));
      return;
    }
    default:
      return;
  }
}
168
+
169
+ // ──────────────────────────────────────────────────────────────────────
170
+ // Proxy connection: stand-in for non-duckdb connections at compile time
171
+ // ──────────────────────────────────────────────────────────────────────
172
+
173
/**
 * Connection stand-in used while compiling inside the worker.
 *
 * It carries only the metadata handed to the constructor (name,
 * dialect, digest) and forwards both schema-fetch methods to the main
 * thread through {@link callMain}. Everything that would need a real
 * connection either throws or returns an inert value.
 */
class ProxyConnection {
  public readonly name: string;
  public readonly dialectName: string;
  private readonly digest: string;
  private readonly jobId: string;

  /**
   * @param metadata name, dialect and digest of the connection being proxied
   * @param jobId id attached to every RPC request this proxy sends
   */
  constructor(metadata: ConnectionMetadata, jobId: string) {
    const { name, dialectName, digest } = metadata;
    this.name = name;
    this.dialectName = dialectName;
    this.digest = digest;
    this.jobId = jobId;
  }

  /** Digest string taken from the metadata supplied at construction. */
  getDigest(): string {
    return this.digest;
  }

  /**
   * Ask the main thread for the schemas of `tables` on this connection.
   *
   * @returns the `schemas` and `errors` maps from the main thread's reply
   */
  async fetchSchemaForTables(
    tables: Record<string, string>,
    options: FetchSchemaOptions,
  ): Promise<{
    schemas: Record<string, TableSourceDef>;
    errors: Record<string, string>;
  }> {
    const sendRequest = (requestId: string): void => {
      const request: SchemaForTablesRequest = {
        type: "schema-for-tables",
        requestId,
        jobId: this.jobId,
        connectionName: this.name,
        tables,
        options: serializeFetchOptions(options),
      };
      port.postMessage(request);
    };
    const { schemas, errors } =
      await callMain<SchemaForTablesResponse>(sendRequest);
    return { schemas, errors };
  }

  /**
   * Ask the main thread for the schema of a SQL block on this connection.
   *
   * @returns `{ error }` when the reply carries an error or has no
   *   struct definition, otherwise `{ structDef }`
   */
  async fetchSchemaForSQLStruct(
    sentence: SQLSourceRequest,
    options: FetchSchemaOptions,
  ): Promise<
    | { structDef: SQLSourceDef; error?: undefined }
    | { error: string; structDef?: undefined }
  > {
    const sendRequest = (requestId: string): void => {
      const request: SchemaForSqlRequest = {
        type: "schema-for-sql",
        requestId,
        jobId: this.jobId,
        connectionName: this.name,
        sentence: sentence as unknown,
        options: serializeFetchOptions(options),
      };
      port.postMessage(request);
    };
    const { error, structDef } =
      await callMain<SchemaForSqlResponse>(sendRequest);
    if (error !== undefined) return { error };
    if (structDef === undefined) {
      return { error: "Empty SQL schema response from main thread" };
    }
    return { structDef };
  }

  // The members below exist to fill out the connection surface. The
  // worker never executes SQL, so the execution-related ones throw
  // instead of quietly doing nothing — a misrouted call should be loud.

  /** Always rejects: the worker does not run SQL. */
  async runSQL(): Promise<never> {
    throw new Error(
      `ProxyConnection(${this.name}): runSQL is not available in package-load workers`,
    );
  }

  /** Always `false`. */
  isPool(): false {
    return false;
  }

  /** Always `false`. */
  canPersist(): false {
    return false;
  }

  /** Always `false`. */
  canStream(): false {
    return false;
  }

  /** Nothing to release; resolves immediately. */
  async close(): Promise<void> {
    /* no-op */
  }

  /** Nothing to wait for; resolves immediately. */
  async idle(): Promise<void> {
    /* no-op */
  }

  /** Always rejects: cost estimation is not available in the worker. */
  async estimateQueryCost(): Promise<never> {
    throw new Error(
      `ProxyConnection(${this.name}): estimateQueryCost not available in package-load workers`,
    );
  }

  /** Resolves to an empty object. */
  async fetchMetadata(): Promise<Record<string, unknown>> {
    return {};
  }

  /** Resolves to an empty object. */
  async fetchTableMetadata(): Promise<Record<string, unknown>> {
    return {};
  }
}
271
+
272
/**
 * Copy the two fields of `options` that are sent over RPC into a fresh
 * object, leaving out any that are `undefined` so the key is absent
 * rather than present-with-undefined.
 *
 * @param options schema-fetch options as passed to the proxy connection
 * @returns an object holding at most `refreshTimestamp` and `modelAnnotation`
 */
function serializeFetchOptions(options: FetchSchemaOptions): {
  refreshTimestamp?: number;
  modelAnnotation?: Annotation;
} {
  const { refreshTimestamp, modelAnnotation } = options;
  return {
    ...(refreshTimestamp !== undefined ? { refreshTimestamp } : {}),
    ...(modelAnnotation !== undefined ? { modelAnnotation } : {}),
  };
}
285
+
286
+ // ──────────────────────────────────────────────────────────────────────
287
+ // URLReader: file:// → fs; everything else proxies to main thread
288
+ // ──────────────────────────────────────────────────────────────────────
289
+
290
/**
 * Build the URL reader used by the worker's Malloy runtime.
 *
 * `file:` URLs are read from disk as UTF-8; every other protocol is
 * forwarded to the main thread as a `read-url` RPC tagged with `jobId`.
 *
 * @param jobId id attached to each `read-url` request
 * @returns an object exposing `readURL`
 */
function makeWorkerUrlReader(jobId: string): {
  readURL: (url: URL) => Promise<string>;
} {
  async function readURL(url: URL): Promise<string> {
    if (url.protocol !== "file:") {
      const reply = await callMain<ReadUrlResponse>((requestId) => {
        const request: ReadUrlRequest = {
          type: "read-url",
          requestId,
          jobId,
          url: url.toString(),
        };
        port.postMessage(request);
      });
      return reply.contents;
    }
    return fs.promises.readFile(fileURLToPath(url), "utf8");
  }

  return { readURL };
}
312
+
313
+ // ──────────────────────────────────────────────────────────────────────
314
+ // MalloyConfig assembly inside the worker
315
+ // ──────────────────────────────────────────────────────────────────────
316
+
317
/**
 * Build the package's MalloyConfig inside the worker. Every
 * connection name — including `"duckdb"` — resolves to a
 * {@link ProxyConnection} that RPCs schema fetches back to the main
 * thread. The worker never holds a native connection handle, which
 * keeps it pure-CPU and avoids dlopen'ing duckdb in a second isolate.
 *
 * Concurrent `lookupConnection(name)` calls for the same name are
 * deduped via `inflight` — Malloy's compile pipeline can fan out
 * many schema fetches that all hit `lookupConnection` before any
 * resolve, and we don't want to N-multiply the metadata RPC.
 *
 * `inflight` only ever holds lookups that are still running: the entry
 * is removed when the metadata RPC settles, whether it succeeded or
 * failed. A failed lookup is therefore not cached — callers already
 * awaiting it see the rejection, and the next lookup of that name
 * issues a fresh RPC instead of replaying a stale error.
 *
 * @param job the load-package request; supplies the package path, the
 *   default connection name and the job id sent with every RPC
 * @returns a MalloyConfig whose connection lookup is fully proxied
 */
function buildWorkerMalloyConfig(job: LoadPackageRequest): MalloyConfig {
  // Successfully resolved proxies, one per connection name.
  const proxies = new Map<string, ProxyConnection>();
  // Metadata lookups currently in flight, one per connection name.
  const inflight = new Map<string, Promise<ProxyConnection>>();

  // Fetch metadata for one connection and wrap it in a proxy.
  const fetchProxy = async (
    connectionName: string,
  ): Promise<ProxyConnection> => {
    try {
      const response = await callMain<ConnectionMetadataResponse>(
        (requestId) => {
          const req: ConnectionMetadataRequest = {
            type: "connection-metadata",
            requestId,
            jobId: job.requestId,
            connectionName,
          };
          port.postMessage(req);
        },
      );
      const proxy = new ProxyConnection(response.metadata, job.requestId);
      proxies.set(connectionName, proxy);
      return proxy;
    } finally {
      // Runs only after the first `await` above has yielded, i.e. after
      // the caller below has stored this promise in `inflight`.
      inflight.delete(connectionName);
    }
  };

  const config = new MalloyConfig(
    { connections: {} },
    { config: contextOverlay({ rootDirectory: job.packagePath }) },
  );
  config.wrapConnections(
    (_base: LookupConnection<Connection>): LookupConnection<Connection> => ({
      lookupConnection: async (name?: string): Promise<Connection> => {
        const effectiveName = name ?? job.defaultConnectionName ?? "duckdb";
        const cached = proxies.get(effectiveName);
        if (cached) return cached as unknown as Connection;
        let pending = inflight.get(effectiveName);
        if (!pending) {
          pending = fetchProxy(effectiveName);
          inflight.set(effectiveName, pending);
        }
        return (await pending) as unknown as Connection;
      },
    }),
  );
  return config;
}
373
+
374
+ // ──────────────────────────────────────────────────────────────────────
375
+ // Filesystem helpers (replicated from service/package.ts so the worker
376
+ // stays decoupled from the main-thread service module graph)
377
+ // ──────────────────────────────────────────────────────────────────────
378
+
379
/**
 * Read the package manifest from `packagePath` and return its `name`
 * and `description` fields.
 *
 * The manifest is parsed as JSON without further validation; a missing
 * file or malformed JSON rejects the returned promise.
 *
 * @param packagePath directory that contains the manifest file
 */
async function readPackageMetadata(
  packagePath: string,
): Promise<{ name?: string; description?: string }> {
  const manifestText = await fs.promises.readFile(
    path.join(packagePath, PACKAGE_MANIFEST_NAME),
    "utf8",
  );
  const manifest = JSON.parse(manifestText) as {
    name?: string;
    description?: string;
  };
  return { name: manifest.name, description: manifest.description };
}
390
+
391
/**
 * Recursively list the files under `packagePath`, applying the
 * `ignoreDotfiles` filter.
 *
 * @returns paths relative to `packagePath`, with backslashes replaced
 *   by forward slashes
 */
async function listPackageFiles(packagePath: string): Promise<string[]> {
  const absolutePaths: string[] = await recursive(packagePath, [
    ignoreDotfiles,
  ]);
  const relativePaths: string[] = [];
  for (const absolutePath of absolutePaths) {
    const relativePath = path.relative(packagePath, absolutePath);
    relativePaths.push(relativePath.replace(/\\/g, "/"));
  }
  return relativePaths;
}
397
+
398
/**
 * Keep only the paths that end in the model or the notebook file
 * suffix, preserving input order.
 */
function filterModelPaths(allRelative: string[]): string[] {
  const compilableSuffixes = [MODEL_FILE_SUFFIX, NOTEBOOK_FILE_SUFFIX];
  return allRelative.filter((relativePath) =>
    compilableSuffixes.some((suffix) => relativePath.endsWith(suffix)),
  );
}
403
+
404
+ // ──────────────────────────────────────────────────────────────────────
405
+ // Model compile (mirrors service/model.ts but produces SerializedModel)
406
+ // ──────────────────────────────────────────────────────────────────────
407
+
408
/** One source as produced by `extractSources`. */
interface ApiSourceWire {
  name: string;
  annotations?: string[];
  views?: Array<{ name: string; annotations?: string[] }>;
  filters?: unknown[];
  givens?: unknown[];
}

/** One named query as produced by `extractQueries`. */
interface ApiQueryWire {
  name: string;
  sourceName?: string;
  annotations?: string[];
}

/** Givens use the API given shape unchanged. */
type ApiGivenWire = MalloyGivenApi;
421
+
422
/**
 * Load every model listed in `modelDef.imports` and gather the sources
 * they define.
 *
 * Imports are processed one after another. A source name is recorded
 * only the first time it is seen. An import that cannot be read or
 * compiled is skipped without failing the caller.
 *
 * @param modelDef model whose `imports` list is walked
 * @param runtime runtime used to read and compile each import
 * @param importBaseURL base URL passed to `loadModel` for each import
 * @returns the collected source infos and the set of their names
 */
async function collectImportedSourceInfos(
  modelDef: ModelDef,
  runtime: Runtime,
  importBaseURL: URL,
): Promise<{
  sourceInfos: Malloy.SourceInfo[];
  importedNames: Set<string>;
}> {
  const importedNames = new Set<string>();
  const sourceInfos: Malloy.SourceInfo[] = [];

  for (const importLocation of modelDef.imports ?? []) {
    try {
      const importedText = await runtime.urlReader.readURL(
        new URL(importLocation.importURL),
      );
      const importedModel = await runtime
        .loadModel(importedText as string, { importBaseURL })
        .getModel();
      const { entries } = modelDefToModelInfo(importedModel._modelDef);
      for (const entry of entries) {
        if (entry.kind !== "source" || importedNames.has(entry.name)) {
          continue;
        }
        sourceInfos.push(entry as Malloy.SourceInfo);
        importedNames.add(entry.name);
      }
    } catch {
      // Best-effort: an import that fails to load contributes nothing.
    }
  }

  return { sourceInfos, importedNames };
}
460
+
461
/**
 * Append to `target` every source entry of `modelDef`'s model info
 * whose name is not in `importedNames`. `target` is mutated in place;
 * `importedNames` is only read.
 */
function appendLocalSourceInfos(
  modelDef: ModelDef,
  target: Malloy.SourceInfo[],
  importedNames: Set<string>,
): void {
  for (const entry of modelDefToModelInfo(modelDef).entries) {
    if (entry.kind === "source" && !importedNames.has(entry.name)) {
      target.push(entry as Malloy.SourceInfo);
    }
  }
}
474
+
475
/**
 * Describe every source in `modelDef.contents` and collect the filters
 * declared through annotations.
 *
 * For each source this produces:
 * - `annotations`: block-note texts whose location URL contains `modelPath`;
 * - `filters`: parsed from the block notes of the source's annotation
 *   and everything it inherits from (most-inherited first). Parse
 *   failures are swallowed and leave `filters` undefined;
 * - `views`: turtle fields whose pipeline stages are all `reduce`;
 * - `givens`: the same `givens` argument, attached to every source.
 *
 * @param modelPath path fragment used to pick notes written in this file
 * @param modelDef compiled model to inspect
 * @param givens givens to attach to every source
 * @returns the sources plus a map from source name to its parsed filters
 */
function extractSources(
  modelPath: string,
  modelDef: ModelDef,
  givens: ApiGivenWire[] | undefined,
): { sources: ApiSourceWire[]; filterMap: Map<string, FilterDefinition[]> } {
  const filterMap = new Map<string, FilterDefinition[]>();
  const sources: ApiSourceWire[] = [];

  // Texts of the block notes that were written in this model's file.
  const notesDeclaredHere = (
    annotation: Annotation | undefined,
  ): string[] | undefined =>
    annotation?.blockNotes
      ?.filter((note) => note.at.url.includes(modelPath))
      .map((note) => note.text);

  for (const candidate of Object.values(modelDef.contents)) {
    if (!isSourceDef(candidate)) continue;
    const struct = candidate as StructDef;
    const sourceName = struct.as || struct.name;
    const annotations = notesDeclaredHere(struct.annotation);

    // Walk the inheritance chain from the source outwards, then flip it
    // so the most-inherited notes come first.
    const noteGroups: string[][] = [];
    for (
      let link: Annotation | undefined = struct.annotation;
      link;
      link = link.inherits
    ) {
      if (link.blockNotes) {
        noteGroups.push(link.blockNotes.map((note) => note.text));
      }
    }
    const allAnnotations = noteGroups.reverse().flat();

    let filters: unknown[] | undefined;
    if (allAnnotations.length > 0) {
      try {
        const parsedFilters = parseFilters(allAnnotations);
        if (parsedFilters.length > 0) {
          filterMap.set(sourceName, parsedFilters);
          const fields = struct.fields;
          filters = parsedFilters.map((filter) => {
            const matchingField = fields.find(
              (fieldDef) =>
                (fieldDef.as || fieldDef.name) === filter.dimension,
            );
            return {
              name: filter.name,
              dimension: filter.dimension,
              type: filter.type,
              implicit: filter.implicit,
              required: filter.required,
              dimensionType: matchingField?.type as string | undefined,
            };
          });
        }
      } catch {
        /* parse errors are warnings; matches in-process */
      }
    }

    const views = struct.fields
      .filter(
        (field) =>
          field.type === "turtle" &&
          (field as TurtleDef).pipeline.every(
            (stage) => stage.type === "reduce",
          ),
      )
      .map((view) => ({
        name: view.as || view.name,
        annotations: notesDeclaredHere(view.annotation),
      }));

    sources.push({
      name: sourceName,
      annotations,
      views,
      filters,
      givens,
    } as ApiSourceWire);
  }

  return { sources, filterMap };
}
550
+
551
/**
 * Describe every named query in `modelDef.contents`.
 *
 * `sourceName` is set only when the query's `structRef` is a string.
 * `annotations` holds the block-note texts whose location URL contains
 * `modelPath`.
 */
function extractQueries(modelPath: string, modelDef: ModelDef): ApiQueryWire[] {
  const queries: ApiQueryWire[] = [];
  for (const entry of Object.values(modelDef.contents)) {
    if (entry.type !== "query") continue;
    const namedQuery = entry as NamedQueryDef;
    const notesFromThisFile = namedQuery.annotation?.blockNotes
      ?.filter((note: { at: { url: string } }) =>
        note.at.url.includes(modelPath),
      )
      .map((note: { text: string }) => note.text);
    queries.push({
      name: namedQuery.as || namedQuery.name,
      sourceName:
        typeof namedQuery.structRef === "string"
          ? namedQuery.structRef
          : undefined,
      annotations: notesFromThisFile,
    });
  }
  return queries;
}
566
+
567
/**
 * Create a Malloy runtime for compiling one model file.
 *
 * The runtime reads URLs through a `HackyDataStylesAccumulator` that
 * wraps the worker URL reader. When the job carries a build manifest it
 * is passed along as non-strict; otherwise no manifest is supplied.
 *
 * @param job load-package request; only `buildManifest` is read here
 * @param malloyConfig config handed to the runtime
 * @param jobId id given to the worker URL reader
 * @returns the runtime together with the accumulator it reads through
 */
function buildRuntimeForModel(
  job: LoadPackageRequest,
  malloyConfig: MalloyConfig,
  jobId: string,
): { runtime: Runtime; urlReader: HackyDataStylesAccumulator } {
  const urlReader = new HackyDataStylesAccumulator(makeWorkerUrlReader(jobId));

  const manifestEntries = job.buildManifest;
  const buildManifest =
    manifestEntries == null
      ? undefined
      : {
          entries: manifestEntries as Record<string, BuildManifestEntry>,
          strict: false,
        };

  const runtime = new Runtime({
    urlReader,
    config: malloyConfig,
    buildManifest,
  });
  return { runtime, urlReader };
}
589
+
590
/**
 * Compile one model file and package the result as a `SerializedModel`.
 *
 * Compile errors are not caught here; they reject the returned promise.
 *
 * @param job load-package request (package path, job id, build manifest)
 * @param malloyConfig config shared by all compiles of this job
 * @param modelPath path of the model file relative to the package root
 */
async function compileMalloyModel(
  job: LoadPackageRequest,
  malloyConfig: MalloyConfig,
  modelPath: string,
): Promise<SerializedModel> {
  const startedAt = performance.now();

  // Go through `pathToFileURL` rather than string-building a file URL:
  // a hand-made `file://${path}` parses host=`D:` on Windows.
  const modelURL = pathToFileURL(path.join(job.packagePath, modelPath));
  const importBaseURL = new URL(".", modelURL);

  const { runtime, urlReader } = buildRuntimeForModel(
    job,
    malloyConfig,
    job.requestId,
  );
  const compiledModel = await runtime
    .loadModel(modelURL, { importBaseURL })
    .getModel();
  const modelDef = compiledModel._modelDef;

  // An empty givens list is reported as `undefined`, not `[]`.
  const declaredGivens = Array.from(compiledModel.givens.values());
  const givens =
    declaredGivens.length === 0
      ? undefined
      : declaredGivens.map((given) => malloyGivenToApi(given as MalloyGiven));

  // Imported sources first, then the model's own.
  const imported = await collectImportedSourceInfos(
    modelDef,
    runtime,
    importBaseURL,
  );
  const sourceInfos = imported.sourceInfos;
  appendLocalSourceInfos(modelDef, sourceInfos, imported.importedNames);

  const extracted = extractSources(modelPath, modelDef, givens);
  const queries = extractQueries(modelPath, modelDef);

  const modelInfo = modelDefToModelInfo(modelDef);
  const filterEntries = Array.from(extracted.filterMap.entries());
  const dataStyles = urlReader.getHackyAccumulatedDataStyles();

  return {
    modelPath,
    modelType: "model",
    modelDef,
    modelInfo,
    sourceInfos,
    sources: extracted.sources,
    queries,
    filterMap: filterEntries,
    givens,
    dataStyles,
    compileDurationMs: performance.now() - startedAt,
  };
}
641
+
642
/**
 * Compile one notebook file and package the result as a
 * `SerializedModel` with per-cell data.
 *
 * Each MALLOY statement extends the model built by the statements
 * before it. For every code cell the function records the cell's
 * `modelDef`, the sources that first became visible in that cell, and —
 * when the cell ends in a query — the prepared query and its info.
 * Notebook-level fields are derived from the last materializer; for a
 * notebook with no MALLOY statements they stay `undefined`.
 *
 * Two details of the per-cell source tracking are deliberate:
 * - sources from *all* imports that are new in a cell are accumulated
 *   (a cell with several imports must not keep only the last import's
 *   sources);
 * - already-seen names are tracked in a `Set`, not via `in` on a plain
 *   object, so a source named like an `Object.prototype` member
 *   (`toString`, `constructor`, …) is not mistaken for one already seen.
 *
 * Errors from compiling a cell's model reject the returned promise;
 * failures to load an import or to prepare a cell's final query are
 * tolerated.
 *
 * @param job load-package request (package path, job id, build manifest)
 * @param malloyConfig config shared by all compiles of this job
 * @param modelPath path of the notebook file relative to the package root
 */
async function compileNotebookModel(
  job: LoadPackageRequest,
  malloyConfig: MalloyConfig,
  modelPath: string,
): Promise<SerializedModel> {
  const compileStart = performance.now();
  const fullPath = path.join(job.packagePath, modelPath);
  // `pathToFileURL` is the only cross-platform way to build a file URL
  // from an OS path.
  const modelURL = pathToFileURL(fullPath);
  const importBaseURL = new URL(".", modelURL);

  const { runtime, urlReader } = buildRuntimeForModel(
    job,
    malloyConfig,
    job.requestId,
  );

  const fileContents = await fs.promises.readFile(modelURL, "utf8");
  const parse = MalloySQLParser.parse(fileContents, modelPath);

  // Build the extendModel chain synchronously so per-cell materializers
  // line up with statement order. Non-MALLOY statements reuse whatever
  // materializer the previous MALLOY statement produced (if any).
  let mm: ModelMaterializer | undefined;
  const perCellMM: (ModelMaterializer | undefined)[] = parse.statements.map(
    (stmt) => {
      if (stmt.type === MalloySQLStatementType.MALLOY) {
        mm =
          mm === undefined
            ? runtime.loadModel(stmt.text, { importBaseURL })
            : mm.extendModel(stmt.text, { importBaseURL });
      }
      return mm;
    },
  );

  // Import URLs processed so far; its length is the offset into the
  // next cell's `imports` list.
  const oldImports: string[] = [];
  // Names of sources already attributed to an earlier cell.
  const seenSourceNames = new Set<string>();
  const notebookCells: SerializedNotebookCell[] = [];

  for (let i = 0; i < parse.statements.length; i++) {
    const stmt = parse.statements[i];
    if (stmt.type === MalloySQLStatementType.MARKDOWN) {
      notebookCells.push({ type: "markdown", text: stmt.text });
      continue;
    }
    if (stmt.type !== MalloySQLStatementType.MALLOY) continue;

    const localMM = perCellMM[i];
    if (!localMM) {
      // Shouldn't happen for a MALLOY statement, but guard rather
      // than crash a whole notebook compile on one corrupt cell.
      continue;
    }
    const currentModelDef = (await localMM.getModel())._modelDef;

    // Sources first visible in this cell: those brought in by imports
    // that are new in this cell, followed by the cell model's own.
    const newSources: Malloy.SourceInfo[] = [];
    const namesFromImports = new Set<string>();
    const newImports = currentModelDef.imports?.slice(oldImports.length);
    if (newImports) {
      for (const importLocation of newImports) {
        try {
          const modelString = await runtime.urlReader.readURL(
            new URL(importLocation.importURL),
          );
          const importModel = (
            await runtime
              .loadModel(modelString as string, { importBaseURL })
              .getModel()
          )._modelDef;
          const importInfo = modelDefToModelInfo(importModel);
          for (const entry of importInfo.entries) {
            if (entry.kind !== "source") continue;
            if (
              seenSourceNames.has(entry.name) ||
              namesFromImports.has(entry.name)
            ) {
              continue;
            }
            namesFromImports.add(entry.name);
            newSources.push(entry as Malloy.SourceInfo);
          }
          oldImports.push(importLocation.importURL.toString());
        } catch {
          // Best-effort: an import that fails to load is skipped.
        }
      }
    }
    const currentInfo = modelDefToModelInfo(currentModelDef);
    for (const entry of currentInfo.entries) {
      if (entry.kind === "source" && !seenSourceNames.has(entry.name)) {
        newSources.push(entry as Malloy.SourceInfo);
      }
    }
    // Only now mark this cell's sources as seen, so the two passes
    // above are both judged against earlier cells only.
    for (const source of newSources) seenSourceNames.add(source.name);

    // Capture the per-cell final-query queryDef so the main thread can
    // hydrate a QueryMaterializer via
    // `ModelMaterializer._loadQueryFromQueryDef` without a recompile.
    const runnable = localMM.loadFinalQuery();
    let cellQueryDef: Query | undefined;
    let queryInfo: Malloy.QueryInfo | undefined;
    try {
      const prepared = await runnable.getPreparedQuery();
      cellQueryDef = prepared._query;
      const queryName =
        (prepared._query as NamedQueryDef).as ||
        (prepared._query as NamedQueryDef).name;
      // The cell's final query is taken to be the last anonymous query
      // of the cell's model info.
      const anonymous =
        currentInfo.anonymous_queries[
          currentInfo.anonymous_queries.length - 1
        ];
      if (anonymous) {
        queryInfo = {
          name: queryName,
          schema: anonymous.schema,
          annotations: anonymous.annotations,
          definition: anonymous.definition,
          code: anonymous.code,
          location: anonymous.location,
        } as Malloy.QueryInfo;
      }
    } catch {
      // Some cells (source-only) have no final query; that's fine.
    }

    notebookCells.push({
      type: "code",
      text: stmt.text,
      cellModelDef: currentModelDef,
      cellQueryDef,
      newSources,
      queryInfo,
    });
  }

  // Aggregate (notebook-level) artifacts — derived from the final mm
  // if any MALLOY statements were present. If the notebook is all
  // markdown, everything below stays undefined.
  let finalModelDef: ModelDef | undefined;
  let finalSources: ApiSourceWire[] | undefined;
  let finalQueries: ApiQueryWire[] | undefined;
  let finalSourceInfos: Malloy.SourceInfo[] | undefined;
  let finalFilterMap: Map<string, FilterDefinition[]> | undefined;
  let finalGivens: ApiGivenWire[] | undefined;
  if (mm) {
    const compiled = await mm.getModel();
    finalModelDef = compiled._modelDef;
    const malloyGivens = Array.from(compiled.givens.values());
    finalGivens =
      malloyGivens.length > 0
        ? malloyGivens.map((g) => malloyGivenToApi(g as MalloyGiven))
        : undefined;
    const collected = await collectImportedSourceInfos(
      finalModelDef,
      runtime,
      importBaseURL,
    );
    appendLocalSourceInfos(
      finalModelDef,
      collected.sourceInfos,
      collected.importedNames,
    );
    finalSourceInfos = collected.sourceInfos;
    const extracted = extractSources(modelPath, finalModelDef, finalGivens);
    finalSources = extracted.sources;
    finalFilterMap = extracted.filterMap;
    finalQueries = extractQueries(modelPath, finalModelDef);
  }

  return {
    modelPath,
    modelType: "notebook",
    modelDef: finalModelDef,
    modelInfo: finalModelDef ? modelDefToModelInfo(finalModelDef) : undefined,
    sourceInfos: finalSourceInfos,
    sources: finalSources,
    queries: finalQueries,
    filterMap: finalFilterMap
      ? Array.from(finalFilterMap.entries())
      : undefined,
    givens: finalGivens,
    notebookCells,
    dataStyles: urlReader.getHackyAccumulatedDataStyles(),
    compileDurationMs: performance.now() - compileStart,
  };
}
824
+
825
/**
 * Compile a single model or notebook, never rejecting.
 *
 * The file suffix picks the compiler. Anything the compiler throws is
 * returned in-band as `compilationError`, as is a path with neither
 * known suffix.
 *
 * @param job load-package request forwarded to the compiler
 * @param malloyConfig config forwarded to the compiler
 * @param modelPath path of the file relative to the package root
 */
async function compileOneModel(
  job: LoadPackageRequest,
  malloyConfig: MalloyConfig,
  modelPath: string,
): Promise<SerializedModel> {
  const isModel = modelPath.endsWith(MODEL_FILE_SUFFIX);
  const isNotebook = modelPath.endsWith(NOTEBOOK_FILE_SUFFIX);

  if (!isModel && !isNotebook) {
    return {
      modelPath,
      modelType: "model",
      compilationError: {
        name: "Error",
        message: `Unknown model file suffix: ${modelPath}`,
      },
    };
  }

  try {
    return isModel
      ? await compileMalloyModel(job, malloyConfig, modelPath)
      : await compileNotebookModel(job, malloyConfig, modelPath);
  } catch (error) {
    return {
      modelPath,
      modelType: isNotebook ? "notebook" : "model",
      compilationError: serializeError(error),
    };
  }
}
858
+
859
+ // ──────────────────────────────────────────────────────────────────────
860
+ // The actual load-package job
861
+ // ──────────────────────────────────────────────────────────────────────
862
+
863
/**
 * Run one load-package job: read the package metadata, find the model files
 * in the package directory, and compile all of them concurrently.
 *
 * @param job - The request describing which package to load.
 * @returns A `load-package-result` message echoing `job.requestId`, with the
 *   package metadata, one entry per model path, and the elapsed wall time in
 *   milliseconds.
 */
async function loadPackage(
   job: LoadPackageRequest,
): Promise<LoadPackageResult> {
   const startedAt = performance.now();

   const packageMetadata = await readPackageMetadata(job.packagePath);
   const malloyConfig = buildWorkerMalloyConfig(job);

   const modelPaths = filterModelPaths(await listPackageFiles(job.packagePath));
   // Start every compilation before awaiting any of them.
   const compilations = modelPaths.map((modelPath) =>
      compileOneModel(job, malloyConfig, modelPath),
   );
   const models = await Promise.all(compilations);

   return {
      type: "load-package-result",
      requestId: job.requestId,
      packageMetadata,
      models,
      loadDurationMs: performance.now() - startedAt,
   };
}
887
+
888
+ // ──────────────────────────────────────────────────────────────────────
889
+ // Error serialization
890
+ // ──────────────────────────────────────────────────────────────────────
891
+
892
/**
 * Produce a readable message for a thrown value that is not an `Error`.
 *
 * Primitives, functions, arrays and objects with their own `toString` keep
 * their `String(...)` rendering. Other objects would render as the useless
 * `"[object Object]"`, so a string `message` property is preferred, then a
 * JSON rendering.
 */
function describeThrownValue(value: unknown): string {
   if (typeof value !== "object" || value === null) {
      return String(value);
   }
   try {
      const { message, toString } = value as {
         message?: unknown;
         toString?: unknown;
      };
      if (typeof message === "string") {
         return message;
      }
      if (
         typeof toString === "function" &&
         toString !== Object.prototype.toString
      ) {
         return String(value);
      }
      // JSON.stringify yields undefined when a toJSON() returns undefined.
      const json: string | undefined = JSON.stringify(value);
      if (json !== undefined) {
         return json;
      }
   } catch {
      // Circular references, BigInt members or throwing accessors land here;
      // fall back to the generic tag below.
   }
   return Object.prototype.toString.call(value);
}

/**
 * Convert any thrown value into a plain-data `SerializedError`.
 *
 * - `MalloyError`: keeps name, message and stack, attaches `problems` as
 *   `malloyProblems`, and sets `isCompilationError`.
 * - Any other `Error`: keeps name, message and stack.
 * - Anything else: name `"Error"` with a best-effort message (see
 *   `describeThrownValue`).
 *
 * @param error - The caught value; may be of any type.
 * @returns The serialized form of `error`.
 */
function serializeError(error: unknown): SerializedError {
   if (error instanceof MalloyError) {
      return {
         name: error.name,
         message: error.message,
         stack: error.stack,
         malloyProblems: error.problems as unknown[],
         isCompilationError: true,
      };
   }
   if (error instanceof Error) {
      return {
         name: error.name,
         message: error.message,
         stack: error.stack,
      };
   }
   return { name: "Error", message: describeThrownValue(error) };
}
911
+
912
/**
 * Rebuild an `Error` instance from its serialized form.
 *
 * Only `name`, `message` and (when non-empty) `stack` are restored; any other
 * fields on the serialized object are not copied onto the result.
 *
 * @param serialized - The plain-data error description.
 * @returns A fresh `Error` carrying the serialized name, message and stack.
 */
function deserializeError(serialized: SerializedError): Error {
   const { name, message, stack } = serialized;
   const revived = Object.assign(new Error(message), { name });
   if (stack) {
      // Replace the locally captured stack with the one from the origin.
      revived.stack = stack;
   }
   return revived;
}
918
+
919
+ // ──────────────────────────────────────────────────────────────────────
920
+ // Message dispatcher
921
+ // ──────────────────────────────────────────────────────────────────────
922
+
923
// Set once a "shutdown" message has been received; never reset.
let shuttingDown = false;
// Request ids of load-package jobs that have been started and not yet settled.
const inFlightJobs = new Set<string>();

// Route every message from the main thread:
//  - "shutdown"     → mark the worker as draining and exit if already idle;
//  - "load-package" → start the job, or refuse it when draining;
//  - anything else  → hand it to dispatchMainResponse.
port.on("message", (message: MainToWorkerMessage) => {
   switch (message.type) {
      case "shutdown":
         shuttingDown = true;
         maybeExit();
         return;

      case "load-package": {
         if (!shuttingDown) {
            inFlightJobs.add(message.requestId);
            // runJob reports its own failures, so the promise is not awaited.
            void runJob(message);
            return;
         }
         const refusal: LoadPackageError = {
            type: "load-package-error",
            requestId: message.requestId,
            error: {
               name: "ShuttingDown",
               message: "Package-load worker is shutting down",
            },
         };
         port.postMessage(refusal);
         return;
      }

      default:
         dispatchMainResponse(message);
   }
});
951
+
952
/**
 * Execute one load-package job and post its outcome back over `port`.
 *
 * A successful load posts the result message. Any failure, including a
 * failure while posting the result, posts a `load-package-error` instead.
 * Either way the job is removed from `inFlightJobs` and `maybeExit()` is
 * called afterwards.
 *
 * @param job - The load-package request to execute.
 */
async function runJob(job: LoadPackageRequest): Promise<void> {
   const { requestId } = job;
   try {
      port.postMessage(await loadPackage(job));
   } catch (error) {
      const failure: LoadPackageError = {
         type: "load-package-error",
         requestId,
         error: serializeError(error),
      };
      port.postMessage(failure);
   } finally {
      inFlightJobs.delete(requestId);
      maybeExit();
   }
}
968
+
969
/**
 * Exit the process with status 0 once shutdown has been requested and no work
 * remains: no in-flight jobs and no pending RPCs. Otherwise do nothing.
 */
function maybeExit(): void {
   if (!shuttingDown) {
      return;
   }
   if (inFlightJobs.size !== 0 || pendingRpc.size !== 0) {
      return;
   }
   // Defer the exit so the postMessage queue has a tick to flush and the
   // last result actually reaches the parent.
   setImmediate(() => process.exit(0));
}
976
+
977
// Signal that module initialization is complete. The pool holds back jobs for
// a newly-spawned worker until it sees this message, so dispatch never races
// the worker's module-init time.
const readyAnnouncement = { type: "ready" } as const;
port.postMessage(readyAnnouncement);