@malloy-publisher/server 0.0.198-dev4 → 0.0.198-dev6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,61 +1,103 @@
1
1
  /**
2
- * Compile worker entry point.
2
+ * Package-load worker entry point.
3
3
  *
4
- * Runs inside a worker_threads `Worker`. Owns no DuckDB / native
5
- * connection state — every schema lookup is proxied back to the
6
- * main thread via {@link SchemaForTablesRequest} / {@link
7
- * SchemaForSqlRequest}. The point of this file is to take the
8
- * dominant CPU cost of a Malloy compile (parser, type checker, IR
9
- * builder, sourceInfo extraction) off the main event loop so the
10
- * Kubernetes liveness probe on `/health/liveness` never gets parked
11
- * behind a multi-second compile.
4
+ * Runs inside a worker_threads `Worker`. Owns **no native connection
5
+ * state of any kind** — every connection lookup, schema fetch, SQL
6
+ * block schema, and non-file URL read is proxied back to the main
7
+ * thread via correlated RPC messages. The worker is intentionally
8
+ * pure-CPU: only Malloy parser / type-checker / IR-builder runs here.
12
9
  *
13
- * Contract:
14
- * - Receives {@link CompileJobRequest} messages from the parent
15
- * port. Dispatches one compile per message.
16
- * - Proxies schema and URL-reader operations back to the parent
17
- * via correlated RPC requests; awaits matching responses.
18
- * - Sends back exactly one {@link CompileJobResult} or {@link
19
- * CompileJobError} per job.
20
- * - Honours a graceful {@link ShutdownRequest} so the pool can
21
- * drain on SIGTERM.
10
+ * Why no DuckDB in the worker?
11
+ * ----------------------------
12
+ * DuckDB's native bindings cannot be safely loaded into more than one
13
+ * isolate of the same Node/Bun process (we hit Bun crash 0x20131 when
14
+ * the worker isolate and the main isolate both `dlopen` duckdb-native).
15
+ * Even if Bun fixes that, holding a duckdb handle in the worker
16
+ * duplicates the in-memory DB state and adds native-module load
17
+ * latency to every worker spawn. Database probing (`readDatabases`)
18
+ * stays on the main thread where it can reuse the package's existing
19
+ * DuckDB connection (see PR #772).
22
20
  *
23
- * This file is bundled separately by build.ts and shipped as
24
- * `dist/compile_worker.mjs`.
21
+ * Boundary
22
+ * --------
23
+ * 1. Worker: read `publisher.json` (package manifest).
24
+ * 2. Worker: compile every `.malloy` and `.malloynb` via Malloy.
25
+ * All `lookupConnection(name)` / schema-fetch calls Malloy makes
26
+ * during compile are proxied to the main thread's live
27
+ * connection pool over RPC.
28
+ * 3. Worker: for each model, capture the structured-clonable fields
29
+ * the main-thread `Model` constructor needs: `modelDef`,
30
+ * `sourceInfos`, `sources`, `queries`, `filterMap`, `givens`,
31
+ * dataStyles, plus per-cell `modelDef` + `queryDef` for
32
+ * notebooks (so per-cell materializers / runnables can be
33
+ * hydrated on the main thread via `Runtime._loadModelFromModelDef`
34
+ * / `ModelMaterializer._loadQueryFromQueryDef` — no recompile).
35
+ * 4. Main thread: probe embedded `.parquet` / `.csv` databases
36
+ * against the package's existing DuckDB connection.
37
+ *
38
+ * Per-model compile failures are returned in-band on
39
+ * `SerializedModel.compilationError`; the rest of the package keeps
40
+ * loading. Whole-package failures (e.g. manifest missing) come back
41
+ * as `LoadPackageError`.
42
+ *
43
+ * Bundled separately by `build.ts` as `dist/package_load_worker.mjs`
44
+ * so `new Worker(...)` can load it without dragging in the entire
45
+ * server module graph.
25
46
  */
26
47
  import {
27
48
  contextOverlay,
28
- MalloyConfig,
29
- MalloyError,
30
- Runtime,
31
- isSourceDef,
32
- modelDefToModelInfo,
33
49
  type Annotation,
50
+ type BuildManifestEntry,
34
51
  type Connection,
35
52
  type FetchSchemaOptions,
36
53
  type LookupConnection,
54
+ MalloyConfig,
55
+ MalloyError,
37
56
  type ModelDef,
38
57
  type ModelMaterializer,
58
+ modelDefToModelInfo,
39
59
  type NamedModelObject,
40
60
  type NamedQueryDef,
61
+ type Query,
62
+ Runtime,
41
63
  type SQLSourceDef,
42
64
  type SQLSourceRequest,
43
65
  type StructDef,
44
66
  type TableSourceDef,
45
67
  type TurtleDef,
68
+ isSourceDef,
46
69
  } from "@malloydata/malloy";
47
70
  import * as Malloy from "@malloydata/malloy-interfaces";
71
+ import {
72
+ MalloySQLParser,
73
+ MalloySQLStatementType,
74
+ } from "@malloydata/malloy-sql";
48
75
  import * as fs from "fs";
76
+ import * as path from "path";
49
77
  import { parentPort, threadId } from "node:worker_threads";
50
- import { fileURLToPath } from "url";
78
+ import recursive from "recursive-readdir";
79
+ import { fileURLToPath, pathToFileURL } from "url";
80
+
81
+ import {
82
+ MODEL_FILE_SUFFIX,
83
+ NOTEBOOK_FILE_SUFFIX,
84
+ PACKAGE_MANIFEST_NAME,
85
+ } from "../constants";
86
+ import { HackyDataStylesAccumulator } from "../data_styles";
51
87
  import { parseFilters, type FilterDefinition } from "../service/filter";
88
+ import {
89
+ malloyGivenToApi,
90
+ type MalloyGiven,
91
+ type MalloyGivenApi,
92
+ } from "../service/given";
93
+ import { ignoreDotfiles } from "../utils";
52
94
  import type {
53
- CompileJobError,
54
- CompileJobRequest,
55
- CompileJobResult,
56
95
  ConnectionMetadata,
57
96
  ConnectionMetadataRequest,
58
97
  ConnectionMetadataResponse,
98
+ LoadPackageError,
99
+ LoadPackageRequest,
100
+ LoadPackageResult,
59
101
  MainToWorkerMessage,
60
102
  ReadUrlRequest,
61
103
  ReadUrlResponse,
@@ -64,11 +106,13 @@ import type {
64
106
  SchemaForTablesRequest,
65
107
  SchemaForTablesResponse,
66
108
  SerializedError,
109
+ SerializedModel,
110
+ SerializedNotebookCell,
67
111
  } from "./protocol";
68
112
 
69
113
  if (!parentPort) {
70
114
  throw new Error(
71
- "compile_worker.ts must be loaded inside a worker_threads Worker",
115
+ "package_load_worker.ts must be loaded inside a worker_threads Worker",
72
116
  );
73
117
  }
74
118
 
@@ -123,17 +167,9 @@ function dispatchMainResponse(message: MainToWorkerMessage): void {
123
167
  }
124
168
 
125
169
  // ──────────────────────────────────────────────────────────────────────
126
- // Stub InfoConnection that proxies schema fetches to the main thread
170
+ // Proxy connection: stand-in for every connection at compile time
127
171
  // ──────────────────────────────────────────────────────────────────────
128
172
 
129
- /**
130
- * Minimal `Connection` implementation that satisfies Malloy's compile
131
- * pipeline. Only the methods called during compile are implemented
132
- * meaningfully; the rest throw, since the worker never executes SQL.
133
- *
134
- * Holds the `jobId` so the main thread can route the schema RPC to
135
- * the right environment-side `MalloyConfig`.
136
- */
137
173
  class ProxyConnection {
138
174
  public readonly name: string;
139
175
  public readonly dialectName: string;
@@ -190,21 +226,19 @@ class ProxyConnection {
190
226
  };
191
227
  port.postMessage(req);
192
228
  });
193
- if (response.error !== undefined) {
194
- return { error: response.error };
195
- }
229
+ if (response.error !== undefined) return { error: response.error };
196
230
  if (response.structDef === undefined) {
197
231
  return { error: "Empty SQL schema response from main thread" };
198
232
  }
199
233
  return { structDef: response.structDef };
200
234
  }
201
235
 
202
- // Compile path never calls these. We intentionally throw rather
203
- // than silently no-op so a misrouted query in a worker surfaces
204
- // as a loud error rather than a wrong-answer bug.
236
+ // Compile path never calls these on a non-duckdb connection (the
237
+ // worker doesn't execute non-duckdb SQL). We throw rather than no-op
238
+ // so a misrouted call surfaces loudly.
205
239
  async runSQL(): Promise<never> {
206
240
  throw new Error(
207
- `ProxyConnection(${this.name}): runSQL is not available in compile workers`,
241
+ `ProxyConnection(${this.name}): runSQL is not available in package-load workers`,
208
242
  );
209
243
  }
210
244
  isPool(): false {
@@ -224,7 +258,7 @@ class ProxyConnection {
224
258
  }
225
259
  async estimateQueryCost(): Promise<never> {
226
260
  throw new Error(
227
- `ProxyConnection(${this.name}): estimateQueryCost not available in compile workers`,
261
+ `ProxyConnection(${this.name}): estimateQueryCost not available in package-load workers`,
228
262
  );
229
263
  }
230
264
  async fetchMetadata(): Promise<Record<string, unknown>> {
@@ -250,19 +284,18 @@ function serializeFetchOptions(options: FetchSchemaOptions): {
250
284
  }
251
285
 
252
286
  // ──────────────────────────────────────────────────────────────────────
253
- // URLReader: try fs first, fall back to main-thread RPC for non-file URLs
287
+ // URLReader: file:// fs; everything else proxies to main thread
254
288
  // ──────────────────────────────────────────────────────────────────────
255
289
 
256
- function makeWorkerUrlReader(jobId: string) {
290
+ function makeWorkerUrlReader(jobId: string): {
291
+ readURL: (url: URL) => Promise<string>;
292
+ } {
257
293
  return {
258
294
  readURL: async (url: URL): Promise<string> => {
259
295
  if (url.protocol === "file:") {
260
296
  const filePath = fileURLToPath(url);
261
297
  return fs.promises.readFile(filePath, "utf8");
262
298
  }
263
- // Non-file URL — delegate to main so semantics stay
264
- // identical to the in-process URL_READER (e.g. allow
265
- // future https:// imports).
266
299
  const response = await callMain<ReadUrlResponse>((requestId) => {
267
300
  const req: ReadUrlRequest = {
268
301
  type: "read-url",
@@ -281,25 +314,25 @@ function makeWorkerUrlReader(jobId: string) {
281
314
  // MalloyConfig assembly inside the worker
282
315
  // ──────────────────────────────────────────────────────────────────────
283
316
 
284
- function buildWorkerMalloyConfig(job: CompileJobRequest): MalloyConfig {
285
- // Connections are resolved lazily on first lookup via a metadata
286
- // RPC back to the main thread — see ConnectionMetadataRequest.
287
- // We never enumerate the connection list upfront; the package
288
- // layer doesn't always have one (e.g. environment-wrapped
289
- // connections appear only when Malloy compiles a `table('...')`
290
- // reference that names them).
291
- //
292
- // Concurrent lookups for the same name are deduped via
293
- // `inflight` — Malloy's compile pipeline can fan-out multiple
294
- // schema fetches that all hit `lookupConnection(name)` before
295
- // any of them resolve, and we don't want to N-multiply the RPC.
317
+ /**
318
+ * Build the package's MalloyConfig inside the worker. Every
319
+ * connection name — including `"duckdb"` — resolves to a
320
+ * {@link ProxyConnection} that RPCs schema fetches back to the main
321
+ * thread. The worker never holds a native connection handle, which
322
+ * keeps it pure-CPU and avoids dlopen'ing duckdb in a second isolate.
323
+ *
324
+ * Concurrent `lookupConnection(name)` calls for the same name are
325
+ * deduped via `inflight` — Malloy's compile pipeline can fan out
326
+ * many schema fetches that all hit `lookupConnection` before any
327
+ * resolve, and we don't want to N-multiply the metadata RPC.
328
+ */
329
+ function buildWorkerMalloyConfig(job: LoadPackageRequest): MalloyConfig {
296
330
  const proxies = new Map<string, ProxyConnection>();
297
331
  const inflight = new Map<string, Promise<ProxyConnection>>();
332
+
298
333
  const config = new MalloyConfig(
299
334
  { connections: {} },
300
- {
301
- config: contextOverlay({ rootDirectory: job.packagePath }),
302
- },
335
+ { config: contextOverlay({ rootDirectory: job.packagePath }) },
303
336
  );
304
337
  config.wrapConnections(
305
338
  (_base: LookupConnection<Connection>): LookupConnection<Connection> => ({
@@ -331,8 +364,7 @@ function buildWorkerMalloyConfig(job: CompileJobRequest): MalloyConfig {
331
364
  })();
332
365
  inflight.set(effectiveName, pending);
333
366
  }
334
- const proxy = await pending;
335
- return proxy as unknown as Connection;
367
+ return (await pending) as unknown as Connection;
336
368
  },
337
369
  }),
338
370
  );
@@ -340,96 +372,82 @@ function buildWorkerMalloyConfig(job: CompileJobRequest): MalloyConfig {
340
372
  }
341
373
 
342
374
  // ──────────────────────────────────────────────────────────────────────
343
- // The actual compile mirrors Model.create's in-process flow but
344
- // only the parts that produce data shippable across postMessage.
375
+ // Filesystem helpers (replicated from service/package.ts so the worker
376
+ // stays decoupled from the main-thread service module graph)
345
377
  // ──────────────────────────────────────────────────────────────────────
346
378
 
347
- async function compile(job: CompileJobRequest): Promise<CompileJobResult> {
348
- const compileStart = performance.now();
379
+ async function readPackageMetadata(
380
+ packagePath: string,
381
+ ): Promise<{ name?: string; description?: string }> {
382
+ const manifestPath = path.join(packagePath, PACKAGE_MANIFEST_NAME);
383
+ const contents = await fs.promises.readFile(manifestPath, "utf8");
384
+ const parsed = JSON.parse(contents) as {
385
+ name?: string;
386
+ description?: string;
387
+ };
388
+ return { name: parsed.name, description: parsed.description };
389
+ }
349
390
 
350
- const malloyConfig = buildWorkerMalloyConfig(job);
351
- const urlReader = makeWorkerUrlReader(job.requestId);
391
+ async function listPackageFiles(packagePath: string): Promise<string[]> {
392
+ const files = await recursive(packagePath, [ignoreDotfiles]);
393
+ return files.map((full: string) =>
394
+ path.relative(packagePath, full).replace(/\\/g, "/"),
395
+ );
396
+ }
352
397
 
353
- const runtime = new Runtime({
354
- urlReader,
355
- config: malloyConfig,
356
- // job.buildManifest is wire-typed as `unknown` because the
357
- // worker protocol doesn't depend on Malloy types — assert
358
- // the shape Malloy's Runtime expects here. We only pass the
359
- // wrapper when the caller actually supplied entries.
360
- buildManifest:
361
- job.buildManifest !== undefined && job.buildManifest !== null
362
- ? {
363
- entries: job.buildManifest as Record<
364
- string,
365
- import("@malloydata/malloy").BuildManifestEntry
366
- >,
367
- strict: false,
368
- }
369
- : undefined,
370
- });
398
+ function filterModelPaths(allRelative: string[]): string[] {
399
+ return allRelative.filter(
400
+ (p) => p.endsWith(MODEL_FILE_SUFFIX) || p.endsWith(NOTEBOOK_FILE_SUFFIX),
401
+ );
402
+ }
371
403
 
372
- // Two compile shapes:
373
- // 1. File-backed: `modelPath` resolves to a file:// URL the runtime
374
- // reads via urlReader. The importBaseURL is the model's
375
- // directory.
376
- // 2. Inline-source: `inlineSource` is a Malloy string the runtime
377
- // compiles directly. Mostly used by synthesized snippets like
378
- // `source: temp is duckdb.table('<path>')` from the package
379
- // database-info probe. We use the caller-provided importBaseURL
380
- // (or fall back to the package root) so any `import "…"`
381
- // statements in the snippet resolve correctly.
382
- const isInline = typeof job.inlineSource === "string";
383
- if (!isInline && typeof job.modelPath !== "string") {
384
- throw new Error(
385
- "CompileJobRequest must supply either inlineSource or modelPath",
386
- );
387
- }
388
- const importBaseURL = isInline
389
- ? new URL(job.importBaseURL ?? `file://${job.packagePath}/`)
390
- : new URL(".", new URL(`file://${job.packagePath}/${job.modelPath}`));
391
-
392
- const mm: ModelMaterializer = isInline
393
- ? runtime.loadModel(job.inlineSource as string, { importBaseURL })
394
- : runtime.loadModel(
395
- new URL(`file://${job.packagePath}/${job.modelPath}`),
396
- { importBaseURL },
397
- );
398
-
399
- const compiledModel = await mm.getModel();
400
- const modelDef = compiledModel._modelDef as ModelDef;
401
-
402
- // Givens — converted to API shape here so the main thread can
403
- // stash them on the Model without needing Malloy's MalloyGiven
404
- // type (which has non-serializable methods).
405
- const malloyGivens = Array.from(compiledModel.givens.values());
406
- const givens: ApiGivenWire[] | undefined =
407
- malloyGivens.length > 0
408
- ? malloyGivens.map((g) => malloyGivenToWire(g))
409
- : undefined;
404
+ // ──────────────────────────────────────────────────────────────────────
405
+ // Model compile (mirrors service/model.ts but produces SerializedModel)
406
+ // ──────────────────────────────────────────────────────────────────────
410
407
 
411
- // Imported sourceInfos — mirrors Model.create line 199–242. We
412
- // collect them here so the main thread doesn't have to recompile
413
- // imports just to fill in the response.
408
+ interface ApiSourceWire {
409
+ name: string;
410
+ annotations?: string[];
411
+ views?: { name: string; annotations?: string[] }[];
412
+ filters?: unknown[];
413
+ givens?: unknown[];
414
+ }
415
+ interface ApiQueryWire {
416
+ name: string;
417
+ sourceName?: string;
418
+ annotations?: string[];
419
+ }
420
+ type ApiGivenWire = MalloyGivenApi;
421
+
422
+ async function collectImportedSourceInfos(
423
+ modelDef: ModelDef,
424
+ runtime: Runtime,
425
+ importBaseURL: URL,
426
+ ): Promise<{
427
+ sourceInfos: Malloy.SourceInfo[];
428
+ importedNames: Set<string>;
429
+ }> {
414
430
  const sourceInfos: Malloy.SourceInfo[] = [];
415
- const importedSourceNames = new Set<string>();
431
+ const importedNames = new Set<string>();
416
432
  const imports = modelDef.imports ?? [];
417
433
  for (const importLocation of imports) {
418
434
  try {
419
- const modelString = await urlReader.readURL(
435
+ const modelString = await runtime.urlReader.readURL(
420
436
  new URL(importLocation.importURL),
421
437
  );
422
438
  const importedModelDef = (
423
- await runtime.loadModel(modelString, { importBaseURL }).getModel()
439
+ await runtime
440
+ .loadModel(modelString as string, { importBaseURL })
441
+ .getModel()
424
442
  )._modelDef;
425
443
  const importedInfo = modelDefToModelInfo(importedModelDef);
426
444
  const importedSources = importedInfo.entries.filter(
427
445
  (entry) => entry.kind === "source",
428
446
  ) as Malloy.SourceInfo[];
429
447
  for (const source of importedSources) {
430
- if (!importedSourceNames.has(source.name)) {
448
+ if (!importedNames.has(source.name)) {
431
449
  sourceInfos.push(source);
432
- importedSourceNames.add(source.name);
450
+ importedNames.add(source.name);
433
451
  }
434
452
  }
435
453
  } catch {
@@ -437,102 +455,30 @@ async function compile(job: CompileJobRequest): Promise<CompileJobResult> {
437
455
  // of warning-and-skipping when an import can't be loaded.
438
456
  }
439
457
  }
458
+ return { sourceInfos, importedNames };
459
+ }
460
+
461
+ function appendLocalSourceInfos(
462
+ modelDef: ModelDef,
463
+ target: Malloy.SourceInfo[],
464
+ importedNames: Set<string>,
465
+ ): void {
440
466
  const localInfo = modelDefToModelInfo(modelDef);
441
467
  const localSources = localInfo.entries.filter(
442
468
  (entry) => entry.kind === "source",
443
469
  ) as Malloy.SourceInfo[];
444
470
  for (const source of localSources) {
445
- if (!importedSourceNames.has(source.name)) {
446
- sourceInfos.push(source);
447
- }
471
+ if (!importedNames.has(source.name)) target.push(source);
448
472
  }
449
-
450
- // Inline-source compiles have no on-disk modelPath. extractSources /
451
- // extractQueries use modelPath only to filter annotations by the URL
452
- // they came from; the inline path has no such annotations to filter,
453
- // so `""` (matches everything via `includes`) is the correct neutral.
454
- const modelPathForAnnotations = job.modelPath ?? "";
455
- const { sources, filterMap } = extractSources(
456
- modelPathForAnnotations,
457
- modelDef,
458
- );
459
- const queries = extractQueries(modelPathForAnnotations, modelDef);
460
- const filterMapEntries: Array<[string, FilterDefinition[]]> = Array.from(
461
- filterMap.entries(),
462
- );
463
-
464
- return {
465
- type: "compile-result",
466
- requestId: job.requestId,
467
- modelDef,
468
- sourceInfos,
469
- sources,
470
- queries,
471
- filterMap: filterMapEntries,
472
- givens,
473
- // dataStyles: the in-process HackyDataStylesAccumulator is fed
474
- // by the URLReader. We don't reuse it here — main thread will
475
- // accumulate its own when it builds the lazy materializer.
476
- dataStyles: {},
477
- compileDurationMs: performance.now() - compileStart,
478
- };
479
- }
480
-
481
- /**
482
- * Wire-friendly mirror of the publisher's `ApiGiven`. Inlined here so
483
- * the worker doesn't import the OpenAPI-generated `components` map
484
- * (which would drag the whole api.ts surface into the worker bundle).
485
- * Kept structurally identical to `ApiGiven` so the main thread can
486
- * type-assert it without conversion.
487
- */
488
- interface ApiGivenWire {
489
- name: string;
490
- type: string;
491
- annotations?: string[];
492
- }
493
-
494
- interface MalloyGivenLike {
495
- name: string;
496
- type: { type: string; filterType?: string };
497
- getTaglines(regex: RegExp): string[];
498
- }
499
-
500
- function malloyGivenToWire(given: MalloyGivenLike): ApiGivenWire {
501
- const t = given.type;
502
- const renderedType =
503
- t.type === "filter expression" ? `filter<${t.filterType}>` : t.type;
504
- return {
505
- name: given.name,
506
- type: renderedType,
507
- annotations: given.getTaglines(/^#\(/),
508
- };
509
- }
510
-
511
- // ──────────────────────────────────────────────────────────────────────
512
- // extractSources / extractQueries — direct copies of the static
513
- // helpers in Model.ts. Inlined here to keep the worker independent
514
- // of the main-thread module graph (smaller bundle, fewer imports of
515
- // things that pull in DuckDB or AWS SDK by transitive include).
516
- // ──────────────────────────────────────────────────────────────────────
517
-
518
- interface ApiSource {
519
- name: string;
520
- annotations?: string[];
521
- views?: { name: string; annotations?: string[] }[];
522
- filters?: unknown[];
523
- }
524
- interface ApiQuery {
525
- name: string;
526
- sourceName?: string;
527
- annotations?: string[];
528
473
  }
529
474
 
530
475
  function extractSources(
531
476
  modelPath: string,
532
477
  modelDef: ModelDef,
533
- ): { sources: ApiSource[]; filterMap: Map<string, FilterDefinition[]> } {
478
+ givens: ApiGivenWire[] | undefined,
479
+ ): { sources: ApiSourceWire[]; filterMap: Map<string, FilterDefinition[]> } {
534
480
  const filterMap = new Map<string, FilterDefinition[]>();
535
- const sources: ApiSource[] = Object.values(modelDef.contents)
481
+ const sources: ApiSourceWire[] = Object.values(modelDef.contents)
536
482
  .filter((obj) => isSourceDef(obj))
537
483
  .map((sourceObj) => {
538
484
  const sourceName =
@@ -572,8 +518,7 @@ function extractSources(
572
518
  });
573
519
  }
574
520
  } catch {
575
- // Mirrors the in-process behaviour: filter parse
576
- // errors are warnings, not fatal compile failures.
521
+ /* parse errors are warnings; matches in-process */
577
522
  }
578
523
  }
579
524
 
@@ -596,13 +541,14 @@ function extractSources(
596
541
  annotations,
597
542
  views,
598
543
  filters,
599
- } as ApiSource;
544
+ givens,
545
+ } as ApiSourceWire;
600
546
  });
601
547
 
602
548
  return { sources, filterMap };
603
549
  }
604
550
 
605
- function extractQueries(modelPath: string, modelDef: ModelDef): ApiQuery[] {
551
+ function extractQueries(modelPath: string, modelDef: ModelDef): ApiQueryWire[] {
606
552
  const isNamedQuery = (obj: NamedModelObject): obj is NamedQueryDef =>
607
553
  obj.type === "query";
608
554
  return Object.values(modelDef.contents)
@@ -618,17 +564,333 @@ function extractQueries(modelPath: string, modelDef: ModelDef): ApiQuery[] {
618
564
  }));
619
565
  }
620
566
 
567
+ function buildRuntimeForModel(
568
+ job: LoadPackageRequest,
569
+ malloyConfig: MalloyConfig,
570
+ jobId: string,
571
+ ): { runtime: Runtime; urlReader: HackyDataStylesAccumulator } {
572
+ const urlReader = new HackyDataStylesAccumulator(makeWorkerUrlReader(jobId));
573
+ const runtime = new Runtime({
574
+ urlReader,
575
+ config: malloyConfig,
576
+ buildManifest:
577
+ job.buildManifest !== undefined && job.buildManifest !== null
578
+ ? {
579
+ entries: job.buildManifest as Record<
580
+ string,
581
+ BuildManifestEntry
582
+ >,
583
+ strict: false,
584
+ }
585
+ : undefined,
586
+ });
587
+ return { runtime, urlReader };
588
+ }
589
+
590
+ async function compileMalloyModel(
591
+ job: LoadPackageRequest,
592
+ malloyConfig: MalloyConfig,
593
+ modelPath: string,
594
+ ): Promise<SerializedModel> {
595
+ const compileStart = performance.now();
596
+ const fullPath = path.join(job.packagePath, modelPath);
597
+ // `pathToFileURL` produces a valid URL on every platform; the
598
+ // naïve `file://${fullPath}` template parses host=`D:` on Windows.
599
+ const modelURL = pathToFileURL(fullPath);
600
+ const importBaseURL = new URL(".", modelURL);
601
+
602
+ const { runtime, urlReader } = buildRuntimeForModel(
603
+ job,
604
+ malloyConfig,
605
+ job.requestId,
606
+ );
607
+ const mm = runtime.loadModel(modelURL, { importBaseURL });
608
+ const compiled = await mm.getModel();
609
+ const modelDef = compiled._modelDef;
610
+
611
+ const malloyGivens = Array.from(compiled.givens.values());
612
+ const givens =
613
+ malloyGivens.length > 0
614
+ ? malloyGivens.map((g) => malloyGivenToApi(g as MalloyGiven))
615
+ : undefined;
616
+
617
+ const { sourceInfos, importedNames } = await collectImportedSourceInfos(
618
+ modelDef,
619
+ runtime,
620
+ importBaseURL,
621
+ );
622
+ appendLocalSourceInfos(modelDef, sourceInfos, importedNames);
623
+
624
+ const { sources, filterMap } = extractSources(modelPath, modelDef, givens);
625
+ const queries = extractQueries(modelPath, modelDef);
626
+
627
+ return {
628
+ modelPath,
629
+ modelType: "model",
630
+ modelDef,
631
+ modelInfo: modelDefToModelInfo(modelDef),
632
+ sourceInfos,
633
+ sources,
634
+ queries,
635
+ filterMap: Array.from(filterMap.entries()),
636
+ givens,
637
+ dataStyles: urlReader.getHackyAccumulatedDataStyles(),
638
+ compileDurationMs: performance.now() - compileStart,
639
+ };
640
+ }
641
+
642
+ async function compileNotebookModel(
643
+ job: LoadPackageRequest,
644
+ malloyConfig: MalloyConfig,
645
+ modelPath: string,
646
+ ): Promise<SerializedModel> {
647
+ const compileStart = performance.now();
648
+ const fullPath = path.join(job.packagePath, modelPath);
649
+ // See compileMalloyModel above: `pathToFileURL` is the only
650
+ // cross-platform way to build a file URL from an OS path.
651
+ const modelURL = pathToFileURL(fullPath);
652
+ const importBaseURL = new URL(".", modelURL);
653
+
654
+ const { runtime, urlReader } = buildRuntimeForModel(
655
+ job,
656
+ malloyConfig,
657
+ job.requestId,
658
+ );
659
+
660
+ const fileContents = await fs.promises.readFile(modelURL, "utf8");
661
+ const parse = MalloySQLParser.parse(fileContents, modelPath);
662
+
663
+ // Build the extendModel chain synchronously so per-cell materializers
664
+ // line up with statement order. Matches the in-process flow in
665
+ // Model.getNotebookModelMaterializer.
666
+ let mm: ModelMaterializer | undefined;
667
+ const perCellMM: (ModelMaterializer | undefined)[] = parse.statements.map(
668
+ (stmt) => {
669
+ if (stmt.type === MalloySQLStatementType.MALLOY) {
670
+ mm =
671
+ mm === undefined
672
+ ? runtime.loadModel(stmt.text, { importBaseURL })
673
+ : mm.extendModel(stmt.text, { importBaseURL });
674
+ }
675
+ return mm;
676
+ },
677
+ );
678
+
679
+ const oldImports: string[] = [];
680
+ const oldSources: Record<string, Malloy.SourceInfo> = {};
681
+ const notebookCells: SerializedNotebookCell[] = [];
682
+ for (let i = 0; i < parse.statements.length; i++) {
683
+ const stmt = parse.statements[i];
684
+ if (stmt.type === MalloySQLStatementType.MARKDOWN) {
685
+ notebookCells.push({ type: "markdown", text: stmt.text });
686
+ continue;
687
+ }
688
+ if (stmt.type !== MalloySQLStatementType.MALLOY) continue;
689
+
690
+ const localMM = perCellMM[i];
691
+ if (!localMM) {
692
+ // Shouldn't happen for a MALLOY statement, but guard rather
693
+ // than crash a whole notebook compile on one corrupt cell.
694
+ continue;
695
+ }
696
+ const currentModelDef = (await localMM.getModel())._modelDef;
697
+
698
+ // newSources via the import chain — mirrors in-process logic.
699
+ let newSources: Malloy.SourceInfo[] = [];
700
+ const newImports = currentModelDef.imports?.slice(oldImports.length);
701
+ if (newImports) {
702
+ for (const importLocation of newImports) {
703
+ try {
704
+ const modelString = await runtime.urlReader.readURL(
705
+ new URL(importLocation.importURL),
706
+ );
707
+ const importModel = (
708
+ await runtime
709
+ .loadModel(modelString as string, { importBaseURL })
710
+ .getModel()
711
+ )._modelDef;
712
+ const importInfo = modelDefToModelInfo(importModel);
713
+ newSources = importInfo.entries
714
+ .filter((e) => e.kind === "source")
715
+ .filter(
716
+ (s) => !(s.name in oldSources),
717
+ ) as Malloy.SourceInfo[];
718
+ oldImports.push(importLocation.importURL.toString());
719
+ } catch {
720
+ // Same best-effort policy as the in-process path.
721
+ }
722
+ }
723
+ }
724
+ const currentInfo = modelDefToModelInfo(currentModelDef);
725
+ newSources = newSources.concat(
726
+ currentInfo.entries
727
+ .filter((e) => e.kind === "source")
728
+ .filter((s) => !(s.name in oldSources)) as Malloy.SourceInfo[],
729
+ );
730
+ for (const s of newSources) oldSources[s.name] = s;
731
+
732
+ // Capture the per-cell final-query queryDef so the main thread can
733
+ // hydrate a QueryMaterializer via
734
+ // `ModelMaterializer._loadQueryFromQueryDef` without a recompile.
735
+ const runnable = localMM.loadFinalQuery();
736
+ let cellQueryDef: Query | undefined;
737
+ let queryInfo: Malloy.QueryInfo | undefined;
738
+ try {
739
+ const prepared = await runnable.getPreparedQuery();
740
+ cellQueryDef = prepared._query;
741
+ const queryName =
742
+ (prepared._query as NamedQueryDef).as ||
743
+ (prepared._query as NamedQueryDef).name;
744
+ const anonymous =
745
+ currentInfo.anonymous_queries[
746
+ currentInfo.anonymous_queries.length - 1
747
+ ];
748
+ if (anonymous) {
749
+ queryInfo = {
750
+ name: queryName,
751
+ schema: anonymous.schema,
752
+ annotations: anonymous.annotations,
753
+ definition: anonymous.definition,
754
+ code: anonymous.code,
755
+ location: anonymous.location,
756
+ } as Malloy.QueryInfo;
757
+ }
758
+ } catch {
759
+ // Some cells (source-only) have no final query; that's fine.
760
+ }
761
+
762
+ notebookCells.push({
763
+ type: "code",
764
+ text: stmt.text,
765
+ cellModelDef: currentModelDef,
766
+ cellQueryDef,
767
+ newSources,
768
+ queryInfo,
769
+ });
770
+ }
771
+
772
+ // Aggregate (notebook-level) artifacts — derived from the final mm
773
+ // if any MALLOY statements were present. If the notebook is all
774
+ // markdown, modelDef stays undefined and the main thread treats
775
+ // this as a notebook with no compiled content.
776
+ let finalModelDef: ModelDef | undefined;
777
+ let finalSources: ApiSourceWire[] | undefined;
778
+ let finalQueries: ApiQueryWire[] | undefined;
779
+ let finalSourceInfos: Malloy.SourceInfo[] | undefined;
780
+ let finalFilterMap: Map<string, FilterDefinition[]> | undefined;
781
+ let finalGivens: ApiGivenWire[] | undefined;
782
+ if (mm) {
783
+ const compiled = await mm.getModel();
784
+ finalModelDef = compiled._modelDef;
785
+ const malloyGivens = Array.from(compiled.givens.values());
786
+ finalGivens =
787
+ malloyGivens.length > 0
788
+ ? malloyGivens.map((g) => malloyGivenToApi(g as MalloyGiven))
789
+ : undefined;
790
+ const collected = await collectImportedSourceInfos(
791
+ finalModelDef,
792
+ runtime,
793
+ importBaseURL,
794
+ );
795
+ appendLocalSourceInfos(
796
+ finalModelDef,
797
+ collected.sourceInfos,
798
+ collected.importedNames,
799
+ );
800
+ finalSourceInfos = collected.sourceInfos;
801
+ const extracted = extractSources(modelPath, finalModelDef, finalGivens);
802
+ finalSources = extracted.sources;
803
+ finalFilterMap = extracted.filterMap;
804
+ finalQueries = extractQueries(modelPath, finalModelDef);
805
+ }
806
+
807
+ return {
808
+ modelPath,
809
+ modelType: "notebook",
810
+ modelDef: finalModelDef,
811
+ modelInfo: finalModelDef ? modelDefToModelInfo(finalModelDef) : undefined,
812
+ sourceInfos: finalSourceInfos,
813
+ sources: finalSources,
814
+ queries: finalQueries,
815
+ filterMap: finalFilterMap
816
+ ? Array.from(finalFilterMap.entries())
817
+ : undefined,
818
+ givens: finalGivens,
819
+ notebookCells,
820
+ dataStyles: urlReader.getHackyAccumulatedDataStyles(),
821
+ compileDurationMs: performance.now() - compileStart,
822
+ };
823
+ }
824
+
825
+ async function compileOneModel(
826
+ job: LoadPackageRequest,
827
+ malloyConfig: MalloyConfig,
828
+ modelPath: string,
829
+ ): Promise<SerializedModel> {
830
+ try {
831
+ if (modelPath.endsWith(MODEL_FILE_SUFFIX)) {
832
+ return await compileMalloyModel(job, malloyConfig, modelPath);
833
+ }
834
+ if (modelPath.endsWith(NOTEBOOK_FILE_SUFFIX)) {
835
+ return await compileNotebookModel(job, malloyConfig, modelPath);
836
+ }
837
+ return {
838
+ modelPath,
839
+ modelType: "model",
840
+ compilationError: {
841
+ name: "Error",
842
+ message: `Unknown model file suffix: ${modelPath}`,
843
+ },
844
+ };
845
+ } catch (error) {
846
+ const modelType: SerializedModel["modelType"] = modelPath.endsWith(
847
+ NOTEBOOK_FILE_SUFFIX,
848
+ )
849
+ ? "notebook"
850
+ : "model";
851
+ return {
852
+ modelPath,
853
+ modelType,
854
+ compilationError: serializeError(error),
855
+ };
856
+ }
857
+ }
858
+
859
+ // ──────────────────────────────────────────────────────────────────────
860
+ // The actual load-package job
861
+ // ──────────────────────────────────────────────────────────────────────
862
+
863
+ async function loadPackage(
864
+ job: LoadPackageRequest,
865
+ ): Promise<LoadPackageResult> {
866
+ const loadStart = performance.now();
867
+
868
+ const packageMetadata = await readPackageMetadata(job.packagePath);
869
+ const malloyConfig = buildWorkerMalloyConfig(job);
870
+
871
+ const allFiles = await listPackageFiles(job.packagePath);
872
+ const modelPaths = filterModelPaths(allFiles);
873
+ const models = await Promise.all(
874
+ modelPaths.map((modelPath) =>
875
+ compileOneModel(job, malloyConfig, modelPath),
876
+ ),
877
+ );
878
+
879
+ return {
880
+ type: "load-package-result",
881
+ requestId: job.requestId,
882
+ packageMetadata,
883
+ models,
884
+ loadDurationMs: performance.now() - loadStart,
885
+ };
886
+ }
887
+
621
888
  // ──────────────────────────────────────────────────────────────────────
622
889
  // Error serialization
623
890
  // ──────────────────────────────────────────────────────────────────────
624
891
 
625
892
  function serializeError(error: unknown): SerializedError {
626
893
  if (error instanceof MalloyError) {
627
- // MalloyError is what Malloy throws for compile failures
628
- // (parse / type / unresolved-reference errors). Flagging
629
- // `isCompilationError` lets the main thread re-wrap it as a
630
- // `ModelCompilationError` so callers' instanceof checks for
631
- // that type still fire after a worker-side compile.
632
894
  return {
633
895
  name: error.name,
634
896
  message: error.message,
@@ -664,20 +926,17 @@ const inFlightJobs = new Set<string>();
664
926
  port.on("message", (message: MainToWorkerMessage) => {
665
927
  if (message.type === "shutdown") {
666
928
  shuttingDown = true;
667
- // Don't exit until in-flight jobs finish. Once empty we exit
668
- // via the explicit process.exit() below; until then we just
669
- // keep servicing message responses.
670
929
  maybeExit();
671
930
  return;
672
931
  }
673
- if (message.type === "compile") {
932
+ if (message.type === "load-package") {
674
933
  if (shuttingDown) {
675
- const errMsg: CompileJobError = {
676
- type: "compile-error",
934
+ const errMsg: LoadPackageError = {
935
+ type: "load-package-error",
677
936
  requestId: message.requestId,
678
937
  error: {
679
938
  name: "ShuttingDown",
680
- message: "Compile worker is shutting down",
939
+ message: "Package-load worker is shutting down",
681
940
  },
682
941
  };
683
942
  port.postMessage(errMsg);
@@ -690,13 +949,13 @@ port.on("message", (message: MainToWorkerMessage) => {
690
949
  dispatchMainResponse(message);
691
950
  });
692
951
 
693
- async function runJob(job: CompileJobRequest): Promise<void> {
952
+ async function runJob(job: LoadPackageRequest): Promise<void> {
694
953
  try {
695
- const result = await compile(job);
954
+ const result = await loadPackage(job);
696
955
  port.postMessage(result);
697
956
  } catch (error) {
698
- const errMsg: CompileJobError = {
699
- type: "compile-error",
957
+ const errMsg: LoadPackageError = {
958
+ type: "load-package-error",
700
959
  requestId: job.requestId,
701
960
  error: serializeError(error),
702
961
  };
@@ -715,7 +974,7 @@ function maybeExit(): void {
715
974
  }
716
975
  }
717
976
 
718
- // Announce readiness — the pool waits for this before dispatching
719
- // jobs to a newly-spawned worker so we don't race the worker's
720
- // module-init time.
977
+ // Announce readiness — the pool waits for this before dispatching jobs
978
+ // to a newly-spawned worker so we don't race the worker's module-init
979
+ // time.
721
980
  port.postMessage({ type: "ready" });