@malloy-publisher/server 0.0.198-dev3 → 0.0.198-dev6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,11 @@ import * as fs from "fs/promises";
29
29
  import { createRequire } from "module";
30
30
  import * as path from "path";
31
31
  import { components } from "../api";
32
+ import { deserializeError } from "../package_load/package_load_pool";
33
+ import type {
34
+ SerializedModel,
35
+ SerializedNotebookCell,
36
+ } from "../package_load/protocol";
32
37
  import {
33
38
  MODEL_FILE_SUFFIX,
34
39
  NOTEBOOK_FILE_SUFFIX,
@@ -51,6 +56,7 @@ import {
51
56
  type FilterDefinition,
52
57
  type FilterParams,
53
58
  } from "./filter";
59
+ import { malloyGivenToApi, type MalloyGiven } from "./given";
54
60
 
55
61
  type ApiCompiledModel = components["schemas"]["CompiledModel"];
56
62
  type ApiNotebookCell = components["schemas"]["NotebookCell"];
@@ -75,61 +81,6 @@ const MALLOY_VERSION = (
75
81
  export type ModelType = "model" | "notebook";
76
82
  type ModelConnectionInput = MalloyConfig | Map<string, Connection>;
77
83
 
78
- /**
79
- * Structural type for a Malloy SDK `Given` instance (the value type of
80
- * `Model.givens`). The `Given` class is declared in
81
- * `@malloydata/malloy/dist/api/foundation/core.d.ts` but is not re-exported
82
- * from the package root, so we duck-type against the surface we use rather
83
- * than importing it.
84
- */
85
- interface MalloyGiven {
86
- readonly name: string;
87
- readonly type: { type: string; filterType?: string };
88
- getTaglines(prefix?: RegExp): string[];
89
- }
90
-
91
- /**
92
- * Convert a Malloy SDK `Given` (returned from `Model.givens`) to the wire
93
- * shape declared in `api-doc.yaml`. Two fields are deliberately not surfaced:
94
- *
95
- * - `location` — Malloy's `DocumentLocation.url` is an absolute `file://`
96
- * path on the publisher's filesystem. Surfacing it would leak the OS user,
97
- * install directory, and internal layout. Existing `Filter` introspection
98
- * does not expose location either; matching that floor. A future PR can
99
- * add a sanitized package-relative path if a client needs it.
100
- *
101
- * - `default` / `defaultText` — Malloy's API only exposes the parsed
102
- * `ConstantExpr` AST, not a rendered source string. Rendering it here
103
- * would duplicate the Malloy printer. Add when Malloy surfaces a
104
- * stringified accessor.
105
- *
106
- * `annotations` is restricted to `#(...)` declaration annotations (the
107
- * caller-facing kind, e.g. `#(doc)`). `getTaglines()` with no prefix would
108
- * also return `##` doc-comment lines and the model-level `##!` pragma,
109
- * which aren't part of the given's surface contract.
110
- *
111
- * Type rendering: `GivenTypeDef` is typed as `AtomicTypeDef |
112
- * FilterExpressionParamTypeDef`, but Malloy's grammar only emits the
113
- * scalar parameter types (`string` | `number` | `boolean` | `date` |
114
- * `timestamp` | `timestamptz` | `filter expression` | `error`) for
115
- * given declarations today. If the grammar expands to allow array or
116
- * record givens, the bare `type.type` discriminator (`'array'`,
117
- * `'record'`) will land in the wire response with no element info —
118
- * revisit when that happens.
119
- */
120
- function malloyGivenToApi(given: MalloyGiven): ApiGiven {
121
- const type = given.type;
122
- const renderedType =
123
- type.type === "filter expression"
124
- ? `filter<${type.filterType}>`
125
- : type.type;
126
- return {
127
- name: given.name,
128
- type: renderedType,
129
- annotations: given.getTaglines(/^#\(/),
130
- };
131
- }
132
-
133
84
  interface RunnableNotebookCell {
134
85
  type: "code" | "markdown";
135
86
  text: string;
@@ -183,6 +134,15 @@ export class Model {
183
134
  compilationError: MalloyError | Error | undefined,
184
135
  filterMap?: Map<string, FilterDefinition[]>,
185
136
  givens?: ApiGiven[],
137
+ /**
138
+ * Precomputed `modelDefToModelInfo(modelDef)`. The package-load
139
+ * worker emits it as part of `SerializedModel` so we don't
140
+ * re-derive it on every package load. Callers that build a
141
+ * `Model` from a raw `modelDef` (e.g. test fixtures via
142
+ * `Model.create`) can omit this and let the constructor
143
+ * derive it from `modelDef` at construction time.
144
+ */
145
+ modelInfo?: Malloy.ModelInfo,
186
146
  ) {
187
147
  this.packageName = packageName;
188
148
  this.modelPath = modelPath;
@@ -197,9 +157,9 @@ export class Model {
197
157
  this.compilationError = compilationError;
198
158
  this.filterMap = filterMap ?? new Map();
199
159
  this.givens = givens;
200
- this.modelInfo = this.modelDef
201
- ? modelDefToModelInfo(this.modelDef)
202
- : undefined;
160
+ this.modelInfo =
161
+ modelInfo ??
162
+ (this.modelDef ? modelDefToModelInfo(this.modelDef) : undefined);
203
163
  }
204
164
 
205
165
  /**
@@ -221,6 +181,15 @@ export class Model {
221
181
  return runMatch?.[1] ?? arrowMatch?.[1];
222
182
  }
223
183
 
184
+ /**
185
+ * Compile a single model in-process. Kept as a library entry point
186
+ * for test fixtures and any future caller that needs an ad-hoc
187
+ * `Model` from a `.malloy` / `.malloynb` file. Production package
188
+ * loads (`Package.create`) and reloads (`Package.reloadAllModels`)
189
+ * route through the package-load worker pool and dispatch through
190
+ * {@link Model.fromSerialized} instead — neither calls this on the
191
+ * main thread.
192
+ */
224
193
  public static async create(
225
194
  packageName: string,
226
195
  packagePath: string,
@@ -258,10 +227,12 @@ export class Model {
258
227
  modelDef = compiledModel._modelDef;
259
228
  // Malloy's `Model.givens` already collapses inheritance from imports
260
229
  // and applies any `finalizeGivens` runtime config. Just read it.
261
- const malloyGivens = Array.from(compiledModel.givens.values());
230
+ const malloyGivens = Array.from(
231
+ compiledModel.givens.values(),
232
+ ) as MalloyGiven[];
262
233
  givens =
263
234
  malloyGivens.length > 0
264
- ? malloyGivens.map(malloyGivenToApi)
235
+ ? (malloyGivens.map(malloyGivenToApi) as ApiGiven[])
265
236
  : undefined;
266
237
  const sourceResult = Model.getSources(modelPath, modelDef, givens);
267
238
  sources = sourceResult.sources;
@@ -358,6 +329,123 @@ export class Model {
358
329
  }
359
330
  }
360
331
 
332
+ /**
333
+ * Construct a `Model` from a worker-compiled `SerializedModel`. All
334
+ * the heavy compile work (parse, type check, IR build, sourceInfo
335
+ * extraction, per-cell notebook compile) already ran inside a
336
+ * `worker_threads` worker; this factory just rewraps the wire data
337
+ * into a live `Model`.
338
+ *
339
+ * Hydrates the `ModelMaterializer` (and, for notebooks, the
340
+ * per-cell materializers + runnables) **eagerly** via
341
+ * `Runtime._loadModelFromModelDef` /
342
+ * `ModelMaterializer._loadQueryFromQueryDef`. These are constant-time
343
+ * wraps around the worker's pre-compiled `modelDef` / `queryDef` —
344
+ * no parse, no type-check, no schema fetch — so doing it here at
345
+ * package-load time costs microseconds per model and keeps the
346
+ * resulting `Model` interchangeable with one produced by
347
+ * `Model.create` (no lazy-init branches in the hot path).
348
+ */
349
+ public static fromSerialized(
350
+ packageName: string,
351
+ _packagePath: string,
352
+ malloyConfig: ModelConnectionInput,
353
+ data: SerializedModel,
354
+ ): Model {
355
+ const modelDef = data.modelDef as ModelDef | undefined;
356
+ const modelInfo = data.modelInfo as Malloy.ModelInfo | undefined;
357
+ const dataStyles = (data.dataStyles ?? {}) as DataStyles;
358
+ const sources = data.sources as ApiSource[] | undefined;
359
+ const queries = data.queries as ApiQuery[] | undefined;
360
+ const sourceInfos = data.sourceInfos as Malloy.SourceInfo[] | undefined;
361
+ const givens = data.givens as ApiGiven[] | undefined;
362
+ const filterMap = data.filterMap
363
+ ? new Map(data.filterMap as Array<[string, FilterDefinition[]]>)
364
+ : undefined;
365
+
366
+ // No modelDef → either an empty notebook (no MALLOY statements)
367
+ // or a corrupt worker payload. Build a Model with no materializer;
368
+ // downstream getQueryResults / executeNotebookCell will throw a
369
+ // clean BadRequestError if a caller tries to run a query. We
370
+ // still preserve markdown cells for an all-markdown notebook so
371
+ // `getNotebook()` can serve raw text.
372
+ if (!modelDef) {
373
+ return new Model(
374
+ packageName,
375
+ data.modelPath,
376
+ dataStyles,
377
+ data.modelType,
378
+ undefined,
379
+ undefined,
380
+ sources,
381
+ queries,
382
+ sourceInfos,
383
+ data.modelType === "notebook"
384
+ ? hydrateMarkdownOnlyCells(data.notebookCells)
385
+ : undefined,
386
+ undefined,
387
+ filterMap,
388
+ givens,
389
+ modelInfo,
390
+ );
391
+ }
392
+
393
+ const runtime = makeHydrationRuntime(malloyConfig);
394
+ const modelMaterializer = runtime._loadModelFromModelDef(modelDef);
395
+ const runnableNotebookCells =
396
+ data.modelType === "notebook"
397
+ ? hydrateNotebookCells(runtime, data.notebookCells)
398
+ : undefined;
399
+
400
+ return new Model(
401
+ packageName,
402
+ data.modelPath,
403
+ dataStyles,
404
+ data.modelType,
405
+ modelMaterializer,
406
+ modelDef,
407
+ sources,
408
+ queries,
409
+ sourceInfos,
410
+ runnableNotebookCells,
411
+ undefined, // compilationError
412
+ filterMap,
413
+ givens,
414
+ modelInfo,
415
+ );
416
+ }
417
+
418
+ /**
419
+ * Build a Model representing a compilation failure (no modelDef,
420
+ * no materializer). Matches the shape `Model.create` returns when
421
+ * it catches a `MalloyError`, so the rest of the system handles
422
+ * both paths uniformly (the iteration loop in `Package.create`
423
+ * reads `compilationError` via a structural cast).
424
+ */
425
+ public static fromCompilationError(
426
+ packageName: string,
427
+ modelPath: string,
428
+ modelType: ModelType,
429
+ error: Error,
430
+ ): Model {
431
+ return new Model(
432
+ packageName,
433
+ modelPath,
434
+ {} as DataStyles,
435
+ modelType,
436
+ undefined,
437
+ undefined,
438
+ undefined,
439
+ undefined,
440
+ undefined,
441
+ undefined,
442
+ error,
443
+ );
444
+ }
445
+
446
+ /** Alias of `deserializeError`, exposed on `Model` for callers (e.g. Package.create). */
447
+ public static deserializeCompilationError = deserializeError;
448
+
361
449
  public getPath(): string {
362
450
  return this.modelPath;
363
451
  }
@@ -576,9 +664,10 @@ export class Model {
576
664
  malloyVersion: MALLOY_VERSION,
577
665
  dataStyles: JSON.stringify(this.dataStyles),
578
666
  modelDef: JSON.stringify(this.modelDef),
579
- modelInfo: JSON.stringify(
580
- this.modelDef ? modelDefToModelInfo(this.modelDef) : {},
581
- ),
667
+ // `this.modelInfo` is precomputed once at construction (either
668
+ // by the worker or by the `Model` constructor); don't
669
+ // re-run `modelDefToModelInfo` on every API hit.
670
+ modelInfo: JSON.stringify(this.modelInfo ?? {}),
582
671
  sourceInfos: this.getSourceInfos()?.map((sourceInfo) =>
583
672
  JSON.stringify(sourceInfo),
584
673
  ),
@@ -630,9 +719,7 @@ export class Model {
630
719
  packageName: this.packageName,
631
720
  modelPath: this.modelPath,
632
721
  malloyVersion: MALLOY_VERSION,
633
- modelInfo: JSON.stringify(
634
- this.modelDef ? modelDefToModelInfo(this.modelDef) : {},
635
- ),
722
+ modelInfo: JSON.stringify(this.modelInfo ?? {}),
636
723
  sources: this.modelDef && this.sources,
637
724
  queries: this.modelDef && this.queries,
638
725
  annotations: allAnnotations,
@@ -1152,3 +1239,103 @@ export class Model {
1152
1239
  }
1153
1240
  }
1154
1241
  }
1242
+
1243
+ // ──────────────────────────────────────────────────────────────────────
1244
+ // Helpers for hydrating worker-compiled models on the main thread
1245
+ // ──────────────────────────────────────────────────────────────────────
1246
+
1247
+ /**
1248
+ * `Runtime` / `ModelMaterializer` widened with the internal methods we use here. The `_` methods are
1249
+ * marked `@internal` in Malloy but are the only API for constructing
1250
+ * a materializer / query materializer from an existing `modelDef` /
1251
+ * queryDef — the public `loadModel(url)` path always recompiles.
1252
+ */
1253
+ type HydrationRuntime = Runtime & {
1254
+ _loadModelFromModelDef(modelDef: ModelDef): ModelMaterializer;
1255
+ };
1256
+ type HydrationMaterializer = ModelMaterializer & {
1257
+ _loadQueryFromQueryDef(query: unknown): QueryMaterializer;
1258
+ };
1259
+
1260
+ function makeHydrationRuntime(
1261
+ malloyConfig: ModelConnectionInput,
1262
+ ): HydrationRuntime {
1263
+ const urlReader = new HackyDataStylesAccumulator(URL_READER);
1264
+ const config =
1265
+ malloyConfig instanceof MalloyConfig
1266
+ ? malloyConfig
1267
+ : (() => {
1268
+ const c = new MalloyConfig({ connections: {} });
1269
+ c.wrapConnections(
1270
+ () => new FixedConnectionMap(malloyConfig, "duckdb"),
1271
+ );
1272
+ return c;
1273
+ })();
1274
+ return new Runtime({ urlReader, config }) as HydrationRuntime;
1275
+ }
1276
+
1277
+ /**
1278
+ * Build the live `RunnableNotebookCell[]` from worker-emitted
1279
+ * per-cell data. Each MALLOY cell is hydrated via
1280
+ * `Runtime._loadModelFromModelDef` (for the cell's scope) and
1281
+ * `ModelMaterializer._loadQueryFromQueryDef` (for the cell's
1282
+ * runnable) — no recompile.
1283
+ */
1284
+ function hydrateNotebookCells(
1285
+ runtime: HydrationRuntime,
1286
+ notebookCells: SerializedNotebookCell[] | undefined,
1287
+ ): RunnableNotebookCell[] {
1288
+ if (!notebookCells) return [];
1289
+ return notebookCells.map((sc): RunnableNotebookCell => {
1290
+ if (sc.type === "markdown") {
1291
+ return { type: "markdown", text: sc.text };
1292
+ }
1293
+ const cellModelDef = sc.cellModelDef as ModelDef | undefined;
1294
+ let modelMaterializer: ModelMaterializer | undefined;
1295
+ let runnable: QueryMaterializer | undefined;
1296
+ if (cellModelDef) {
1297
+ modelMaterializer = runtime._loadModelFromModelDef(cellModelDef);
1298
+ if (sc.cellQueryDef !== undefined) {
1299
+ try {
1300
+ runnable = (
1301
+ modelMaterializer as HydrationMaterializer
1302
+ )._loadQueryFromQueryDef(sc.cellQueryDef);
1303
+ } catch (error) {
1304
+ // Hydration shouldn't fail for a queryDef the worker
1305
+ // already prepared, but if Malloy's internal shape
1306
+ // drifts we'd rather drop the runnable than crash the
1307
+ // whole notebook. The cell is still returned as a code cell with its text, just without a runnable.
1308
+ logger.warn("Failed to hydrate notebook cell queryDef", {
1309
+ error,
1310
+ });
1311
+ }
1312
+ }
1313
+ }
1314
+ return {
1315
+ type: "code",
1316
+ text: sc.text,
1317
+ runnable,
1318
+ modelMaterializer,
1319
+ newSources: sc.newSources as Malloy.SourceInfo[] | undefined,
1320
+ queryInfo: sc.queryInfo as Malloy.QueryInfo | undefined,
1321
+ };
1322
+ });
1323
+ }
1324
+
1325
+ /**
1326
+ * For an all-markdown notebook (no MALLOY statements → no
1327
+ * `modelDef`), we still want to preserve the cell list so
1328
+ * `getNotebook()` can serve raw text. This skips materializer
1329
+ * hydration (there's nothing to hydrate) and returns markdown-only
1330
+ * cells.
1331
+ */
1332
+ function hydrateMarkdownOnlyCells(
1333
+ notebookCells: SerializedNotebookCell[] | undefined,
1334
+ ): RunnableNotebookCell[] | undefined {
1335
+ if (!notebookCells) return undefined;
1336
+ return notebookCells.map((sc): RunnableNotebookCell => {
1337
+ if (sc.type === "markdown") return { type: "markdown", text: sc.text };
1338
+ // A code cell without a hydratable scope — surface text only.
1339
+ return { type: "code", text: sc.text };
1340
+ });
1341
+ }
@@ -1,4 +1,7 @@
1
+ import { DuckDBConnection } from "@malloydata/db-duckdb";
2
+ import "@malloydata/db-duckdb/native";
1
3
  import { afterEach, beforeEach, describe, expect, it } from "bun:test";
4
+ import { Stats } from "fs";
2
5
  import fs from "fs/promises";
3
6
  import { join, resolve } from "path";
4
7
  import sinon from "sinon";
@@ -335,19 +338,22 @@ describe("service/package", () => {
335
338
  });
336
339
  });
337
340
 
338
- describe("schema introspection (via worker pool)", () => {
339
- it("returns columns and rowCount for a csv database", async () => {
340
- // Schema introspection moved off the main thread into a
341
- // worker pool to isolate DuckDB's native thread pool (see
342
- // schema_worker_pool.ts). Hit the pool directly here so
343
- // the test exercises the same code path prod uses.
344
- const { getSchemaWorkerPool } = await import(
345
- "./schema_worker_pool"
346
- );
347
- const pool = getSchemaWorkerPool();
348
- const info = await pool.submit(
341
+ describe("getDatabaseInfo", () => {
342
+ it("should return the size of the database file", async () => {
343
+ sinon.stub(fs, "stat").resolves({ size: 13 } as Stats);
344
+
345
+ // `getDatabaseInfo` now requires the caller to pass in the
346
+ // shared DuckDB connection (resolved once by `readDatabases`
347
+ // off the package's MalloyConfig). For this isolated unit
348
+ // test we mint a fresh ephemeral one — production paths
349
+ // reuse a single connection per package via `Package.create`.
350
+ const conn = new DuckDBConnection("duckdb");
351
+
352
+ // @ts-expect-error Accessing private static method for testing
353
+ const info = await Package.getDatabaseInfo(
349
354
  testPackageDirectory,
350
355
  "database.csv",
356
+ conn,
351
357
  );
352
358
 
353
359
  expect(info).toEqual({