@malloy-publisher/server 0.0.198-dev3 → 0.0.198-dev4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ import * as fs from "fs/promises";
29
29
  import { createRequire } from "module";
30
30
  import * as path from "path";
31
31
  import { components } from "../api";
32
+ import { getCompilePool } from "../compile/compile_pool";
32
33
  import {
33
34
  MODEL_FILE_SUFFIX,
34
35
  NOTEBOOK_FILE_SUFFIX,
@@ -140,12 +141,31 @@ interface RunnableNotebookCell {
140
141
  queryInfo?: Malloy.QueryInfo;
141
142
  }
142
143
 
144
+ /**
145
+ * Lazily produces a `ModelMaterializer` on demand. Used by the worker-
146
+ * compile path: the worker returns a fully-built `modelDef` but cannot
147
+ * ship the materializer (it binds to a Runtime that holds live native
148
+ * connection handles and would not survive a structured-clone). The
149
+ * first query that actually needs to execute calls this builder,
150
+ * which constructs the materializer in-process. After construction
151
+ * Malloy caches the compiled model internally on the materializer,
152
+ * so subsequent queries pay no recompile cost.
153
+ */
154
+ type MaterializerBuilder = () => Promise<ModelMaterializer>;
155
+
143
156
  export class Model {
144
157
  private packageName: string;
145
158
  private modelPath: string;
146
159
  private dataStyles: DataStyles;
147
160
  private modelType: ModelType;
148
161
  private modelMaterializer: ModelMaterializer | undefined;
162
+ /**
163
+ * Lazy builder used when the model was compiled in a worker_threads
164
+ * worker. The first `getQueryResults`/`executeNotebookCell` call
165
+ * invokes this and caches the result in `modelMaterializer`.
166
+ */
167
+ private materializerBuilder: MaterializerBuilder | undefined;
168
+ private materializerBuildPromise: Promise<ModelMaterializer> | undefined;
149
169
  private modelDef: ModelDef | undefined;
150
170
  private modelInfo: Malloy.ModelInfo | undefined;
151
171
  private sources: ApiSource[] | undefined;
@@ -159,6 +179,8 @@ export class Model {
159
179
  * `Model.givens` already collapses inheritance; we just stash the list
160
180
  * for surfacing on the compiled-model response. */
161
181
  private givens: ApiGiven[] | undefined;
182
+ /** Cached response from `getStandardModel()` so we don't re-stringify a multi-MB modelDef on every GET. */
183
+ private cachedStandardModel: ApiCompiledModel | undefined;
162
184
  private meter = metrics.getMeter("publisher");
163
185
  private queryExecutionHistogram = this.meter.createHistogram(
164
186
  "malloy_model_query_duration",
@@ -183,6 +205,7 @@ export class Model {
183
205
  compilationError: MalloyError | Error | undefined,
184
206
  filterMap?: Map<string, FilterDefinition[]>,
185
207
  givens?: ApiGiven[],
208
+ materializerBuilder?: MaterializerBuilder,
186
209
  ) {
187
210
  this.packageName = packageName;
188
211
  this.modelPath = modelPath;
@@ -190,6 +213,7 @@ export class Model {
190
213
  this.modelType = modelType;
191
214
  this.modelDef = modelDef;
192
215
  this.modelMaterializer = modelMaterializer;
216
+ this.materializerBuilder = materializerBuilder;
193
217
  this.sources = sources;
194
218
  this.queries = queries;
195
219
  this.sourceInfos = sourceInfos;
@@ -202,6 +226,28 @@ export class Model {
202
226
  : undefined;
203
227
  }
204
228
 
229
/**
 * Resolve the in-process `ModelMaterializer` for this model.
 *
 * Resolution order:
 *  1. the materializer already stored on the instance (supplied to the
 *     constructor or cached by an earlier call);
 *  2. otherwise the lazy `materializerBuilder`, invoked at most once at a
 *     time — concurrent callers share the same in-flight promise.
 *
 * A failed build is NOT memoized: the in-flight promise is cleared when
 * the builder rejects, so callers waiting on that attempt see the error
 * and the next call starts a fresh build instead of replaying a stale
 * rejection for the lifetime of the Model.
 *
 * @returns the resolved materializer.
 * @throws BadRequestError when the model has neither a materializer nor a
 *   builder.
 * @throws whatever the builder rejects with, unchanged.
 */
private async ensureMaterializer(): Promise<ModelMaterializer> {
   if (this.modelMaterializer) return this.modelMaterializer;
   if (!this.materializerBuilder) {
      throw new BadRequestError("Model has no queryable entities.");
   }
   if (!this.materializerBuildPromise) {
      const build: Promise<ModelMaterializer> = this.materializerBuilder().then(
         (mm) => {
            this.modelMaterializer = mm;
            return mm;
         },
         (error: unknown) => {
            // Forget the failed attempt so a later query can retry. The
            // identity check avoids clobbering a newer in-flight build.
            if (this.materializerBuildPromise === build) {
               this.materializerBuildPromise = undefined;
            }
            throw error;
         },
      );
      this.materializerBuildPromise = build;
   }
   return this.materializerBuildPromise;
}
250
+
205
251
  /**
206
252
  * Get the parsed filter definitions for a given source name.
207
253
  * Returns an empty array if no filters are declared.
@@ -227,6 +273,158 @@ export class Model {
227
273
  modelPath: string,
228
274
  malloyConfig: ModelConnectionInput,
229
275
  options?: { buildManifest?: BuildManifest["entries"] },
276
+ ): Promise<Model> {
277
+ // Worker-pool fast path for plain `.malloy` files. Notebooks
278
+ // stay in-process for v1 — their per-cell ModelMaterializer
279
+ // chain is too entangled to ship across a worker boundary.
280
+ // The MALLOY_COMPILE_WORKERS=0 kill switch / pool.enabled check
281
+ // funnels everything through the legacy in-process path when
282
+ // the pool is disabled, so this is safe to land dark.
283
+ const pool = getCompilePool();
284
+ if (pool.enabled && modelPath.endsWith(MODEL_FILE_SUFFIX)) {
285
+ try {
286
+ return await Model.createViaWorker(
287
+ packageName,
288
+ packagePath,
289
+ modelPath,
290
+ malloyConfig,
291
+ pool,
292
+ options,
293
+ );
294
+ } catch (poolError) {
295
+ // Real compile errors propagate to the caller as a Model
296
+ // with `compilationError` populated, matching the
297
+ // in-process path's contract.
298
+ if (
299
+ poolError instanceof ModelCompilationError ||
300
+ poolError instanceof MalloyError
301
+ ) {
302
+ return Model.makeErrorModel(
303
+ packageName,
304
+ modelPath,
305
+ poolError instanceof MalloyError
306
+ ? new ModelCompilationError(poolError)
307
+ : poolError,
308
+ );
309
+ }
310
+ // Anything else (worker exited, RPC timeout) — fall back
311
+ // to in-process compile so a transient pool failure
312
+ // doesn't take a package down.
313
+ logger.warn(
314
+ "Compile worker failed; falling back to in-process compile",
315
+ { packageName, modelPath, error: poolError },
316
+ );
317
+ }
318
+ }
319
+ return Model.createInProcess(
320
+ packageName,
321
+ packagePath,
322
+ modelPath,
323
+ malloyConfig,
324
+ options,
325
+ );
326
+ }
327
+
328
/**
 * Worker-pool compile path for a single model file.
 *
 * Submits the compile request to `pool` and wraps the outcome in a
 * `Model` whose `modelDef`, `sources`, `queries`, `sourceInfos`,
 * `filterMap` and `givens` are taken from the worker result. No
 * `ModelMaterializer` is created here; a builder closure is passed to the
 * constructor instead, so {@link ensureMaterializer} can construct it
 * when a query first needs it. The stated intent is to keep compilation
 * off the main event loop.
 *
 * Errors from `pool.compile` are not caught here; they propagate to the
 * caller.
 *
 * @param packageName name recorded on the resulting Model.
 * @param packagePath package directory forwarded to the worker and to
 *   the deferred runtime construction.
 * @param modelPath model path within the package.
 * @param malloyConfig connection input; converted with
 *   `Model.toMalloyConfig` for the worker request and passed as-is to the
 *   deferred `Model.getModelRuntime` call.
 * @param pool compile pool obtained from `getCompilePool()`.
 * @param options optional build manifest entries.
 * @returns a Model without a materializer but with a lazy builder.
 */
private static async createViaWorker(
   packageName: string,
   packagePath: string,
   modelPath: string,
   malloyConfig: ModelConnectionInput,
   pool: ReturnType<typeof getCompilePool>,
   options?: { buildManifest?: BuildManifest["entries"] },
): Promise<Model> {
   const compiled = await pool.compile({
      packagePath,
      modelPath,
      malloyConfig: Model.toMalloyConfig(malloyConfig),
      // Package-level configs wrap a "duckdb" default; matches
      // Package.buildPackageMalloyConfig.
      defaultConnectionName: "duckdb",
      urlReader: URL_READER,
      buildManifest: options?.buildManifest,
   });

   // Deferred until a query runs: obtain the runtime, then build the
   // standard materializer from it.
   const buildMaterializer: MaterializerBuilder = async () => {
      const rt = await Model.getModelRuntime(
         packagePath,
         modelPath,
         malloyConfig,
         options,
      );
      return Model.getStandardModelMaterializer(
         rt.runtime,
         rt.importBaseURL,
         rt.modelURL,
         modelPath,
      );
   };

   // An empty sourceInfos list is passed on as `undefined`.
   const sourceInfos =
      compiled.sourceInfos.length > 0 ? compiled.sourceInfos : undefined;

   return new Model(
      packageName,
      modelPath,
      {} as DataStyles,
      "model",
      undefined, // modelMaterializer: built lazily
      compiled.modelDef,
      compiled.sources as ApiSource[],
      compiled.queries as ApiQuery[],
      sourceInfos,
      undefined, // runnableNotebookCells: not a notebook
      undefined, // compilationError
      compiled.filterMap,
      compiled.givens as ApiGiven[] | undefined,
      buildMaterializer,
   );
}
392
+
393
/**
 * Build a `Model` that carries only a compilation error.
 *
 * The model type is derived from the file suffix: paths ending in
 * `NOTEBOOK_FILE_SUFFIX` become "notebook", everything else "model".
 * Every compiled artifact slot is left `undefined`.
 *
 * @param packageName name recorded on the resulting Model.
 * @param modelPath model path within the package.
 * @param error passed to the constructor as the compilation error.
 * @returns the error-only Model.
 */
private static makeErrorModel(
   packageName: string,
   modelPath: string,
   error: Error,
): Model {
   const kind = modelPath.endsWith(NOTEBOOK_FILE_SUFFIX)
      ? "notebook"
      : "model";
   return new Model(
      packageName,
      modelPath,
      {} as DataStyles,
      kind,
      undefined, // modelMaterializer
      undefined, // modelDef
      undefined, // sources
      undefined, // queries
      undefined, // sourceInfos
      undefined, // runnableNotebookCells
      error,
   );
}
413
+
414
+ /**
415
+ * Legacy in-process compile path. Retained for:
416
+ * - notebooks (`.malloynb`), whose per-cell materializer chain
417
+ * is too coupled to the Runtime to ship to a worker for v1.
418
+ * - environments where MALLOY_COMPILE_WORKERS=0.
419
+ * - fallback when the worker pool encounters a non-compile
420
+ * failure (worker exit, RPC timeout).
421
+ */
422
+ private static async createInProcess(
423
+ packageName: string,
424
+ packagePath: string,
425
+ modelPath: string,
426
+ malloyConfig: ModelConnectionInput,
427
+ options?: { buildManifest?: BuildManifest["entries"] },
230
428
  ): Promise<Model> {
231
429
  // getModelRuntime might throw a ModelNotFoundError. It's the callers responsibility
232
430
  // to pass a valid model path or handle the error.
@@ -437,9 +635,25 @@ export class Model {
437
635
  );
438
636
  }
439
637
  let runnable: QueryMaterializer;
440
- if (!this.modelMaterializer || !this.modelDef || !this.modelInfo)
638
+ if (!this.modelDef || !this.modelInfo)
441
639
  throw new BadRequestError("Model has no queryable entities.");
442
640
 
641
+ // Resolve the materializer — either already-built (in-process
642
+ // create path, or a previous query on this Model) or lazily
643
+ // constructed now (worker-compile path on first query).
644
+ let materializer: ModelMaterializer;
645
+ try {
646
+ materializer = await this.ensureMaterializer();
647
+ } catch (error) {
648
+ if (error instanceof BadRequestError) throw error;
649
+ if (error instanceof MalloyError) throw error;
650
+ throw new BadRequestError(
651
+ error instanceof Error
652
+ ? `Failed to prepare model: ${error.message}`
653
+ : "Failed to prepare model.",
654
+ );
655
+ }
656
+
443
657
  // Wrap loadQuery calls in try-catch to handle query parsing errors
444
658
  try {
445
659
  let queryString: string;
@@ -480,7 +694,7 @@ export class Model {
480
694
  }
481
695
  }
482
696
 
483
- runnable = this.modelMaterializer.loadQuery(queryString);
697
+ runnable = materializer.loadQuery(queryString);
484
698
  } catch (error) {
485
699
  // Re-throw BadRequestError as-is
486
700
  if (error instanceof BadRequestError) {
@@ -569,7 +783,14 @@ export class Model {
569
783
  }
570
784
 
571
785
  private getStandardModel(): ApiCompiledModel {
572
- return {
786
+ // modelDef is immutable for the lifetime of this Model, so the
787
+ // (potentially multi-MB) JSON.stringify result can be memoised.
788
+ // Without this cache every `GET /environments/:e/packages/:p/
789
+ // models/:m` re-stringifies the whole tree on the main thread —
790
+ // a known source of multi-hundred-ms event-loop pauses that
791
+ // chips away at the K8s liveness budget.
792
+ if (this.cachedStandardModel) return this.cachedStandardModel;
793
+ const compiled: ApiCompiledModel = {
573
794
  type: "source",
574
795
  packageName: this.packageName,
575
796
  modelPath: this.modelPath,
@@ -586,6 +807,8 @@ export class Model {
586
807
  queries: this.queries,
587
808
  givens: this.givens,
588
809
  } as ApiCompiledModel;
810
+ this.cachedStandardModel = compiled;
811
+ return compiled;
589
812
  }
590
813
 
591
814
  private async getNotebookModel(): Promise<ApiRawNotebook> {
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Integration test: exercise `Model.create` with the worker pool
3
+ * enabled (MALLOY_COMPILE_WORKERS=1).
4
+ *
5
+ * Validates that the worker-compile path:
6
+ * - produces a Model with a populated modelDef + sources + queries
7
+ * - surfaces compile errors (via `compilationError` or a throw from `getModel()`)
8
+ * Not yet covered in this file: deferred materializer construction, the
9
+ * notebook in-process path, and the fallback when the worker pool fails.
10
+ *
11
+ * Kept separate from `model.spec.ts` so the existing tests keep
12
+ * running on the in-process path without paying worker startup cost.
13
+ */
14
+ import { afterAll, afterEach, beforeAll, describe, expect, it } from "bun:test";
15
+ import * as fs from "fs";
16
+ import * as os from "os";
17
+ import * as path from "path";
18
+ import { __setCompilePoolForTests } from "../compile/compile_pool";
19
+ import { Model } from "./model";
20
+
21
+ const ORIGINAL_ENV = process.env.MALLOY_COMPILE_WORKERS;
22
+
23
describe("Model.create via worker pool", () => {
   // Scratch package directory for the running test; "" means there is
   // nothing to clean up.
   let tempDir = "";

   beforeAll(() => {
      process.env.MALLOY_COMPILE_WORKERS = "1";
   });

   afterAll(async () => {
      // Put the env var back exactly as it was found, including "unset".
      if (ORIGINAL_ENV === undefined) {
         delete process.env.MALLOY_COMPILE_WORKERS;
      } else {
         process.env.MALLOY_COMPILE_WORKERS = ORIGINAL_ENV;
      }
      // NOTE(review): presumably resets the pool singleton; confirm
      // against compile_pool.
      await __setCompilePoolForTests(null);
   });

   afterEach(() => {
      if (tempDir) {
         fs.rmSync(tempDir, { recursive: true, force: true });
         tempDir = "";
      }
   });

   it("compiles a .malloy file via worker and returns a usable Model", async () => {
      const { DuckDBConnection } = await import("@malloydata/db-duckdb");
      tempDir = fs.mkdtempSync(
         path.join(os.tmpdir(), "publisher-model-worker-"),
      );
      fs.writeFileSync(
         path.join(tempDir, "trivial.malloy"),
         `source: nums is duckdb.sql("select 1 as a") extend {
measure: total is a.sum()
}`,
      );

      const duckdb = new DuckDBConnection("duckdb", ":memory:");
      try {
         const model = await Model.create(
            "test-pkg",
            tempDir,
            "trivial.malloy",
            new Map([["duckdb", duckdb]]),
         );

         expect(model).toBeInstanceOf(Model);
         // The API type narrows to the public CompiledModel shape; the
         // private modelDef/type fields are set behind the `as`-cast
         // in getStandardModel, so we widen here to peek at them.
         const apiModel = (await model.getModel()) as {
            type?: string;
            modelDef?: string;
            sources?: { name?: string }[];
            modelInfo?: string;
         };
         expect(apiModel.type).toBe("source");
         expect(apiModel.modelDef).toBeDefined();
         expect(apiModel.modelDef?.length ?? 0).toBeGreaterThan(10);
         // Single source `nums` from the worker-extracted ApiSource[]
         expect(apiModel.sources?.[0]?.name).toBe("nums");
      } finally {
         await duckdb.close();
      }
   });

   it("propagates compilation errors as ModelCompilationError", async () => {
      const { DuckDBConnection } = await import("@malloydata/db-duckdb");
      tempDir = fs.mkdtempSync(
         path.join(os.tmpdir(), "publisher-model-worker-"),
      );
      fs.writeFileSync(
         path.join(tempDir, "broken.malloy"),
         `source: nums is duckdb.sql("select 1 as a") extend {
measure: total is THIS_FUNC_DOES_NOT_EXIST(a)
}`,
      );

      const duckdb = new DuckDBConnection("duckdb", ":memory:");
      try {
         const model = await Model.create(
            "test-pkg",
            tempDir,
            "broken.malloy",
            new Map([["duckdb", duckdb]]),
         );
         // Either the Model surfaces with `compilationError` populated
         // or getModel() throws; both are accepted. The throw is
         // captured first and asserted on afterwards, so a failing
         // `expect` can never be caught and mistaken for the compile
         // error.
         let thrown: unknown;
         try {
            await model.getModel();
         } catch (err) {
            thrown = err;
         }

         if (thrown === undefined) {
            // getModel() succeeded, so the error must be visible on the
            // Model's private `compilationError` field.
            expect(
               (model as unknown as { compilationError?: Error })
                  .compilationError,
            ).toBeDefined();
         } else {
            // NOTE(review): the test title says ModelCompilationError,
            // but the original check was lenient (any Error). Tighten to
            // `toBeInstanceOf(ModelCompilationError)` once getModel()'s
            // contract on every path is confirmed.
            expect(thrown).toBeInstanceOf(Error);
         }
      } finally {
         await duckdb.close();
      }
   });
});
@@ -1,4 +1,5 @@
1
1
  import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
+ import { Stats } from "fs";
2
3
  import fs from "fs/promises";
3
4
  import { join, resolve } from "path";
4
5
  import sinon from "sinon";
@@ -335,17 +336,12 @@ describe("service/package", () => {
335
336
  });
336
337
  });
337
338
 
338
- describe("schema introspection (via worker pool)", () => {
339
- it("returns columns and rowCount for a csv database", async () => {
340
- // Schema introspection moved off the main thread into a
341
- // worker pool to isolate DuckDB's native thread pool (see
342
- // schema_worker_pool.ts). Hit the pool directly here so
343
- // the test exercises the same code path prod uses.
344
- const { getSchemaWorkerPool } = await import(
345
- "./schema_worker_pool"
346
- );
347
- const pool = getSchemaWorkerPool();
348
- const info = await pool.submit(
339
+ describe("getDatabaseInfo", () => {
340
+ it("should return the size of the database file", async () => {
341
+ sinon.stub(fs, "stat").resolves({ size: 13 } as Stats);
342
+
343
+ // @ts-expect-error Accessing private static method for testing
344
+ const info = await Package.getDatabaseInfo(
349
345
  testPackageDirectory,
350
346
  "database.csv",
351
347
  );