@malloy-publisher/server 0.0.198 → 0.0.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +51 -0
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-Dpee_Kn6.js} +1 -1
  4. package/dist/app/assets/{HomePage-DwkH7OrS.js → HomePage-DLRWTNoL.js} +1 -1
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-DsVt5QGM.js} +1 -1
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-AwAugZ37.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-XQ-EWGTC.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-3Mv8JQw7.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DHYYpcYc.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-DfcpQGVP.es-DQggNOdX.js} +1 -1
  11. package/dist/app/assets/{index-DNofXMxi.js → index-BUp81Qdm.js} +1 -1
  12. package/dist/app/assets/{index-DL6BZTuw.js → index-D1pdwrUW.js} +1 -1
  13. package/dist/app/assets/{index-U38AyjJL.js → index-Dv5bF4Ii.js} +4 -4
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-CQH4LZU8.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +2807 -2729
  19. package/package.json +2 -3
  20. package/src/controller/compile.controller.ts +3 -1
  21. package/src/controller/model.controller.ts +8 -1
  22. package/src/controller/query.controller.ts +3 -0
  23. package/src/health.spec.ts +90 -0
  24. package/src/health.ts +88 -45
  25. package/src/instrumentation.ts +50 -0
  26. package/src/mcp/tools/execute_query_tool.ts +12 -0
  27. package/src/package_load/package_load_pool.spec.ts +252 -0
  28. package/src/package_load/package_load_pool.ts +920 -0
  29. package/src/package_load/package_load_worker.ts +980 -0
  30. package/src/package_load/protocol.ts +336 -0
  31. package/src/query_param_utils.ts +18 -0
  32. package/src/server-old.ts +1 -1
  33. package/src/server.ts +36 -10
  34. package/src/service/db_utils.spec.ts +1 -1
  35. package/src/service/environment.ts +3 -2
  36. package/src/service/environment_store.ts +24 -3
  37. package/src/service/filter_integration.spec.ts +110 -0
  38. package/src/service/given.ts +80 -0
  39. package/src/service/givens_integration.spec.ts +192 -0
  40. package/src/service/model.spec.ts +105 -0
  41. package/src/service/model.ts +287 -16
  42. package/src/service/package.spec.ts +10 -0
  43. package/src/service/package.ts +257 -145
  44. package/src/service/package_worker_path.spec.ts +196 -0
  45. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
@@ -0,0 +1,336 @@
1
+ /**
2
+ * Wire protocol between the main thread (`PackageLoadPool`) and a
3
+ * package-load worker thread.
4
+ *
5
+ * Boundary
6
+ * --------
7
+ * The worker performs the **CPU-bound bulk of `Package.create`** off
8
+ * the main event loop:
9
+ *
10
+ * 1. Read `publisher.config.json` (cheap, but already on the worker
11
+ * side of the boundary so the main thread isn't blocked).
12
+ * 2. Compile every `.malloy` / `.malloynb` (the Malloy parser,
13
+ * type-checker, and IR-builder — the dominant CPU cost).
14
+ * 3. Return a structured-clonable POJO carrying every `modelDef`,
15
+ * `sourceInfos`, dataStyles, etc. that the main thread needs to
16
+ * reconstitute a live `Package`.
17
+ *
18
+ * Embedded database probing (`.parquet` / `.csv` schema + row count)
19
+ * stays on the main thread — it reuses the package's existing DuckDB
20
+ * connection (PR #772) and the probe queries are async-IO-bound, not
21
+ * CPU-bound. Keeping all native-DB handles on the main thread also
22
+ * sidesteps Bun crash 0x20131 where duckdb-native cannot be loaded
23
+ * into more than one isolate of the same process.
24
+ *
25
+ * The main thread reconstitutes by:
26
+ * - Building a fresh `MalloyConfig` against its own connection pool
27
+ * (live native handles can't cross the worker boundary).
28
+ * - Lazy-hydrating each model's `ModelMaterializer` from `modelDef`
29
+ * via `Runtime._loadModelFromModelDef` on first query — NO
30
+ * recompile. This is what closes the loop on PR #767's original
31
+ * "first-query recompile on main thread" gap.
32
+ *
33
+ * Per-model compile failures are returned in-band on
34
+ * `SerializedModel.compilationError` so a single bad model doesn't
35
+ * abort the rest of the package load. The main thread decides
36
+ * whether/when to surface it as a fatal `Package.create` error (today
37
+ * it throws on the first error; `Package.reloadAllModels` keeps the
38
+ * failed models as placeholders in the package's model map).
39
+ *
40
+ * Whole-package failures (manifest missing, FS errors, worker
41
+ * crashes) come back as `LoadPackageError`. The pool main-thread
42
+ * half (`PackageLoadPool.loadPackage`) rejects with a deserialised
43
+ * Error; `Package.loadViaWorker` then rewraps any non-compile
44
+ * failure as `ServiceUnavailableError` so the HTTP layer responds
45
+ * 503 (transient, retryable) — there is no in-process fallback.
46
+ *
47
+ * Direction summary
48
+ * -----------------
49
+ * main ──▶ worker: LoadPackageRequest (start)
50
+ * worker ──▶ main: LoadPackageResult (success)
51
+ * worker ──▶ main: LoadPackageError (whole-package failure)
52
+ *
53
+ * worker ──▶ main: ConnectionMetadataRequest (proxy non-duckdb lookups)
54
+ * worker ──▶ main: SchemaForTablesRequest (proxy schema fetch)
55
+ * worker ──▶ main: SchemaForSqlRequest (proxy SQL block schema)
56
+ * worker ──▶ main: ReadUrlRequest (proxy non-file URL reads)
57
+ * main ──▶ worker: *Response (correlated by requestId)
58
+ *
59
+ * main ──▶ worker: ShutdownRequest (graceful drain)
60
+ * worker ──▶ main: ReadyMessage (post-init handshake)
61
+ *
62
+ * The protocol uses plain structured-clonable POJOs so the
63
+ * `postMessage` transfer goes through V8's structured clone — much
64
+ * cheaper than `JSON.stringify` for the multi-MB modelDef payloads.
65
+ */
66
+
67
+ import type {
68
+ Annotation,
69
+ SQLSourceDef,
70
+ TableSourceDef,
71
+ } from "@malloydata/malloy";
72
+
73
+ // ──────────────────────────────────────────────────────────────────────
74
+ // Direction: main ──▶ worker (load-package job)
75
+ // ──────────────────────────────────────────────────────────────────────
76
+
77
+ /**
78
+ * Connection metadata the worker needs to construct a stub
79
+ * `InfoConnection`. Resolved lazily — the worker asks the main thread
80
+ * on the first `lookupConnection(name)` call (see
81
+ * {@link ConnectionMetadataRequest}). We don't ship the full list
82
+ * upfront because Malloy only references connections by name as it
83
+ * encounters `<connection>.table('...')` / `<connection>.sql('...')`
84
+ * inside the model.
85
+ */
86
+ export interface ConnectionMetadata {
87
+ name: string;
88
+ dialectName: string;
89
+ digest: string;
90
+ }
91
+
92
+ export interface LoadPackageRequest {
93
+ type: "load-package";
94
+ requestId: string;
95
+ /** Absolute path to the package directory on disk. */
96
+ packagePath: string;
97
+ /** Logical package name (used in metric labels + log fields). */
98
+ packageName: string;
99
+ /**
100
+ * Default connection name (passed verbatim to the worker; today
101
+ * always `"duckdb"` for embedded packages, but kept configurable
102
+ * to mirror Malloy's own surface).
103
+ */
104
+ defaultConnectionName: string | null;
105
+ /** Optional row-build manifest passed through to Malloy Runtime. */
106
+ buildManifest?: unknown;
107
+ }
108
+
109
+ // ──────────────────────────────────────────────────────────────────────
110
+ // Direction: worker ──▶ main (load-package result)
111
+ // ──────────────────────────────────────────────────────────────────────
112
+
113
+ /**
114
+ * Wire shape for one compiled model in the package. Mirrors the
115
+ * data a main-thread `Model` constructor needs without holding a
116
+ * `ModelMaterializer` reference (that binds to live native
117
+ * connection handles and can't cross the worker boundary).
118
+ *
119
+ * `compilationError` is set when this single model failed to
120
+ * compile but the rest of the package is fine; the main thread
121
+ * decides whether to abort `Package.create`.
122
+ */
123
+ export interface SerializedModel {
124
+ /** Path relative to the package root, forward-slash normalized. */
125
+ modelPath: string;
126
+ modelType: "model" | "notebook";
127
+ /** Set when the model compiled successfully. Wire-typed as
128
+ * `unknown` so the protocol module doesn't drag in the full
129
+ * Malloy type surface; cast to `ModelDef` on receipt. */
130
+ modelDef?: unknown;
131
+ /**
132
+ * Precomputed `modelDefToModelInfo(modelDef)`. Shipped from the
133
+ * worker so the main-thread `Model` constructor doesn't pay the
134
+ * derivation cost on every package load, and so that every subsequent
135
+ * `getModel()` / `getNotebook()` API hit can stringify a cached
136
+ * object instead of recomputing.
137
+ */
138
+ modelInfo?: unknown;
139
+ sourceInfos?: unknown[];
140
+ sources?: unknown[];
141
+ queries?: unknown[];
142
+ filterMap?: Array<[string, unknown[]]>;
143
+ givens?: unknown[];
144
+ /** Notebook (.malloynb) only — per-cell pre-extracted info. */
145
+ notebookCells?: SerializedNotebookCell[];
146
+ /** Accumulated dataStyles from sibling `.styles.json` files. */
147
+ dataStyles?: unknown;
148
+ /** Wall-clock ms spent compiling this single model in the worker. */
149
+ compileDurationMs?: number;
150
+ /** Set when the model failed to compile. */
151
+ compilationError?: SerializedError;
152
+ }
153
+
154
+ export interface SerializedNotebookCell {
155
+ type: "code" | "markdown";
156
+ /** Raw cell text. */
157
+ text: string;
158
+ /**
159
+ * Per-cell ModelDef captured at the cell's point in the
160
+ * `extendModel` chain. The main thread hydrates a per-cell
161
+ * `ModelMaterializer` from this via
162
+ * `Runtime._loadModelFromModelDef`, so cell-level filter
163
+ * refinement can compile new queries against the correct scope
164
+ * without ever recompiling the .malloynb itself.
165
+ */
166
+ cellModelDef?: unknown;
167
+ /**
168
+ * The final-query QueryDef for this cell, captured during the
169
+ * worker's compile. Main thread hydrates a `QueryMaterializer`
170
+ * via `ModelMaterializer._loadQueryFromQueryDef` — no recompile.
171
+ */
172
+ cellQueryDef?: unknown;
173
+ newSources?: unknown[];
174
+ queryInfo?: unknown;
175
+ }
176
+
177
+ export interface LoadPackageResult {
178
+ type: "load-package-result";
179
+ requestId: string;
180
+ packageMetadata: { name?: string; description?: string };
181
+ models: SerializedModel[];
182
+ /** Wall-clock ms inside the worker for the full package load. */
183
+ loadDurationMs: number;
184
+ }
185
+
186
+ export interface LoadPackageError {
187
+ type: "load-package-error";
188
+ requestId: string;
189
+ error: SerializedError;
190
+ }
191
+
192
+ /**
193
+ * Error wire-shape. We cannot transfer `Error` instances directly
194
+ * across `postMessage` cleanly (Bun/Node behaviour diverges on stack
195
+ * propagation), so we ship a structured payload and reconstitute on
196
+ * the main thread.
197
+ */
198
+ export interface SerializedError {
199
+ name: string;
200
+ message: string;
201
+ stack?: string;
202
+ /** Set when the error originated as a Malloy `MalloyError`. */
203
+ malloyProblems?: unknown[];
204
+ /** Set when the error originated as `ModelCompilationError`. */
205
+ isCompilationError?: boolean;
206
+ }
207
+
208
+ // ──────────────────────────────────────────────────────────────────────
209
+ // Direction: worker ──▶ main (proxy connection metadata)
210
+ // ──────────────────────────────────────────────────────────────────────
211
+
212
+ export interface ConnectionMetadataRequest {
213
+ type: "connection-metadata";
214
+ requestId: string;
215
+ jobId: string;
216
+ connectionName: string;
217
+ }
218
+
219
+ export interface ConnectionMetadataResponse {
220
+ type: "connection-metadata-response";
221
+ requestId: string;
222
+ ok: true;
223
+ metadata: ConnectionMetadata;
224
+ }
225
+
226
+ // ──────────────────────────────────────────────────────────────────────
227
+ // Direction: worker ──▶ main (proxy schema fetches for non-duckdb)
228
+ // ──────────────────────────────────────────────────────────────────────
229
+
230
+ export interface SchemaForTablesRequest {
231
+ type: "schema-for-tables";
232
+ requestId: string;
233
+ /** Job this RPC belongs to (so main routes to the right config). */
234
+ jobId: string;
235
+ connectionName: string;
236
+ tables: Record<string, string>;
237
+ options: {
238
+ refreshTimestamp?: number;
239
+ modelAnnotation?: Annotation;
240
+ };
241
+ }
242
+
243
+ export interface SchemaForTablesResponse {
244
+ type: "schema-for-tables-response";
245
+ requestId: string;
246
+ ok: true;
247
+ schemas: Record<string, TableSourceDef>;
248
+ errors: Record<string, string>;
249
+ }
250
+
251
+ export interface SchemaForSqlRequest {
252
+ type: "schema-for-sql";
253
+ requestId: string;
254
+ jobId: string;
255
+ connectionName: string;
256
+ sentence: unknown;
257
+ options: {
258
+ refreshTimestamp?: number;
259
+ modelAnnotation?: Annotation;
260
+ };
261
+ }
262
+
263
+ export interface SchemaForSqlResponse {
264
+ type: "schema-for-sql-response";
265
+ requestId: string;
266
+ ok: true;
267
+ structDef?: SQLSourceDef;
268
+ error?: string;
269
+ }
270
+
271
+ export interface RpcErrorResponse {
272
+ type: "rpc-error";
273
+ requestId: string;
274
+ ok: false;
275
+ error: SerializedError;
276
+ }
277
+
278
+ // ──────────────────────────────────────────────────────────────────────
279
+ // Direction: worker ──▶ main (file read for non-file URLs)
280
+ // ──────────────────────────────────────────────────────────────────────
281
+
282
+ /**
283
+ * Workers read most files directly via `fs` (they share the host's
284
+ * filesystem namespace). This RPC exists for the rare case where the
285
+ * package URL reader has host-specific behaviour (e.g. virtual files,
286
+ * remote URLs) — we delegate back to the main thread's URL reader so
287
+ * compile semantics stay identical to the in-process path.
288
+ */
289
+ export interface ReadUrlRequest {
290
+ type: "read-url";
291
+ requestId: string;
292
+ jobId: string;
293
+ url: string;
294
+ }
295
+
296
+ export interface ReadUrlResponse {
297
+ type: "read-url-response";
298
+ requestId: string;
299
+ ok: true;
300
+ contents: string;
301
+ invalidationKey?: string | number | null;
302
+ }
303
+
304
+ // ──────────────────────────────────────────────────────────────────────
305
+ // Lifecycle
306
+ // ──────────────────────────────────────────────────────────────────────
307
+
308
+ export interface ShutdownRequest {
309
+ type: "shutdown";
310
+ }
311
+
312
+ export interface ReadyMessage {
313
+ type: "ready";
314
+ }
315
+
316
+ // ──────────────────────────────────────────────────────────────────────
317
+ // Union types for routing
318
+ // ──────────────────────────────────────────────────────────────────────
319
+
320
+ export type MainToWorkerMessage =
321
+ | LoadPackageRequest
322
+ | ConnectionMetadataResponse
323
+ | SchemaForTablesResponse
324
+ | SchemaForSqlResponse
325
+ | ReadUrlResponse
326
+ | RpcErrorResponse
327
+ | ShutdownRequest;
328
+
329
+ export type WorkerToMainMessage =
330
+ | LoadPackageResult
331
+ | LoadPackageError
332
+ | ConnectionMetadataRequest
333
+ | SchemaForTablesRequest
334
+ | SchemaForSqlRequest
335
+ | ReadUrlRequest
336
+ | ReadyMessage;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Express query-param normalization helpers.
3
+ *
4
+ * Kept in a standalone file (no transitive imports) so unit specs can
5
+ * exercise them without dragging in `server.ts` — which transitively
6
+ * constructs `EnvironmentStore` and kicks off an async storage init
7
+ * (clone of `malloy-samples`, package downloads, ...). When that init
8
+ * runs in a `bun test` process it races the test runner's exit and
9
+ * leaves a partially-populated `publisher_data/` on disk, which the
10
+ * next process (integration tests) then trips over.
11
+ */
12
+
13
+ /** Normalize an Express query param into a string[] or undefined. */
14
+ export function normalizeQueryArray(value: unknown): string[] | undefined {
15
+ if (value === undefined || value === null) return undefined;
16
+ if (Array.isArray(value)) return value.map(String);
17
+ return [String(value)];
18
+ }
package/src/server-old.ts CHANGED
@@ -41,7 +41,7 @@ import {
41
41
  NotImplementedError,
42
42
  } from "./errors";
43
43
  import { logger } from "./logger";
44
- import { normalizeQueryArray } from "./server";
44
+ import { normalizeQueryArray } from "./query_param_utils";
45
45
  import { EnvironmentStore } from "./service/environment_store";
46
46
 
47
47
  const LEGACY_API_PREFIX = "/api/v0";
package/src/server.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  // Pre-load the instrumentation module; the instrumentation module must be loaded before the other imports.
2
+ import type { GivenValue } from "@malloydata/malloy";
2
3
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
3
4
  import bodyParser from "body-parser";
4
5
  import cors from "cors";
@@ -41,14 +42,10 @@ import { registerLegacyRoutes } from "./server-old";
41
42
  import { EnvironmentStore } from "./service/environment_store";
42
43
  import { ManifestService } from "./service/manifest_service";
43
44
  import { MaterializationService } from "./service/materialization_service";
45
+ import { normalizeQueryArray } from "./query_param_utils";
44
46
  import { PackageMemoryGovernor } from "./service/package_memory_governor";
45
47
 
46
- /** Normalize an Express query param into a string[] or undefined. */
47
- export function normalizeQueryArray(value: unknown): string[] | undefined {
48
- if (value === undefined || value === null) return undefined;
49
- if (Array.isArray(value)) return value.map(String);
50
- return [String(value)];
51
- }
48
+ export { normalizeQueryArray } from "./query_param_utils";
52
49
 
53
50
  // Parse command line arguments
54
51
  function parseArgs() {
@@ -122,10 +119,12 @@ function parseArgs() {
122
119
  // Zero-config invocation (`npx @malloy-publisher/server`) opts in to
123
120
  // the bundled DuckDB-only sample config so the Quick Start works
124
121
  // without any flags. Any explicit --server_root or --config disables
125
- // this — the user told us where to look. Skip in NODE_ENV=test so
126
- // specs that import this module for utility helpers (e.g.
127
- // db_utils.spec.ts -> normalizeQueryArray) don't get the bundled
128
- // default leaked into their EnvironmentStore construction.
122
+ // this — the user told us where to look. Skip in NODE_ENV=test as a
123
+ // belt-and-suspenders measure so any spec that ends up evaluating this
124
+ // module doesn't accidentally pin the EnvironmentStore to the
125
+ // bundled malloy-samples config; query-param helpers have been
126
+ // moved to `./query_param_utils` precisely so unit specs no longer
127
+ // need to import this module at all.
129
128
  if (!sawServerRoot && !sawConfig && process.env.NODE_ENV !== "test") {
130
129
  process.env.PUBLISHER_USE_BUNDLED_DEFAULT = "true";
131
130
  }
@@ -1110,6 +1109,18 @@ app.get(
1110
1109
  const bypassFilters =
1111
1110
  req.query.bypass_filters === "true" ? true : undefined;
1112
1111
 
1112
+ let givens: Record<string, GivenValue> | undefined;
1113
+ if (typeof req.query.givens === "string") {
1114
+ try {
1115
+ givens = JSON.parse(req.query.givens);
1116
+ } catch {
1117
+ res.status(400).json({
1118
+ error: "Invalid givens: must be valid JSON",
1119
+ });
1120
+ return;
1121
+ }
1122
+ }
1123
+
1113
1124
  res.status(200).json(
1114
1125
  await modelController.executeNotebookCell(
1115
1126
  req.params.environmentName,
@@ -1118,6 +1129,7 @@ app.get(
1118
1129
  cellIndex,
1119
1130
  filterParams,
1120
1131
  bypassFilters,
1132
+ givens,
1121
1133
  ),
1122
1134
  );
1123
1135
  } catch (error) {
@@ -1178,6 +1190,7 @@ app.post(
1178
1190
  | Record<string, string | string[]>
1179
1191
  | undefined,
1180
1192
  req.body.bypassFilters === true ? true : undefined,
1193
+ req.body.givens as Record<string, GivenValue> | undefined,
1181
1194
  ),
1182
1195
  );
1183
1196
  } catch (error) {
@@ -1221,6 +1234,7 @@ app.post(
1221
1234
  req.params.modelName,
1222
1235
  req.body.source,
1223
1236
  req.body.includeSql === true,
1237
+ req.body.givens as Record<string, GivenValue> | undefined,
1224
1238
  );
1225
1239
  res.status(200).json(result);
1226
1240
  } catch (error) {
@@ -1431,6 +1445,18 @@ app.use(
1431
1445
  },
1432
1446
  );
1433
1447
 
1448
+ // Eagerly construct the package-load worker pool so we fail fast at
1449
+ // boot if PACKAGE_LOAD_WORKERS is misconfigured (e.g. set to 0, the
1450
+ // removed in-process fallback). Surfacing the bad config here is much
1451
+ // friendlier than surfacing it on the first package load, which could
1452
+ // be hours after start.
1453
+ {
1454
+ const { getPackageLoadPool } = await import(
1455
+ "./package_load/package_load_pool"
1456
+ );
1457
+ getPackageLoadPool();
1458
+ }
1459
+
1434
1460
  const mainServer = http.createServer({ maxHeaderSize: 262144 }, app);
1435
1461
 
1436
1462
  mainServer.timeout = 600000;
@@ -12,7 +12,7 @@ mock.module("@google-cloud/bigquery", () => ({
12
12
  }));
13
13
 
14
14
  import { Connection } from "@malloydata/malloy";
15
- import { normalizeQueryArray } from "../server";
15
+ import { normalizeQueryArray } from "../query_param_utils";
16
16
  import {
17
17
  extractErrorDataFromError,
18
18
  getSchemasForConnection,
@@ -1,4 +1,4 @@
1
- import type { LogMessage } from "@malloydata/malloy";
1
+ import type { GivenValue, LogMessage } from "@malloydata/malloy";
2
2
  import { MalloyError, Runtime } from "@malloydata/malloy";
3
3
  import { Mutex } from "async-mutex";
4
4
  import crypto from "crypto";
@@ -238,6 +238,7 @@ export class Environment {
238
238
  modelName: string,
239
239
  source: string,
240
240
  includeSql: boolean = false,
241
+ givens?: Record<string, GivenValue>,
241
242
  ): Promise<{ problems: LogMessage[]; sql?: string }> {
242
243
  assertSafePackageName(packageName);
243
244
  assertSafeRelativeModelPath(modelName);
@@ -308,7 +309,7 @@ export class Environment {
308
309
  if (includeSql) {
309
310
  try {
310
311
  const queryMaterializer = modelMaterializer.loadFinalQuery();
311
- sql = await queryMaterializer.getSQL();
312
+ sql = await queryMaterializer.getSQL({ givens });
312
313
  } catch {
313
314
  // Source may not contain a runnable query (e.g. only source definitions),
314
315
  // in which case we simply omit the sql field.
@@ -1,8 +1,8 @@
1
1
  import { GetObjectCommand, S3 } from "@aws-sdk/client-s3";
2
2
  import { Storage } from "@google-cloud/storage";
3
- import AdmZip from "adm-zip";
4
3
  import { Mutex } from "async-mutex";
5
4
  import crypto from "crypto";
5
+ import extract from "extract-zip";
6
6
  import * as fs from "fs";
7
7
  import * as path from "path";
8
8
  import simpleGit from "simple-git";
@@ -884,6 +884,7 @@ export class EnvironmentStore {
884
884
  }
885
885
 
886
886
  public async unzipEnvironment(absoluteEnvironmentPath: string) {
887
+ const startedAt = Date.now();
887
888
  logger.info(
888
889
  `Detected zip file at "${absoluteEnvironmentPath}". Unzipping...`,
889
890
  );
@@ -897,8 +898,28 @@ export class EnvironmentStore {
897
898
  });
898
899
  await fs.promises.mkdir(unzippedEnvironmentPath, { recursive: true });
899
900
 
900
- const zip = new AdmZip(absoluteEnvironmentPath);
901
- zip.extractAllTo(unzippedEnvironmentPath, true);
901
+ // Stream-extract via yauzl (wrapped by extract-zip). Each entry's
902
+ // inflate and write are dispatched to the libuv thread pool, so the
903
+ // main event loop stays responsive even for very large archives.
904
+ // The previous adm-zip path used fs.readFileSync + zlib.inflateRawSync
905
+ // on the main thread, which parked the loop long enough on multi-
906
+ // hundred-MB packages to fail Kubernetes liveness probes mid-extract.
907
+ let entryCount = 0;
908
+ let totalUncompressedBytes = 0;
909
+ await extract(absoluteEnvironmentPath, {
910
+ dir: path.resolve(unzippedEnvironmentPath),
911
+ onEntry: (entry) => {
912
+ entryCount += 1;
913
+ totalUncompressedBytes += entry.uncompressedSize ?? 0;
914
+ },
915
+ });
916
+
917
+ const mib = (totalUncompressedBytes / (1024 * 1024)).toFixed(1);
918
+ logger.info(
919
+ `Unzipped "${absoluteEnvironmentPath}" -> "${unzippedEnvironmentPath}" ` +
920
+ `(${entryCount} entries, ${mib} MiB uncompressed) in ` +
921
+ `${formatDuration(Date.now() - startedAt)}`,
922
+ );
902
923
 
903
924
  return unzippedEnvironmentPath;
904
925
  }
@@ -133,6 +133,55 @@ import "child_orders.malloy"
133
133
  run: child_orders -> summary
134
134
  `;
135
135
 
136
+ // Model with a given: declaration — view filters rows by the given value
137
+ const MODEL_WITH_GIVENS = `##! experimental.givens
138
+
139
+ given: target_region :: string is 'US'
140
+
141
+ source: orders is duckdb.table('orders') extend {
142
+ primary_key: order_id
143
+
144
+ measure:
145
+ order_count is count()
146
+ total_amount is sum(amount)
147
+
148
+ view: by_given_region is {
149
+ where: region = $target_region
150
+ aggregate: order_count, total_amount
151
+ }
152
+ }
153
+ `;
154
+
155
+ // Model with both a #(filter) annotation and a given: declaration to verify composition
156
+ const MODEL_WITH_GIVENS_AND_FILTER = `##! experimental.givens
157
+
158
+ given: target_region :: string is 'US'
159
+
160
+ #(filter) dimension=status type=equal
161
+ source: orders is duckdb.table('orders') extend {
162
+ primary_key: order_id
163
+
164
+ measure:
165
+ order_count is count()
166
+ total_amount is sum(amount)
167
+
168
+ view: by_given_region is {
169
+ where: region = $target_region
170
+ aggregate: order_count, total_amount
171
+ }
172
+ }
173
+ `;
174
+
175
+ const NOTEBOOK_GIVENS = `>>>markdown
176
+ # Givens Test
177
+
178
+ >>>malloy
179
+ import "orders_givens.malloy"
180
+
181
+ >>>malloy
182
+ run: orders -> by_given_region
183
+ `;
184
+
136
185
  beforeAll(async () => {
137
186
  await fs.mkdir(TEST_DB_DIR, { recursive: true });
138
187
  await fs.mkdir(TEST_PKG_DIR, { recursive: true });
@@ -657,6 +706,67 @@ describe("filter integration", () => {
657
706
  expect(markdownCell.type).toBe("markdown");
658
707
  expect(markdownCell.text).toContain("Test Notebook");
659
708
  });
709
+
710
+ it("applies givens to notebook cell execution", async () => {
711
+ await writeFile("orders_givens.malloy", MODEL_WITH_GIVENS);
712
+ await writeFile("givens_notebook.malloynb", NOTEBOOK_GIVENS);
713
+ const model = await Model.create(
714
+ "test-pkg",
715
+ TEST_PKG_DIR,
716
+ "givens_notebook.malloynb",
717
+ getConnections(),
718
+ );
719
+
720
+ // Cell 2: run: orders -> by_given_region with target_region overridden to 'EU'
721
+ // EU rows: (3,'EU','active',150) and (4,'EU','cancelled',75) → order_count=2, total_amount=225
722
+ const codeCell = await model.executeNotebookCell(
723
+ 2,
724
+ undefined,
725
+ undefined,
726
+ { target_region: "EU" },
727
+ );
728
+ expect(codeCell.result).toBeDefined();
729
+
730
+ const notebookRows = parseNotebookResult(codeCell.result!);
731
+ expect(notebookRows.length).toBe(1);
732
+ expect(Number(notebookRows[0].order_count)).toBe(2);
733
+ expect(Number(notebookRows[0].total_amount)).toBe(225);
734
+ });
735
+
736
+ it("composes givens and filterParams in notebook cell execution", async () => {
737
+ await writeFile(
738
+ "orders_givens_filter.malloy",
739
+ MODEL_WITH_GIVENS_AND_FILTER,
740
+ );
741
+ await writeFile(
742
+ "givens_filter_notebook.malloynb",
743
+ NOTEBOOK_GIVENS.replace(
744
+ "orders_givens.malloy",
745
+ "orders_givens_filter.malloy",
746
+ ),
747
+ );
748
+ const model = await Model.create(
749
+ "test-pkg",
750
+ TEST_PKG_DIR,
751
+ "givens_filter_notebook.malloynb",
752
+ getConnections(),
753
+ );
754
+
755
+ // given restricts to APAC; filterParam restricts to active
756
+ // APAC + active: only (5,'APAC','active',300) → order_count=1, total_amount=300
757
+ const codeCell = await model.executeNotebookCell(
758
+ 2,
759
+ { status: "active" },
760
+ undefined,
761
+ { target_region: "APAC" },
762
+ );
763
+ expect(codeCell.result).toBeDefined();
764
+
765
+ const notebookRows = parseNotebookResult(codeCell.result!);
766
+ expect(notebookRows.length).toBe(1);
767
+ expect(Number(notebookRows[0].order_count)).toBe(1);
768
+ expect(Number(notebookRows[0].total_amount)).toBe(300);
769
+ });
660
770
  });
661
771
 
662
772
  // -----------------------------------------------------------------------