@malloy-publisher/server 0.0.198 → 0.0.199
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +30 -1
- package/dist/app/api-doc.yaml +51 -0
- package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-Dpee_Kn6.js} +1 -1
- package/dist/app/assets/{HomePage-DwkH7OrS.js → HomePage-DLRWTNoL.js} +1 -1
- package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-DsVt5QGM.js} +1 -1
- package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-AwAugZ37.js} +1 -1
- package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-XQ-EWGTC.js} +1 -1
- package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-3Mv8JQw7.js} +1 -1
- package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DHYYpcYc.js} +1 -1
- package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-DfcpQGVP.es-DQggNOdX.js} +1 -1
- package/dist/app/assets/{index-DNofXMxi.js → index-BUp81Qdm.js} +1 -1
- package/dist/app/assets/{index-DL6BZTuw.js → index-D1pdwrUW.js} +1 -1
- package/dist/app/assets/{index-U38AyjJL.js → index-Dv5bF4Ii.js} +4 -4
- package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-CQH4LZU8.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/instrumentation.mjs +57 -36
- package/dist/package_load_worker.mjs +12213 -0
- package/dist/server.mjs +2807 -2729
- package/package.json +2 -3
- package/src/controller/compile.controller.ts +3 -1
- package/src/controller/model.controller.ts +8 -1
- package/src/controller/query.controller.ts +3 -0
- package/src/health.spec.ts +90 -0
- package/src/health.ts +88 -45
- package/src/instrumentation.ts +50 -0
- package/src/mcp/tools/execute_query_tool.ts +12 -0
- package/src/package_load/package_load_pool.spec.ts +252 -0
- package/src/package_load/package_load_pool.ts +920 -0
- package/src/package_load/package_load_worker.ts +980 -0
- package/src/package_load/protocol.ts +336 -0
- package/src/query_param_utils.ts +18 -0
- package/src/server-old.ts +1 -1
- package/src/server.ts +36 -10
- package/src/service/db_utils.spec.ts +1 -1
- package/src/service/environment.ts +3 -2
- package/src/service/environment_store.ts +24 -3
- package/src/service/filter_integration.spec.ts +110 -0
- package/src/service/given.ts +80 -0
- package/src/service/givens_integration.spec.ts +192 -0
- package/src/service/model.spec.ts +105 -0
- package/src/service/model.ts +287 -16
- package/src/service/package.spec.ts +10 -0
- package/src/service/package.ts +257 -145
- package/src/service/package_worker_path.spec.ts +196 -0
- package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wire protocol between the main thread (`PackageLoadPool`) and a
|
|
3
|
+
* package-load worker thread.
|
|
4
|
+
*
|
|
5
|
+
* Boundary
|
|
6
|
+
* --------
|
|
7
|
+
* The worker performs the **CPU-bound bulk of `Package.create`** off
|
|
8
|
+
* the main event loop:
|
|
9
|
+
*
|
|
10
|
+
* 1. Read `publisher.config.json` (cheap, but already on the worker
|
|
11
|
+
* side of the boundary so the main thread isn't blocked).
|
|
12
|
+
* 2. Compile every `.malloy` / `.malloynb` (the Malloy parser,
|
|
13
|
+
* type-checker, and IR-builder — the dominant CPU cost).
|
|
14
|
+
* 3. Return a structured-clonable POJO carrying every `modelDef`,
|
|
15
|
+
* `sourceInfos`, dataStyles, etc. that the main thread needs to
|
|
16
|
+
* reconstitute a live `Package`.
|
|
17
|
+
*
|
|
18
|
+
* Embedded database probing (`.parquet` / `.csv` schema + row count)
|
|
19
|
+
* stays on the main thread — it reuses the package's existing DuckDB
|
|
20
|
+
* connection (PR #772) and the probe queries are async-IO-bound, not
|
|
21
|
+
* CPU-bound. Keeping all native-DB handles on the main thread also
|
|
22
|
+
* sidesteps Bun crash 0x20131 where duckdb-native cannot be loaded
|
|
23
|
+
* into more than one isolate of the same process.
|
|
24
|
+
*
|
|
25
|
+
* The main thread reconstitutes by:
|
|
26
|
+
* - Building a fresh `MalloyConfig` against its own connection pool
|
|
27
|
+
* (live native handles can't cross the worker boundary).
|
|
28
|
+
* - Lazy-hydrating each model's `ModelMaterializer` from `modelDef`
|
|
29
|
+
* via `Runtime._loadModelFromModelDef` on first query — NO
|
|
30
|
+
* recompile. This is what closes the loop on PR #767's original
|
|
31
|
+
* "first-query recompile on main thread" gap.
|
|
32
|
+
*
|
|
33
|
+
* Per-model compile failures are returned in-band on
|
|
34
|
+
* `SerializedModel.compilationError` so a single bad model doesn't
|
|
35
|
+
* abort the rest of the package load. The main thread decides
|
|
36
|
+
* whether/when to surface as a fatal `Package.create` error (today
|
|
37
|
+
* it throws on the first error; `Package.reloadAllModels` keeps the
|
|
38
|
+
* failed models as placeholders in the package's model map).
|
|
39
|
+
*
|
|
40
|
+
* Whole-package failures (manifest missing, FS errors, worker
|
|
41
|
+
* crashes) come back as `LoadPackageError`. The pool's main-thread
|
|
42
|
+
* half (`PackageLoadPool.loadPackage`) rejects with a deserialised
|
|
43
|
+
* Error; `Package.loadViaWorker` then rewraps any non-compile
|
|
44
|
+
* failure as `ServiceUnavailableError` so the HTTP layer responds
|
|
45
|
+
* 503 (transient, retryable) — there is no in-process fallback.
|
|
46
|
+
*
|
|
47
|
+
* Direction summary
|
|
48
|
+
* -----------------
|
|
49
|
+
* main ──▶ worker: LoadPackageRequest (start)
|
|
50
|
+
* worker ──▶ main: LoadPackageResult (success)
|
|
51
|
+
* worker ──▶ main: LoadPackageError (whole-package failure)
|
|
52
|
+
*
|
|
53
|
+
* worker ──▶ main: ConnectionMetadataRequest (proxy non-duckdb lookups)
|
|
54
|
+
* worker ──▶ main: SchemaForTablesRequest (proxy schema fetch)
|
|
55
|
+
* worker ──▶ main: SchemaForSqlRequest (proxy SQL block schema)
|
|
56
|
+
* worker ──▶ main: ReadUrlRequest (proxy non-file URL reads)
|
|
57
|
+
* main ──▶ worker: *Response (correlated by requestId)
|
|
58
|
+
*
|
|
59
|
+
* main ──▶ worker: ShutdownRequest (graceful drain)
|
|
60
|
+
* worker ──▶ main: ReadyMessage (post-init handshake)
|
|
61
|
+
*
|
|
62
|
+
* The protocol uses plain structured-clonable POJOs so the
|
|
63
|
+
* `postMessage` transfer goes through V8's structured clone — much
|
|
64
|
+
* cheaper than `JSON.stringify` for the multi-MB modelDef payloads.
|
|
65
|
+
*/
|
|
66
|
+
|
|
67
|
+
import type {
|
|
68
|
+
Annotation,
|
|
69
|
+
SQLSourceDef,
|
|
70
|
+
TableSourceDef,
|
|
71
|
+
} from "@malloydata/malloy";
|
|
72
|
+
|
|
73
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
74
|
+
// Direction: main ──▶ worker (load-package job)
|
|
75
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Connection metadata the worker needs to construct a stub
|
|
79
|
+
* `InfoConnection`. Resolved lazily — the worker asks the main thread
|
|
80
|
+
* on the first `lookupConnection(name)` call (see
|
|
81
|
+
* {@link ConnectionMetadataRequest}). We don't ship the full list
|
|
82
|
+
* upfront because Malloy only references connections by name as it
|
|
83
|
+
* encounters `<connection>.table('...')` / `<connection>.sql('...')`
|
|
84
|
+
* inside the model.
|
|
85
|
+
*/
|
|
86
|
+
export interface ConnectionMetadata {
|
|
87
|
+
name: string;
|
|
88
|
+
dialectName: string;
|
|
89
|
+
digest: string;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
export interface LoadPackageRequest {
|
|
93
|
+
type: "load-package";
|
|
94
|
+
requestId: string;
|
|
95
|
+
/** Absolute path to the package directory on disk. */
|
|
96
|
+
packagePath: string;
|
|
97
|
+
/** Logical package name (used in metric labels + log fields). */
|
|
98
|
+
packageName: string;
|
|
99
|
+
/**
|
|
100
|
+
* Default connection name (passed verbatim to the worker; today
|
|
101
|
+
* always `"duckdb"` for embedded packages, but kept configurable
|
|
102
|
+
* to mirror Malloy's own surface).
|
|
103
|
+
*/
|
|
104
|
+
defaultConnectionName: string | null;
|
|
105
|
+
/** Optional row-build manifest passed through to Malloy Runtime. */
|
|
106
|
+
buildManifest?: unknown;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
110
|
+
// Direction: worker ──▶ main (load-package result)
|
|
111
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Wire shape for one compiled model in the package. Mirrors the
|
|
115
|
+
* data a main-thread `Model` constructor needs without holding a
|
|
116
|
+
* `ModelMaterializer` reference (that binds to live native
|
|
117
|
+
* connection handles and can't cross the worker boundary).
|
|
118
|
+
*
|
|
119
|
+
* `compilationError` is set when this single model failed to
|
|
120
|
+
* compile but the rest of the package is fine; the main thread
|
|
121
|
+
* decides whether to abort `Package.create`.
|
|
122
|
+
*/
|
|
123
|
+
export interface SerializedModel {
|
|
124
|
+
/** Path relative to the package root, forward-slash normalized. */
|
|
125
|
+
modelPath: string;
|
|
126
|
+
modelType: "model" | "notebook";
|
|
127
|
+
/** Set when the model compiled successfully. Wire-typed as
|
|
128
|
+
* `unknown` so the protocol module doesn't drag in the full
|
|
129
|
+
* Malloy type surface; cast to `ModelDef` on receipt. */
|
|
130
|
+
modelDef?: unknown;
|
|
131
|
+
/**
|
|
132
|
+
* Precomputed `modelDefToModelInfo(modelDef)`. Shipped from the
|
|
133
|
+
* worker so the main-thread `Model` constructor doesn't pay the
|
|
134
|
+
* derivation cost on every package load, and so every subsequent
|
|
135
|
+
* `getModel()` / `getNotebook()` API hit can stringify a cached
|
|
136
|
+
* object instead of recomputing.
|
|
137
|
+
*/
|
|
138
|
+
modelInfo?: unknown;
|
|
139
|
+
sourceInfos?: unknown[];
|
|
140
|
+
sources?: unknown[];
|
|
141
|
+
queries?: unknown[];
|
|
142
|
+
filterMap?: Array<[string, unknown[]]>;
|
|
143
|
+
givens?: unknown[];
|
|
144
|
+
/** Notebook (.malloynb) only — per-cell pre-extracted info. */
|
|
145
|
+
notebookCells?: SerializedNotebookCell[];
|
|
146
|
+
/** Accumulated dataStyles from sibling `.styles.json` files. */
|
|
147
|
+
dataStyles?: unknown;
|
|
148
|
+
/** Wall-clock ms spent compiling this single model in the worker. */
|
|
149
|
+
compileDurationMs?: number;
|
|
150
|
+
/** Set when the model failed to compile. */
|
|
151
|
+
compilationError?: SerializedError;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
export interface SerializedNotebookCell {
|
|
155
|
+
type: "code" | "markdown";
|
|
156
|
+
/** Raw cell text. */
|
|
157
|
+
text: string;
|
|
158
|
+
/**
|
|
159
|
+
* Per-cell ModelDef captured at the cell's point in the
|
|
160
|
+
* `extendModel` chain. The main thread hydrates a per-cell
|
|
161
|
+
* `ModelMaterializer` from this via
|
|
162
|
+
* `Runtime._loadModelFromModelDef`, so cell-level filter
|
|
163
|
+
* refinement can compile new queries against the correct scope
|
|
164
|
+
* without ever recompiling the .malloynb itself.
|
|
165
|
+
*/
|
|
166
|
+
cellModelDef?: unknown;
|
|
167
|
+
/**
|
|
168
|
+
* The final-query QueryDef for this cell, captured during the
|
|
169
|
+
* worker's compile. Main thread hydrates a `QueryMaterializer`
|
|
170
|
+
* via `ModelMaterializer._loadQueryFromQueryDef` — no recompile.
|
|
171
|
+
*/
|
|
172
|
+
cellQueryDef?: unknown;
|
|
173
|
+
newSources?: unknown[];
|
|
174
|
+
queryInfo?: unknown;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
export interface LoadPackageResult {
|
|
178
|
+
type: "load-package-result";
|
|
179
|
+
requestId: string;
|
|
180
|
+
packageMetadata: { name?: string; description?: string };
|
|
181
|
+
models: SerializedModel[];
|
|
182
|
+
/** Wall-clock ms inside the worker for the full package load. */
|
|
183
|
+
loadDurationMs: number;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
 * worker ──▶ main: whole-package failure for the load job identified by
 * `requestId` (the file header lists manifest missing, FS errors and
 * worker crashes as examples). Per-model compile failures do not use
 * this message; they travel in-band on `SerializedModel.compilationError`.
 */
export interface LoadPackageError {
|
|
187
|
+
type: "load-package-error";
|
|
188
|
+
requestId: string;
|
|
189
|
+
error: SerializedError;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/**
|
|
193
|
+
* Error wire-shape. We cannot transfer `Error` instances directly
|
|
194
|
+
* across `postMessage` cleanly (Bun/Node behaviour diverges on stack
|
|
195
|
+
* propagation), so we ship a structured payload and reconstitute on
|
|
196
|
+
* the main thread.
|
|
197
|
+
*/
|
|
198
|
+
export interface SerializedError {
|
|
199
|
+
name: string;
|
|
200
|
+
message: string;
|
|
201
|
+
stack?: string;
|
|
202
|
+
/** Set when the error originated as a Malloy `MalloyError`. */
|
|
203
|
+
malloyProblems?: unknown[];
|
|
204
|
+
/** Set when the error originated as `ModelCompilationError`. */
|
|
205
|
+
isCompilationError?: boolean;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
209
|
+
// Direction: worker ──▶ main (proxy connection metadata)
|
|
210
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
211
|
+
|
|
212
|
+
export interface ConnectionMetadataRequest {
|
|
213
|
+
type: "connection-metadata";
|
|
214
|
+
requestId: string;
|
|
215
|
+
jobId: string;
|
|
216
|
+
connectionName: string;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
export interface ConnectionMetadataResponse {
|
|
220
|
+
type: "connection-metadata-response";
|
|
221
|
+
requestId: string;
|
|
222
|
+
ok: true;
|
|
223
|
+
metadata: ConnectionMetadata;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
227
|
+
// Direction: worker ──▶ main (proxy schema fetches for non-duckdb)
|
|
228
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
229
|
+
|
|
230
|
+
export interface SchemaForTablesRequest {
|
|
231
|
+
type: "schema-for-tables";
|
|
232
|
+
requestId: string;
|
|
233
|
+
/** Job this RPC belongs to (so main routes to the right config). */
|
|
234
|
+
jobId: string;
|
|
235
|
+
connectionName: string;
|
|
236
|
+
tables: Record<string, string>;
|
|
237
|
+
options: {
|
|
238
|
+
refreshTimestamp?: number;
|
|
239
|
+
modelAnnotation?: Annotation;
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
export interface SchemaForTablesResponse {
|
|
244
|
+
type: "schema-for-tables-response";
|
|
245
|
+
requestId: string;
|
|
246
|
+
ok: true;
|
|
247
|
+
schemas: Record<string, TableSourceDef>;
|
|
248
|
+
errors: Record<string, string>;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
export interface SchemaForSqlRequest {
|
|
252
|
+
type: "schema-for-sql";
|
|
253
|
+
requestId: string;
|
|
254
|
+
jobId: string;
|
|
255
|
+
connectionName: string;
|
|
256
|
+
sentence: unknown;
|
|
257
|
+
options: {
|
|
258
|
+
refreshTimestamp?: number;
|
|
259
|
+
modelAnnotation?: Annotation;
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
export interface SchemaForSqlResponse {
|
|
264
|
+
type: "schema-for-sql-response";
|
|
265
|
+
requestId: string;
|
|
266
|
+
ok: true;
|
|
267
|
+
structDef?: SQLSourceDef;
|
|
268
|
+
error?: string;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/**
 * main ──▶ worker failure reply (it is a member of `MainToWorkerMessage`),
 * correlated by `requestId`. Its `ok: false` tag is the counterpart of the
 * `ok: true` tag on the typed `*Response` shapes.
 * NOTE(review): presumably sent in place of any typed response when the
 * proxied call throws on the main thread — confirm against the pool code.
 */
export interface RpcErrorResponse {
|
|
272
|
+
type: "rpc-error";
|
|
273
|
+
requestId: string;
|
|
274
|
+
ok: false;
|
|
275
|
+
error: SerializedError;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
279
|
+
// Direction: worker ──▶ main (file read for non-file URLs)
|
|
280
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Workers read most files directly via `fs` (they share the host's
|
|
284
|
+
* filesystem namespace). This RPC exists for the rare case where the
|
|
285
|
+
* package URL reader has host-specific behaviour (e.g. virtual files,
|
|
286
|
+
* remote URLs) — we delegate back to the main thread's URL reader so
|
|
287
|
+
* compile semantics stay identical to the in-process path.
|
|
288
|
+
*/
|
|
289
|
+
export interface ReadUrlRequest {
|
|
290
|
+
type: "read-url";
|
|
291
|
+
requestId: string;
|
|
292
|
+
jobId: string;
|
|
293
|
+
url: string;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
export interface ReadUrlResponse {
|
|
297
|
+
type: "read-url-response";
|
|
298
|
+
requestId: string;
|
|
299
|
+
ok: true;
|
|
300
|
+
contents: string;
|
|
301
|
+
invalidationKey?: string | number | null;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
305
|
+
// Lifecycle
|
|
306
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
307
|
+
|
|
308
|
+
/**
 * Lifecycle message, main ──▶ worker: requests a graceful drain.
 * Carries no payload beyond its `type` tag.
 */
export interface ShutdownRequest {
|
|
309
|
+
type: "shutdown";
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/**
 * Lifecycle message, worker ──▶ main: the post-init handshake.
 * Carries no payload beyond its `type` tag.
 */
export interface ReadyMessage {
|
|
313
|
+
type: "ready";
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
317
|
+
// Union types for routing
|
|
318
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
319
|
+
|
|
320
|
+
export type MainToWorkerMessage =
|
|
321
|
+
| LoadPackageRequest
|
|
322
|
+
| ConnectionMetadataResponse
|
|
323
|
+
| SchemaForTablesResponse
|
|
324
|
+
| SchemaForSqlResponse
|
|
325
|
+
| ReadUrlResponse
|
|
326
|
+
| RpcErrorResponse
|
|
327
|
+
| ShutdownRequest;
|
|
328
|
+
|
|
329
|
+
export type WorkerToMainMessage =
|
|
330
|
+
| LoadPackageResult
|
|
331
|
+
| LoadPackageError
|
|
332
|
+
| ConnectionMetadataRequest
|
|
333
|
+
| SchemaForTablesRequest
|
|
334
|
+
| SchemaForSqlRequest
|
|
335
|
+
| ReadUrlRequest
|
|
336
|
+
| ReadyMessage;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Express query-param normalization helpers.
|
|
3
|
+
*
|
|
4
|
+
* Kept in a standalone file (no transitive imports) so unit specs can
|
|
5
|
+
* exercise them without dragging in `server.ts` — which transitively
|
|
6
|
+
* constructs `EnvironmentStore` and kicks off an async storage init
|
|
7
|
+
* (clone of `malloy-samples`, package downloads, ...). When that init
|
|
8
|
+
* runs in a `bun test` process it races the test runner's exit and
|
|
9
|
+
* leaves a partially-populated `publisher_data/` on disk, which the
|
|
10
|
+
* next process (integration tests) then trips over.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Coerce a raw Express query-parameter value into a list of strings.
 *
 * @param value - The raw query-parameter value (scalar, array, or absent).
 * @returns `undefined` when `value` is `null` or `undefined`; otherwise an
 *   array in which every element (or the lone scalar) has been converted
 *   with `String`.
 */
export function normalizeQueryArray(value: unknown): string[] | undefined {
   // `== null` matches exactly `null` and `undefined`.
   if (value == null) {
      return undefined;
   }
   // Treat a scalar as a one-element list so both shapes share one path.
   const items: unknown[] = Array.isArray(value) ? value : [value];
   return items.map((item) => String(item));
}
|
package/src/server-old.ts
CHANGED
|
@@ -41,7 +41,7 @@ import {
|
|
|
41
41
|
NotImplementedError,
|
|
42
42
|
} from "./errors";
|
|
43
43
|
import { logger } from "./logger";
|
|
44
|
-
import { normalizeQueryArray } from "./server";
|
|
44
|
+
import { normalizeQueryArray } from "./query_param_utils";
|
|
45
45
|
import { EnvironmentStore } from "./service/environment_store";
|
|
46
46
|
|
|
47
47
|
const LEGACY_API_PREFIX = "/api/v0";
|
package/src/server.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
// Pre-load the instrumentation module; the instrumentation module must be loaded before the other imports.
|
|
2
|
+
import type { GivenValue } from "@malloydata/malloy";
|
|
2
3
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
3
4
|
import bodyParser from "body-parser";
|
|
4
5
|
import cors from "cors";
|
|
@@ -41,14 +42,10 @@ import { registerLegacyRoutes } from "./server-old";
|
|
|
41
42
|
import { EnvironmentStore } from "./service/environment_store";
|
|
42
43
|
import { ManifestService } from "./service/manifest_service";
|
|
43
44
|
import { MaterializationService } from "./service/materialization_service";
|
|
45
|
+
import { normalizeQueryArray } from "./query_param_utils";
|
|
44
46
|
import { PackageMemoryGovernor } from "./service/package_memory_governor";
|
|
45
47
|
|
|
46
|
-
|
|
47
|
-
export function normalizeQueryArray(value: unknown): string[] | undefined {
|
|
48
|
-
if (value === undefined || value === null) return undefined;
|
|
49
|
-
if (Array.isArray(value)) return value.map(String);
|
|
50
|
-
return [String(value)];
|
|
51
|
-
}
|
|
48
|
+
export { normalizeQueryArray } from "./query_param_utils";
|
|
52
49
|
|
|
53
50
|
// Parse command line arguments
|
|
54
51
|
function parseArgs() {
|
|
@@ -122,10 +119,12 @@ function parseArgs() {
|
|
|
122
119
|
// Zero-config invocation (`npx @malloy-publisher/server`) opts in to
|
|
123
120
|
// the bundled DuckDB-only sample config so the Quick Start works
|
|
124
121
|
// without any flags. Any explicit --server_root or --config disables
|
|
125
|
-
// this — the user told us where to look. Skip in NODE_ENV=test
|
|
126
|
-
//
|
|
127
|
-
//
|
|
128
|
-
//
|
|
122
|
+
// this — the user told us where to look. Skip in NODE_ENV=test as a
|
|
123
|
+
// belt-and-suspenders so any spec that ends up evaluating this
|
|
124
|
+
// module doesn't accidentally pin the EnvironmentStore to the
|
|
125
|
+
// bundled malloy-samples config; query-param helpers have been
|
|
126
|
+
// moved to `./query_param_utils` precisely so unit specs no longer
|
|
127
|
+
// need to import this module at all.
|
|
129
128
|
if (!sawServerRoot && !sawConfig && process.env.NODE_ENV !== "test") {
|
|
130
129
|
process.env.PUBLISHER_USE_BUNDLED_DEFAULT = "true";
|
|
131
130
|
}
|
|
@@ -1110,6 +1109,18 @@ app.get(
|
|
|
1110
1109
|
const bypassFilters =
|
|
1111
1110
|
req.query.bypass_filters === "true" ? true : undefined;
|
|
1112
1111
|
|
|
1112
|
+
let givens: Record<string, GivenValue> | undefined;
|
|
1113
|
+
if (typeof req.query.givens === "string") {
|
|
1114
|
+
try {
|
|
1115
|
+
givens = JSON.parse(req.query.givens);
|
|
1116
|
+
} catch {
|
|
1117
|
+
res.status(400).json({
|
|
1118
|
+
error: "Invalid givens: must be valid JSON",
|
|
1119
|
+
});
|
|
1120
|
+
return;
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1113
1124
|
res.status(200).json(
|
|
1114
1125
|
await modelController.executeNotebookCell(
|
|
1115
1126
|
req.params.environmentName,
|
|
@@ -1118,6 +1129,7 @@ app.get(
|
|
|
1118
1129
|
cellIndex,
|
|
1119
1130
|
filterParams,
|
|
1120
1131
|
bypassFilters,
|
|
1132
|
+
givens,
|
|
1121
1133
|
),
|
|
1122
1134
|
);
|
|
1123
1135
|
} catch (error) {
|
|
@@ -1178,6 +1190,7 @@ app.post(
|
|
|
1178
1190
|
| Record<string, string | string[]>
|
|
1179
1191
|
| undefined,
|
|
1180
1192
|
req.body.bypassFilters === true ? true : undefined,
|
|
1193
|
+
req.body.givens as Record<string, GivenValue> | undefined,
|
|
1181
1194
|
),
|
|
1182
1195
|
);
|
|
1183
1196
|
} catch (error) {
|
|
@@ -1221,6 +1234,7 @@ app.post(
|
|
|
1221
1234
|
req.params.modelName,
|
|
1222
1235
|
req.body.source,
|
|
1223
1236
|
req.body.includeSql === true,
|
|
1237
|
+
req.body.givens as Record<string, GivenValue> | undefined,
|
|
1224
1238
|
);
|
|
1225
1239
|
res.status(200).json(result);
|
|
1226
1240
|
} catch (error) {
|
|
@@ -1431,6 +1445,18 @@ app.use(
|
|
|
1431
1445
|
},
|
|
1432
1446
|
);
|
|
1433
1447
|
|
|
1448
|
+
// Eagerly construct the package-load worker pool so we fail fast at
|
|
1449
|
+
// boot if PACKAGE_LOAD_WORKERS is misconfigured (e.g. set to 0, the
|
|
1450
|
+
// removed in-process fallback). Surfacing the bad config here is much
|
|
1451
|
+
// friendlier than surfacing it on the first package load, which could
|
|
1452
|
+
// be hours after start.
|
|
1453
|
+
{
|
|
1454
|
+
const { getPackageLoadPool } = await import(
|
|
1455
|
+
"./package_load/package_load_pool"
|
|
1456
|
+
);
|
|
1457
|
+
getPackageLoadPool();
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1434
1460
|
const mainServer = http.createServer({ maxHeaderSize: 262144 }, app);
|
|
1435
1461
|
|
|
1436
1462
|
mainServer.timeout = 600000;
|
|
@@ -12,7 +12,7 @@ mock.module("@google-cloud/bigquery", () => ({
|
|
|
12
12
|
}));
|
|
13
13
|
|
|
14
14
|
import { Connection } from "@malloydata/malloy";
|
|
15
|
-
import { normalizeQueryArray } from "../server";
|
|
15
|
+
import { normalizeQueryArray } from "../query_param_utils";
|
|
16
16
|
import {
|
|
17
17
|
extractErrorDataFromError,
|
|
18
18
|
getSchemasForConnection,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LogMessage } from "@malloydata/malloy";
|
|
1
|
+
import type { GivenValue, LogMessage } from "@malloydata/malloy";
|
|
2
2
|
import { MalloyError, Runtime } from "@malloydata/malloy";
|
|
3
3
|
import { Mutex } from "async-mutex";
|
|
4
4
|
import crypto from "crypto";
|
|
@@ -238,6 +238,7 @@ export class Environment {
|
|
|
238
238
|
modelName: string,
|
|
239
239
|
source: string,
|
|
240
240
|
includeSql: boolean = false,
|
|
241
|
+
givens?: Record<string, GivenValue>,
|
|
241
242
|
): Promise<{ problems: LogMessage[]; sql?: string }> {
|
|
242
243
|
assertSafePackageName(packageName);
|
|
243
244
|
assertSafeRelativeModelPath(modelName);
|
|
@@ -308,7 +309,7 @@ export class Environment {
|
|
|
308
309
|
if (includeSql) {
|
|
309
310
|
try {
|
|
310
311
|
const queryMaterializer = modelMaterializer.loadFinalQuery();
|
|
311
|
-
sql = await queryMaterializer.getSQL();
|
|
312
|
+
sql = await queryMaterializer.getSQL({ givens });
|
|
312
313
|
} catch {
|
|
313
314
|
// Source may not contain a runnable query (e.g. only source definitions),
|
|
314
315
|
// in which case we simply omit the sql field.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { GetObjectCommand, S3 } from "@aws-sdk/client-s3";
|
|
2
2
|
import { Storage } from "@google-cloud/storage";
|
|
3
|
-
import AdmZip from "adm-zip";
|
|
4
3
|
import { Mutex } from "async-mutex";
|
|
5
4
|
import crypto from "crypto";
|
|
5
|
+
import extract from "extract-zip";
|
|
6
6
|
import * as fs from "fs";
|
|
7
7
|
import * as path from "path";
|
|
8
8
|
import simpleGit from "simple-git";
|
|
@@ -884,6 +884,7 @@ export class EnvironmentStore {
|
|
|
884
884
|
}
|
|
885
885
|
|
|
886
886
|
public async unzipEnvironment(absoluteEnvironmentPath: string) {
|
|
887
|
+
const startedAt = Date.now();
|
|
887
888
|
logger.info(
|
|
888
889
|
`Detected zip file at "${absoluteEnvironmentPath}". Unzipping...`,
|
|
889
890
|
);
|
|
@@ -897,8 +898,28 @@ export class EnvironmentStore {
|
|
|
897
898
|
});
|
|
898
899
|
await fs.promises.mkdir(unzippedEnvironmentPath, { recursive: true });
|
|
899
900
|
|
|
900
|
-
|
|
901
|
-
|
|
901
|
+
// Stream-extract via yauzl (wrapped by extract-zip). Each entry's
|
|
902
|
+
// inflate and write are dispatched to the libuv thread pool, so the
|
|
903
|
+
// main event loop stays responsive even for very large archives.
|
|
904
|
+
// The previous adm-zip path used fs.readFileSync + zlib.inflateRawSync
|
|
905
|
+
// on the main thread, which parked the loop long enough on multi-
|
|
906
|
+
// hundred-MB packages to fail Kubernetes liveness probes mid-extract.
|
|
907
|
+
let entryCount = 0;
|
|
908
|
+
let totalUncompressedBytes = 0;
|
|
909
|
+
await extract(absoluteEnvironmentPath, {
|
|
910
|
+
dir: path.resolve(unzippedEnvironmentPath),
|
|
911
|
+
onEntry: (entry) => {
|
|
912
|
+
entryCount += 1;
|
|
913
|
+
totalUncompressedBytes += entry.uncompressedSize ?? 0;
|
|
914
|
+
},
|
|
915
|
+
});
|
|
916
|
+
|
|
917
|
+
const mib = (totalUncompressedBytes / (1024 * 1024)).toFixed(1);
|
|
918
|
+
logger.info(
|
|
919
|
+
`Unzipped "${absoluteEnvironmentPath}" -> "${unzippedEnvironmentPath}" ` +
|
|
920
|
+
`(${entryCount} entries, ${mib} MiB uncompressed) in ` +
|
|
921
|
+
`${formatDuration(Date.now() - startedAt)}`,
|
|
922
|
+
);
|
|
902
923
|
|
|
903
924
|
return unzippedEnvironmentPath;
|
|
904
925
|
}
|
|
@@ -133,6 +133,55 @@ import "child_orders.malloy"
|
|
|
133
133
|
run: child_orders -> summary
|
|
134
134
|
`;
|
|
135
135
|
|
|
136
|
+
// Model with a given: declaration — view filters rows by the given value
|
|
137
|
+
const MODEL_WITH_GIVENS = `##! experimental.givens
|
|
138
|
+
|
|
139
|
+
given: target_region :: string is 'US'
|
|
140
|
+
|
|
141
|
+
source: orders is duckdb.table('orders') extend {
|
|
142
|
+
primary_key: order_id
|
|
143
|
+
|
|
144
|
+
measure:
|
|
145
|
+
order_count is count()
|
|
146
|
+
total_amount is sum(amount)
|
|
147
|
+
|
|
148
|
+
view: by_given_region is {
|
|
149
|
+
where: region = $target_region
|
|
150
|
+
aggregate: order_count, total_amount
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
`;
|
|
154
|
+
|
|
155
|
+
// Model with both a #(filter) annotation and a given: declaration to verify composition
|
|
156
|
+
const MODEL_WITH_GIVENS_AND_FILTER = `##! experimental.givens
|
|
157
|
+
|
|
158
|
+
given: target_region :: string is 'US'
|
|
159
|
+
|
|
160
|
+
#(filter) dimension=status type=equal
|
|
161
|
+
source: orders is duckdb.table('orders') extend {
|
|
162
|
+
primary_key: order_id
|
|
163
|
+
|
|
164
|
+
measure:
|
|
165
|
+
order_count is count()
|
|
166
|
+
total_amount is sum(amount)
|
|
167
|
+
|
|
168
|
+
view: by_given_region is {
|
|
169
|
+
where: region = $target_region
|
|
170
|
+
aggregate: order_count, total_amount
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
`;
|
|
174
|
+
|
|
175
|
+
const NOTEBOOK_GIVENS = `>>>markdown
|
|
176
|
+
# Givens Test
|
|
177
|
+
|
|
178
|
+
>>>malloy
|
|
179
|
+
import "orders_givens.malloy"
|
|
180
|
+
|
|
181
|
+
>>>malloy
|
|
182
|
+
run: orders -> by_given_region
|
|
183
|
+
`;
|
|
184
|
+
|
|
136
185
|
beforeAll(async () => {
|
|
137
186
|
await fs.mkdir(TEST_DB_DIR, { recursive: true });
|
|
138
187
|
await fs.mkdir(TEST_PKG_DIR, { recursive: true });
|
|
@@ -657,6 +706,67 @@ describe("filter integration", () => {
|
|
|
657
706
|
expect(markdownCell.type).toBe("markdown");
|
|
658
707
|
expect(markdownCell.text).toContain("Test Notebook");
|
|
659
708
|
});
|
|
709
|
+
|
|
710
|
+
it("applies givens to notebook cell execution", async () => {
|
|
711
|
+
await writeFile("orders_givens.malloy", MODEL_WITH_GIVENS);
|
|
712
|
+
await writeFile("givens_notebook.malloynb", NOTEBOOK_GIVENS);
|
|
713
|
+
const model = await Model.create(
|
|
714
|
+
"test-pkg",
|
|
715
|
+
TEST_PKG_DIR,
|
|
716
|
+
"givens_notebook.malloynb",
|
|
717
|
+
getConnections(),
|
|
718
|
+
);
|
|
719
|
+
|
|
720
|
+
// Cell 2: run: orders -> by_given_region with target_region overridden to 'EU'
|
|
721
|
+
// EU rows: (3,'EU','active',150) and (4,'EU','cancelled',75) → order_count=2, total_amount=225
|
|
722
|
+
const codeCell = await model.executeNotebookCell(
|
|
723
|
+
2,
|
|
724
|
+
undefined,
|
|
725
|
+
undefined,
|
|
726
|
+
{ target_region: "EU" },
|
|
727
|
+
);
|
|
728
|
+
expect(codeCell.result).toBeDefined();
|
|
729
|
+
|
|
730
|
+
const notebookRows = parseNotebookResult(codeCell.result!);
|
|
731
|
+
expect(notebookRows.length).toBe(1);
|
|
732
|
+
expect(Number(notebookRows[0].order_count)).toBe(2);
|
|
733
|
+
expect(Number(notebookRows[0].total_amount)).toBe(225);
|
|
734
|
+
});
|
|
735
|
+
|
|
736
|
+
it("composes givens and filterParams in notebook cell execution", async () => {
|
|
737
|
+
await writeFile(
|
|
738
|
+
"orders_givens_filter.malloy",
|
|
739
|
+
MODEL_WITH_GIVENS_AND_FILTER,
|
|
740
|
+
);
|
|
741
|
+
await writeFile(
|
|
742
|
+
"givens_filter_notebook.malloynb",
|
|
743
|
+
NOTEBOOK_GIVENS.replace(
|
|
744
|
+
"orders_givens.malloy",
|
|
745
|
+
"orders_givens_filter.malloy",
|
|
746
|
+
),
|
|
747
|
+
);
|
|
748
|
+
const model = await Model.create(
|
|
749
|
+
"test-pkg",
|
|
750
|
+
TEST_PKG_DIR,
|
|
751
|
+
"givens_filter_notebook.malloynb",
|
|
752
|
+
getConnections(),
|
|
753
|
+
);
|
|
754
|
+
|
|
755
|
+
// given restricts to APAC; filterParam restricts to active
|
|
756
|
+
// APAC + active: only (5,'APAC','active',300) → order_count=1, total_amount=300
|
|
757
|
+
const codeCell = await model.executeNotebookCell(
|
|
758
|
+
2,
|
|
759
|
+
{ status: "active" },
|
|
760
|
+
undefined,
|
|
761
|
+
{ target_region: "APAC" },
|
|
762
|
+
);
|
|
763
|
+
expect(codeCell.result).toBeDefined();
|
|
764
|
+
|
|
765
|
+
const notebookRows = parseNotebookResult(codeCell.result!);
|
|
766
|
+
expect(notebookRows.length).toBe(1);
|
|
767
|
+
expect(Number(notebookRows[0].order_count)).toBe(1);
|
|
768
|
+
expect(Number(notebookRows[0].total_amount)).toBe(300);
|
|
769
|
+
});
|
|
660
770
|
});
|
|
661
771
|
|
|
662
772
|
// -----------------------------------------------------------------------
|