@malloy-publisher/server 0.0.202 → 0.0.204

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/app/api-doc.yaml +25 -3
  2. package/dist/app/assets/{EnvironmentPage-CNQYDaxR.js → EnvironmentPage-CX06cjOF.js} +1 -1
  3. package/dist/app/assets/HomePage-CNFt_eUU.js +1 -0
  4. package/dist/app/assets/{MainPage-B0kNpkxT.js → MainPage-nUJ9YatG.js} +1 -1
  5. package/dist/app/assets/{PackagePage-yAh0TrOV.js → MaterializationsPage-B5goxVXW.js} +1 -1
  6. package/dist/app/assets/{ModelPage-DcVElc9L.js → ModelPage-Ba7Xh4lL.js} +1 -1
  7. package/dist/app/assets/PackagePage-BaEVdEAG.js +1 -0
  8. package/dist/app/assets/{RouteError-DknUbx_s.js → RouteError-BShQjZio.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-CCqc8otA.js → WorkbookPage-CBn6ZjJW.js} +1 -1
  10. package/dist/app/assets/{core-B3A61KGJ.es-iOUZ6RJL.js → core-DECXYL4E.es-OaRfXwuQ.js} +1 -1
  11. package/dist/app/assets/{index-W0bOLKGl.js → index-BLfPC1gy.js} +2 -2
  12. package/dist/app/assets/index-DqiJ0bWp.js +455 -0
  13. package/dist/app/assets/index-Dy3YhAZQ.js +1812 -0
  14. package/dist/app/assets/index.umd-DAN9K8yC.js +2469 -0
  15. package/dist/app/index.html +1 -1
  16. package/dist/package_load_worker.mjs +392 -67
  17. package/dist/server.mjs +418 -153
  18. package/package.json +11 -11
  19. package/src/ducklake_version.spec.ts +43 -0
  20. package/src/ducklake_version.ts +26 -0
  21. package/src/errors.ts +18 -1
  22. package/src/package_load/package_load_pool.ts +0 -5
  23. package/src/package_load/package_load_worker.ts +41 -99
  24. package/src/package_load/protocol.ts +1 -7
  25. package/src/service/annotations.spec.ts +118 -0
  26. package/src/service/annotations.ts +91 -0
  27. package/src/service/authorize.spec.ts +132 -0
  28. package/src/service/authorize.ts +241 -0
  29. package/src/service/authorize_integration.spec.ts +838 -0
  30. package/src/service/connection.ts +1 -1
  31. package/src/service/environment.ts +4 -4
  32. package/src/service/environment_store.ts +14 -2
  33. package/src/service/filter.spec.ts +14 -3
  34. package/src/service/filter.ts +5 -1
  35. package/src/service/filter_bypass.spec.ts +418 -0
  36. package/src/service/given.ts +37 -12
  37. package/src/service/givens_integration.spec.ts +34 -7
  38. package/src/service/materialization_service.ts +25 -20
  39. package/src/service/materialized_table_gc.spec.ts +6 -5
  40. package/src/service/materialized_table_gc.ts +2 -50
  41. package/src/service/model.spec.ts +203 -8
  42. package/src/service/model.ts +305 -155
  43. package/src/service/package_worker_path.spec.ts +113 -0
  44. package/src/service/quoting.ts +0 -20
  45. package/src/service/restricted_mode.spec.ts +299 -0
  46. package/src/service/source_extraction.ts +226 -0
  47. package/src/storage/StorageManager.ts +73 -0
  48. package/dist/app/assets/HomePage-DBFTIoD8.js +0 -1
  49. package/dist/app/assets/index-F_o127LC.js +0 -454
  50. package/dist/app/assets/index-QeX_e740.js +0 -1803
  51. package/dist/app/assets/index.umd-CEDRw4TK.js +0 -1145
@@ -167,6 +167,119 @@ describe("Package.create via worker pool", () => {
167
167
  }
168
168
  });
169
169
 
170
+ it("validates and surfaces a valid #(authorize) model through the worker", async () => {
171
+ writeManifest();
172
+ fs.writeFileSync(
173
+ path.join(tempDir, "gated.malloy"),
174
+ `##! experimental.givens
175
+
176
+ given:
177
+ ROLE :: string
178
+
179
+ #(authorize) "$ROLE = 'analyst'"
180
+ source: gated is duckdb.sql("select 1 as id")`,
181
+ );
182
+
183
+ const { malloyConfig, duckdb } = await makeMalloyConfig();
184
+ try {
185
+ const pkg = await Package.create("env", "pkg", tempDir, malloyConfig);
186
+ const model = pkg.getModel("gated.malloy");
187
+ const apiModel = (await model!.getModel()) as {
188
+ sources?: { name?: string; authorize?: string[] }[];
189
+ };
190
+ // The worker compiled the authorize probe (no throw) and surfaced the
191
+ // effective expression list — proves worker-path validation runs.
192
+ expect(apiModel.sources?.[0]?.authorize).toEqual([
193
+ "$ROLE = 'analyst'",
194
+ ]);
195
+ expect(model!.getAuthorize("gated")).toEqual(["$ROLE = 'analyst'"]);
196
+ } finally {
197
+ await duckdb.close();
198
+ }
199
+ });
200
+
201
+ it("rejects a package whose #(authorize) references an unknown given (worker validation)", async () => {
202
+ writeManifest();
203
+ fs.writeFileSync(
204
+ path.join(tempDir, "badgate.malloy"),
205
+ `##! experimental.givens
206
+
207
+ given:
208
+ ROLE :: string
209
+
210
+ #(authorize) "$NOPE = 'x'"
211
+ source: gated is duckdb.sql("select 1 as id")`,
212
+ );
213
+
214
+ const { ModelCompilationError } = await import("../errors");
215
+ const { malloyConfig, duckdb } = await makeMalloyConfig();
216
+ try {
217
+ // Must surface as a 424 ModelCompilationError across the worker
218
+ // boundary, not a generic 500 — the worker serializes it with
219
+ // isCompilationError so the main thread re-wraps it.
220
+ await expect(
221
+ Package.create("env", "pkg", tempDir, malloyConfig),
222
+ ).rejects.toBeInstanceOf(ModelCompilationError);
223
+ } finally {
224
+ await duckdb.close();
225
+ }
226
+ });
227
+
228
+ it("validates a valid #(authorize) source in a .malloynb notebook through the worker", async () => {
229
+ writeManifest();
230
+ fs.writeFileSync(
231
+ path.join(tempDir, "gated.malloynb"),
232
+ `>>>markdown
233
+ # Gated notebook
234
+
235
+ >>>malloy
236
+ ##! experimental.givens
237
+
238
+ given:
239
+ ROLE :: string
240
+
241
+ #(authorize) "$ROLE = 'analyst'"
242
+ source: gated is duckdb.sql("select 1 as id")`,
243
+ );
244
+
245
+ const { malloyConfig, duckdb } = await makeMalloyConfig();
246
+ try {
247
+ const pkg = await Package.create("env", "pkg", tempDir, malloyConfig);
248
+ const model = pkg.getModel("gated.malloynb");
249
+ // compileNotebookModel ran authorize validation (no throw) and
250
+ // surfaced the gate — the notebook compile path was previously
251
+ // unexercised by tests.
252
+ expect(model!.getAuthorize("gated")).toEqual(["$ROLE = 'analyst'"]);
253
+ } finally {
254
+ await duckdb.close();
255
+ }
256
+ });
257
+
258
+ it("rejects a .malloynb notebook whose #(authorize) references an unknown given", async () => {
259
+ writeManifest();
260
+ fs.writeFileSync(
261
+ path.join(tempDir, "badgate.malloynb"),
262
+ `>>>malloy
263
+ ##! experimental.givens
264
+
265
+ given:
266
+ ROLE :: string
267
+
268
+ #(authorize) "$NOPE = 'x'"
269
+ source: gated is duckdb.sql("select 1 as id")`,
270
+ );
271
+
272
+ const { ModelCompilationError } = await import("../errors");
273
+ const { malloyConfig, duckdb } = await makeMalloyConfig();
274
+ try {
275
+ await expect(
276
+ Package.create("env", "pkg", tempDir, malloyConfig),
277
+ ).rejects.toBeInstanceOf(ModelCompilationError);
278
+ } finally {
279
+ await duckdb.close();
280
+ }
281
+ });
282
+
170
283
  // NB: kept last in this describe — swapping the singleton for a
171
284
  // pre-shutdown pool also tears down the shared `pool` (the swap
172
285
  // implementation shuts down the outgoing singleton). Subsequent
@@ -1,23 +1,3 @@
1
- /**
2
- * Minimal identifier-quoting surface. Every `Dialect` in `@malloydata/malloy`
3
- * implements this; we accept the duck type so tests can inject a fake without
4
- * instantiating a full dialect.
5
- */
6
- export interface Quoter {
7
- quoteTablePath(seg: string): string;
8
- }
9
-
10
- /**
11
- * Quote a potentially schema-qualified table path (e.g. "schema.table")
12
- * by quoting each segment individually with the dialect's quoteTablePath.
13
- */
14
- export function quoteTablePath(path: string, dialect: Quoter): string {
15
- return path
16
- .split(".")
17
- .map((seg) => dialect.quoteTablePath(seg))
18
- .join(".");
19
- }
20
-
21
1
  /**
22
2
  * Split a possibly schema-qualified table name into its schema prefix
23
3
  * (including the trailing dot) and the bare table name.
@@ -0,0 +1,299 @@
1
+ /**
2
+ * Restricted-mode containment for untrusted ad-hoc query text.
3
+ *
4
+ * The `query` text that reaches `execute_query` is authored by an untrusted
5
+ * caller (an MCP/LLM client, a UI field, an HTTP body), but it runs against a
6
+ * warehouse connection that can see far more than any one model curates.
7
+ * Compiling that text with `loadRestrictedQuery` keeps the caller inside the
8
+ * model's published surface — its sources, views, dimensions and measures —
9
+ * and stops it from using Malloy as a general-purpose handle to the underlying
10
+ * database or filesystem.
11
+ *
12
+ * These tests are written from the publisher's threat model: each is a way an
13
+ * untrusted query could try to reach data or compute the model never exposed,
14
+ * paired with the assertion that restricted mode blocks it. They are not a
15
+ * re-test of Malloy's per-construct rejection logic — the point is the misuse
16
+ * scenario, not the grammar.
17
+ *
18
+ * The setup: the connection holds a `secrets` table that the `catalog` model
19
+ * never references. The model only exposes `widgets`. Any query that manages to
20
+ * return a row of `secrets` has escaped the curated surface.
21
+ */
22
+
23
+ import { DuckDBConnection } from "@malloydata/db-duckdb";
24
+ import { Connection } from "@malloydata/malloy";
25
+ import { afterAll, beforeAll, describe, expect, it } from "bun:test";
26
+ import fs from "fs/promises";
27
+ import os from "os";
28
+ import path from "path";
29
+ import { Model } from "./model";
30
+
31
+ const TEST_DIR = path.join(os.tmpdir(), "restricted-mode-tests");
32
+ const TEST_DB_DIR = path.join(TEST_DIR, "db");
33
+ const TEST_DB_PATH = path.join(TEST_DB_DIR, "test.duckdb");
34
+ const TEST_PKG_DIR = path.join(TEST_DIR, "pkg");
35
+
36
+ let duckdbConnection: DuckDBConnection;
37
+
38
+ // `widgets` is the curated, model-exposed table. `secrets` lives in the same
39
+ // connection but is never referenced by the model the caller queries — it
40
+ // stands in for any table the deployment did not mean to publish.
41
+ const SEED_SQL = `
42
+ CREATE TABLE IF NOT EXISTS widgets (
43
+ region VARCHAR,
44
+ name VARCHAR
45
+ );
46
+ INSERT INTO widgets VALUES
47
+ ('US', 'Alpha'),
48
+ ('EU', 'Beta'),
49
+ ('APAC', 'Gamma');
50
+
51
+ CREATE TABLE IF NOT EXISTS secrets (
52
+ id VARCHAR,
53
+ ssn VARCHAR
54
+ );
55
+ INSERT INTO secrets VALUES
56
+ ('1', '111-11-1111'),
57
+ ('2', '222-22-2222');
58
+ `;
59
+
60
+ // The model the ad-hoc queries are issued against. It publishes `widgets` and
61
+ // nothing else — `secrets` is deliberately absent.
62
+ const CATALOG_MODEL = `
63
+ source: widgets is duckdb.table('widgets') extend {
64
+ measure: n is count()
65
+ view: by_region is {
66
+ group_by: region
67
+ aggregate: n
68
+ }
69
+ }
70
+ `;
71
+
72
+ // A second model that DOES expose secrets. It is never loaded by the caller;
73
+ // it exists only as the target of an `import` escalation attempt.
74
+ const VAULT_MODEL = `
75
+ source: vault is duckdb.table('secrets') extend {
76
+ measure: n is count()
77
+ }
78
+ `;
79
+
80
+ beforeAll(async () => {
81
+ await fs.mkdir(TEST_DB_DIR, { recursive: true });
82
+ await fs.mkdir(TEST_PKG_DIR, { recursive: true });
83
+ duckdbConnection = new DuckDBConnection("duckdb", TEST_DB_PATH, TEST_DB_DIR);
84
+ for (const stmt of SEED_SQL.trim().split(";").filter(Boolean)) {
85
+ await duckdbConnection.runSQL(stmt.trim() + ";");
86
+ }
87
+ await fs.writeFile(
88
+ path.join(TEST_PKG_DIR, "catalog.malloy"),
89
+ CATALOG_MODEL,
90
+ "utf-8",
91
+ );
92
+ await fs.writeFile(
93
+ path.join(TEST_PKG_DIR, "vault.malloy"),
94
+ VAULT_MODEL,
95
+ "utf-8",
96
+ );
97
+ });
98
+
99
+ afterAll(async () => {
100
+ try {
101
+ await duckdbConnection.close();
102
+ await new Promise((resolve) => setTimeout(resolve, 100));
103
+ await fs.rm(TEST_DIR, { recursive: true, force: true });
104
+ } catch {
105
+ // Ignore cleanup errors
106
+ }
107
+ });
108
+
109
+ function getConnections(): Map<string, Connection> {
110
+ const map = new Map<string, Connection>();
111
+ map.set("duckdb", duckdbConnection);
112
+ return map;
113
+ }
114
+
115
+ type Row = Record<string, unknown>;
116
+
117
+ function asRows(compactResult: unknown): Row[] {
118
+ return compactResult as Row[];
119
+ }
120
+
121
+ async function makeModel(modelPath: string): Promise<Model> {
122
+ return Model.create("test-pkg", TEST_PKG_DIR, modelPath, getConnections());
123
+ }
124
+
125
+ /** Run an ad-hoc query string and return the result rows. */
126
+ async function runAdHoc(model: Model, query: string): Promise<Row[]> {
127
+ const { compactResult } = await model.getQueryResults(
128
+ undefined,
129
+ undefined,
130
+ query,
131
+ );
132
+ return asRows(compactResult);
133
+ }
134
+
135
+ /**
136
+ * Restricted-mode rejections surface as a Malloy compile error: the message
137
+ * quotes the offending source text and states the rule, and the underlying
138
+ * `problems` carry `code: 'restricted-construct-forbidden'`. We accept either
139
+ * signal so the assertion is robust to how `model.ts` re-wraps the error.
140
+ */
141
+ function looksRestricted(error: unknown): boolean {
142
+ const message = ((error as Error)?.message ?? String(error)).toLowerCase();
143
+ if (message.includes("restricted")) return true;
144
+ const problems = (error as { problems?: Array<{ code?: string }> })
145
+ ?.problems;
146
+ return (
147
+ Array.isArray(problems) &&
148
+ problems.some((p) => (p.code ?? "").includes("restricted"))
149
+ );
150
+ }
151
+
152
+ /**
153
+ * Assert an untrusted ad-hoc query is blocked before it can reach unpublished
154
+ * data. If it instead succeeds, report the row count — the caller escaped the
155
+ * curated surface and that is the leak we are guarding against.
156
+ */
157
+ async function expectBlocked(model: Model, query: string): Promise<void> {
158
+ let leakedRows: number | undefined;
159
+ try {
160
+ const rows = await runAdHoc(model, query);
161
+ leakedRows = rows.length;
162
+ } catch (error) {
163
+ expect(looksRestricted(error)).toBe(true);
164
+ return;
165
+ }
166
+ throw new Error(
167
+ `Expected the query to be blocked by restricted mode, but it succeeded ` +
168
+ `and returned ${leakedRows} rows (escaped the curated surface).`,
169
+ );
170
+ }
171
+
172
+ /**
173
+ * Same assertion as `expectBlocked`, but exercised through the named
174
+ * `sourceName`/`queryName` request shape rather than the free-form `query`
175
+ * field — those identifiers are concatenated into a `run: …` string and so are
176
+ * just as caller-controlled.
177
+ */
178
+ async function expectNamedBlocked(
179
+ model: Model,
180
+ sourceName: string | undefined,
181
+ queryName: string,
182
+ ): Promise<void> {
183
+ let leakedRows: number | undefined;
184
+ try {
185
+ const { compactResult } = await model.getQueryResults(
186
+ sourceName,
187
+ queryName,
188
+ );
189
+ leakedRows = asRows(compactResult).length;
190
+ } catch (error) {
191
+ expect(looksRestricted(error)).toBe(true);
192
+ return;
193
+ }
194
+ throw new Error(
195
+ `Expected the named-path request to be blocked by restricted mode, but ` +
196
+ `it succeeded and returned ${leakedRows} rows (escaped the curated surface).`,
197
+ );
198
+ }
199
+
200
+ // ===========================================================================
201
+ // The published surface stays fully usable — restriction must not break the
202
+ // legitimate path it is wrapped around.
203
+ // ===========================================================================
204
+
205
+ describe("the curated model surface stays usable under restriction", () => {
206
+ it("runs an ad-hoc query over a published source", async () => {
207
+ const model = await makeModel("catalog.malloy");
208
+ const rows = await runAdHoc(
209
+ model,
210
+ "run: widgets -> { group_by: region; aggregate: n is count() }",
211
+ );
212
+ expect(rows.length).toBe(3); // US, EU, APAC
213
+ });
214
+
215
+ it("runs a published named view", async () => {
216
+ const model = await makeModel("catalog.malloy");
217
+ const rows = await runAdHoc(model, "run: widgets -> by_region");
218
+ expect(rows.length).toBe(3);
219
+ });
220
+ });
221
+
222
+ // ===========================================================================
223
+ // Misuse vectors: an untrusted query trying to read `secrets`, which the
224
+ // catalog model never published. Each must be blocked.
225
+ // ===========================================================================
226
+
227
+ describe("an untrusted query cannot reach data the model never published", () => {
228
+ // The connection can see every table; the model curated only `widgets`.
229
+ // Naming another table directly would turn the model into a handle on the
230
+ // whole warehouse.
231
+ it("cannot point a source at an arbitrary warehouse table", async () => {
232
+ const model = await makeModel("catalog.malloy");
233
+ await expectBlocked(
234
+ model,
235
+ "run: duckdb.table('secrets') -> { aggregate: c is count() }",
236
+ );
237
+ });
238
+
239
+ // Raw SQL would let the caller run anything the connection's credentials
240
+ // allow — arbitrary reads, cross-table joins, even writes on a writable role.
241
+ it("cannot execute arbitrary SQL against the connection", async () => {
242
+ const model = await makeModel("catalog.malloy");
243
+ await expectBlocked(
244
+ model,
245
+ 'run: duckdb.sql("SELECT id, ssn FROM secrets") -> { group_by: ssn }',
246
+ );
247
+ });
248
+
249
+ // Importing another model would pull in surfaces the queried model chose not
250
+ // to expose (and the file path is caller-controlled).
251
+ it("cannot import another model to borrow its surface", async () => {
252
+ const model = await makeModel("catalog.malloy");
253
+ await expectBlocked(
254
+ model,
255
+ 'import "vault.malloy"\n' +
256
+ "run: vault -> { aggregate: c is count() }",
257
+ );
258
+ });
259
+
260
+ // Combining the curated surface with a raw table — joining `secrets` onto the
261
+ // published `widgets` — must not slip a raw table past the restriction.
262
+ it("cannot smuggle a raw table in through a join on a published source", async () => {
263
+ const model = await makeModel("catalog.malloy");
264
+ await expectBlocked(
265
+ model,
266
+ "source: x is widgets extend {\n" +
267
+ " join_cross: s is duckdb.table('secrets')\n" +
268
+ "}\n" +
269
+ "run: x -> { group_by: s.ssn }",
270
+ );
271
+ });
272
+ });
273
+
274
+ // ===========================================================================
275
+ // The named `sourceName`/`queryName` request shape reaches the same compiler
276
+ // path as ad-hoc text, so it must inherit the same restriction. A real name is
277
+ // a bare identifier; anything that smuggles in a disallowed construct must be
278
+ // blocked, while legitimate published names keep working.
279
+ // ===========================================================================
280
+
281
+ describe("the named source/view path is restricted too", () => {
282
+ it("blocks a disallowed construct supplied through the sourceName/queryName fields", async () => {
283
+ const model = await makeModel("catalog.malloy");
284
+ await expectNamedBlocked(
285
+ model,
286
+ "duckdb.table('secrets')",
287
+ "{ group_by: ssn }",
288
+ );
289
+ });
290
+
291
+ it("still runs a legitimate published source and view by name", async () => {
292
+ const model = await makeModel("catalog.malloy");
293
+ const { compactResult } = await model.getQueryResults(
294
+ "widgets",
295
+ "by_region",
296
+ );
297
+ expect(asRows(compactResult).length).toBe(3);
298
+ });
299
+ });
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Shared source / query introspection extracted from a compiled `ModelDef`.
3
+ *
4
+ * Both the in-process `Model.create` path (`service/model.ts`) and the
5
+ * package-load worker (`package_load/package_load_worker.ts`, which runs in a
6
+ * separate bundle and serializes the result over the worker protocol) need to
7
+ * walk a `ModelDef` and produce the same `sources` / `queries` shapes plus the
8
+ * `#(filter)` `filterMap`. These two call sites used to carry byte-for-byte
9
+ * copies of this logic; keeping them in lockstep by hand was a standing hazard
10
+ * (a change to one silently diverged from the other). This module is the single
11
+ * source of truth — the two callers differ only in how they type the result
12
+ * (generated API types vs. worker wire types — structurally identical, so each
13
+ * casts at its boundary) and in how they report a filter parse failure (the
14
+ * service logs a warning; the worker has no logger and stays silent), which is
15
+ * threaded through the optional `onParseError` callback.
16
+ */
17
+
18
+ import {
19
+ isSourceDef,
20
+ ModelDef,
21
+ NamedModelObject,
22
+ NamedQueryDef,
23
+ StructDef,
24
+ TurtleDef,
25
+ } from "@malloydata/malloy";
26
+ import { annotationTexts, modelAnnotations } from "./annotations";
27
+ import { collectAuthorizeExprs, type AuthorizeMap } from "./authorize";
28
+ import { parseFilters, type FilterDefinition } from "./filter";
29
+
30
+ /** A `#(filter)` definition enriched with the dimension's Malloy type. */
31
+ export interface ExtractedFilter {
32
+ name: string;
33
+ dimension: string;
34
+ type: string;
35
+ implicit: boolean;
36
+ required: boolean;
37
+ dimensionType: string | undefined;
38
+ }
39
+
40
+ export interface ExtractedView {
41
+ name: string;
42
+ annotations: string[] | undefined;
43
+ }
44
+
45
+ /**
46
+ * Structural source shape both callers cast to their own typed view
47
+ * (`ApiSource` in the service, `ApiSourceWire` in the worker). `givens` is
48
+ * attached verbatim from the caller-supplied list, so it stays `unknown` here.
49
+ */
50
+ export interface ExtractedSource {
51
+ name: string;
52
+ annotations: string[] | undefined;
53
+ views: ExtractedView[];
54
+ filters: ExtractedFilter[] | undefined;
55
+ givens: unknown;
56
+ /**
57
+ * Effective `#(authorize)` / `##(authorize)` expressions gating this source:
58
+ * file-level expressions first, then the source's own. Undefined when the
59
+ * source carries no authorize annotations. Surfaced for introspection;
60
+ * enforcement happens server-side.
61
+ */
62
+ authorize: string[] | undefined;
63
+ }
64
+
65
+ export interface ExtractedQuery {
66
+ name: string;
67
+ sourceName: string | undefined;
68
+ annotations: string[] | undefined;
69
+ }
70
+
71
+ /**
72
+ * Extract every source from a compiled model, parsing `#(filter)` annotations
73
+ * along the way.
74
+ *
75
+ * Filters are collected by walking the `annotations.inherits` chain so that
76
+ * filters declared on a base source flow to an extending source. The chain runs
77
+ * child → parent, so we collect child-first then reverse — `parseFilters` uses
78
+ * "last wins" dedup, which lets a child's `#(filter)` override the base's.
79
+ *
80
+ * `givens` is attached unchanged to every source (Malloy exposes givens at the
81
+ * model level, not per-source). `onParseError`, when supplied, is invoked with
82
+ * the source name and error if a source's `#(filter)` annotations fail to parse;
83
+ * filter extraction then continues. Authorize parse errors are NOT routed here —
84
+ * they propagate (a malformed gate fails model load) so a security gate is never
85
+ * silently dropped.
86
+ *
87
+ * Authorize (`#(authorize)` / `##(authorize)`) is collected from the source's
88
+ * own `blockNotes` only — we do NOT walk the `inherits` chain. Note Malloy's
89
+ * behavior for `X is Y extend {...}`: if X declares its own `#(authorize)`,
90
+ * X.blockNotes holds only X's gates (Y's are dropped — the intended "curated
91
+ * re-exposure"); if X declares none, Malloy surfaces Y's blockNotes on X, so
92
+ * the base gate carries to the un-annotated extension (a safe default — a
93
+ * locked base stays locked unless an extension explicitly re-exposes itself).
94
+ * This carry happens through `blockNotes`, not the `inherits` chain, so reading
95
+ * own-blockNotes is sufficient. Joins are a separate concern and are not gated.
96
+ * The effective list per source is the file-level `##(authorize)` expressions
97
+ * (from `modelDef.annotations.notes`) followed by the source's own
98
+ * `#(authorize)` expressions, evaluated as one OR disjunction at request time.
99
+ */
100
+ export function extractSourcesFromModelDef(
101
+ modelDef: ModelDef,
102
+ givens: unknown,
103
+ onParseError?: (sourceName: string, err: unknown) => void,
104
+ ): {
105
+ sources: ExtractedSource[];
106
+ filterMap: Map<string, FilterDefinition[]>;
107
+ authorizeMap: AuthorizeMap;
108
+ } {
109
+ const filterMap = new Map<string, FilterDefinition[]>();
110
+ const authorizeMap: AuthorizeMap = new Map();
111
+
112
+ // File-level ##(authorize) is collected once and prepended to every source.
113
+ // Unlike filters, a malformed authorize annotation is NOT swallowed: the
114
+ // parse error propagates so the model fails to load loudly (caught per-model
115
+ // upstream and turned into a compilationError). Silently dropping a gate —
116
+ // and in the worker path there is no onParseError callback, so it would be
117
+ // truly silent — could leave a source that the author meant to lock looking
118
+ // unrestricted.
119
+ const fileLevelAuthorize = collectAuthorizeExprs(
120
+ (modelAnnotations(modelDef).notes ?? []).map((note) => note.text),
121
+ );
122
+
123
+ const sources: ExtractedSource[] = Object.values(modelDef.contents)
124
+ .filter((obj) => isSourceDef(obj))
125
+ .map((sourceObj) => {
126
+ const struct = sourceObj as StructDef;
127
+ const sourceName = struct.as || struct.name;
128
+ const annotations = annotationTexts(struct.annotations);
129
+
130
+ const collected: string[][] = [];
131
+ let cur = struct.annotations;
132
+ while (cur) {
133
+ if (cur.blockNotes) {
134
+ collected.push(cur.blockNotes.map((note) => note.text));
135
+ }
136
+ cur = cur.inherits;
137
+ }
138
+ const allAnnotations = collected.reverse().flat();
139
+
140
+ let filters: ExtractedFilter[] | undefined;
141
+ if (allAnnotations.length > 0) {
142
+ try {
143
+ const parsed = parseFilters(allAnnotations);
144
+ if (parsed.length > 0) {
145
+ filterMap.set(sourceName, parsed);
146
+ const fields = struct.fields;
147
+ filters = parsed.map((f) => {
148
+ const field = fields.find(
149
+ (fd) => (fd.as || fd.name) === f.dimension,
150
+ );
151
+ return {
152
+ name: f.name,
153
+ dimension: f.dimension,
154
+ type: f.type,
155
+ implicit: f.implicit,
156
+ required: f.required,
157
+ dimensionType: field?.type as string | undefined,
158
+ };
159
+ });
160
+ }
161
+ } catch (err) {
162
+ onParseError?.(sourceName, err);
163
+ }
164
+ }
165
+
166
+ // Authorize: the source's OWN #(authorize) annotations only — no
167
+ // inherits walk. File-level ##(authorize) is prepended so file gates
168
+ // and source gates form one OR disjunction. A malformed annotation
169
+ // propagates (model fails to load) rather than silently dropping the
170
+ // gate — see the file-level note above.
171
+ const ownNotes = (struct.annotations?.blockNotes ?? []).map(
172
+ (note) => note.text,
173
+ );
174
+ const effective = [
175
+ ...fileLevelAuthorize,
176
+ ...collectAuthorizeExprs(ownNotes),
177
+ ];
178
+ let authorize: string[] | undefined;
179
+ if (effective.length > 0) {
180
+ authorizeMap.set(sourceName, effective);
181
+ authorize = effective;
182
+ }
183
+
184
+ const views: ExtractedView[] = struct.fields
185
+ .filter((field) => field.type === "turtle")
186
+ .filter((turtle) =>
187
+ // Filter out non-reduce views (e.g. indexes).
188
+ (turtle as TurtleDef).pipeline
189
+ .map((stage) => stage.type)
190
+ .every((type) => type === "reduce"),
191
+ )
192
+ .map((turtle) => ({
193
+ name: turtle.as || turtle.name,
194
+ annotations: annotationTexts(turtle.annotations),
195
+ }));
196
+
197
+ return {
198
+ name: sourceName,
199
+ annotations,
200
+ views,
201
+ filters,
202
+ givens,
203
+ authorize,
204
+ };
205
+ });
206
+
207
+ return { sources, filterMap, authorizeMap };
208
+ }
209
+
210
+ /** Extract every named query from a compiled model. */
211
+ export function extractQueriesFromModelDef(
212
+ modelDef: ModelDef,
213
+ ): ExtractedQuery[] {
214
+ const isNamedQuery = (obj: NamedModelObject): obj is NamedQueryDef =>
215
+ obj.type === "query";
216
+ return Object.values(modelDef.contents)
217
+ .filter(isNamedQuery)
218
+ .map((queryObj) => ({
219
+ name: queryObj.as || queryObj.name,
220
+ sourceName:
221
+ typeof queryObj.structRef === "string"
222
+ ? queryObj.structRef
223
+ : undefined,
224
+ annotations: annotationTexts(queryObj.annotations),
225
+ }));
226
+ }