@malloy-publisher/server 0.0.203 → 0.0.204
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/api-doc.yaml +17 -0
- package/dist/app/assets/{EnvironmentPage-BVQ7glKP.js → EnvironmentPage-CX06cjOF.js} +1 -1
- package/dist/app/assets/HomePage-CNFt_eUU.js +1 -0
- package/dist/app/assets/{MainPage-bYOWcgDP.js → MainPage-nUJ9YatG.js} +1 -1
- package/dist/app/assets/{PackagePage-N1ZBNJul.js → MaterializationsPage-B5goxVXW.js} +1 -1
- package/dist/app/assets/{ModelPage-DT0gjNy1.js → ModelPage-Ba7Xh4lL.js} +1 -1
- package/dist/app/assets/PackagePage-BaEVdEAG.js +1 -0
- package/dist/app/assets/{RouteError-_J-EBz7W.js → RouteError-BShQjZio.js} +1 -1
- package/dist/app/assets/{WorkbookPage-Bjs9Nm-_.js → WorkbookPage-CBn6ZjJW.js} +1 -1
- package/dist/app/assets/{core-BPLlx5VM.es-C2ARtwWI.js → core-DECXYL4E.es-OaRfXwuQ.js} +1 -1
- package/dist/app/assets/{index-CqUWJELr.js → index-BLfPC1gy.js} +2 -2
- package/dist/app/assets/index-DqiJ0bWp.js +455 -0
- package/dist/app/assets/index-Dy3YhAZQ.js +1812 -0
- package/dist/app/assets/index.umd-DAN9K8yC.js +2469 -0
- package/dist/app/index.html +1 -1
- package/dist/package_load_worker.mjs +392 -67
- package/dist/server.mjs +415 -152
- package/package.json +11 -11
- package/src/ducklake_version.spec.ts +43 -0
- package/src/ducklake_version.ts +26 -0
- package/src/errors.ts +18 -1
- package/src/package_load/package_load_pool.ts +0 -5
- package/src/package_load/package_load_worker.ts +41 -99
- package/src/package_load/protocol.ts +1 -7
- package/src/service/annotations.spec.ts +118 -0
- package/src/service/annotations.ts +91 -0
- package/src/service/authorize.spec.ts +132 -0
- package/src/service/authorize.ts +241 -0
- package/src/service/authorize_integration.spec.ts +838 -0
- package/src/service/connection.ts +1 -1
- package/src/service/environment.ts +4 -4
- package/src/service/filter.spec.ts +14 -3
- package/src/service/filter.ts +5 -1
- package/src/service/filter_bypass.spec.ts +418 -0
- package/src/service/given.ts +37 -12
- package/src/service/givens_integration.spec.ts +34 -7
- package/src/service/materialization_service.ts +25 -20
- package/src/service/materialized_table_gc.spec.ts +6 -5
- package/src/service/materialized_table_gc.ts +2 -50
- package/src/service/model.spec.ts +203 -8
- package/src/service/model.ts +305 -155
- package/src/service/package_worker_path.spec.ts +113 -0
- package/src/service/quoting.ts +0 -20
- package/src/service/restricted_mode.spec.ts +299 -0
- package/src/service/source_extraction.ts +226 -0
- package/src/storage/StorageManager.ts +73 -0
- package/dist/app/assets/HomePage-D9drXoZX.js +0 -1
- package/dist/app/assets/index-BeNwIeYQ.js +0 -454
- package/dist/app/assets/index-Dx7qi2LO.js +0 -1803
- package/dist/app/assets/index.umd-BXm2lnUO.js +0 -1145
|
@@ -167,6 +167,119 @@ describe("Package.create via worker pool", () => {
|
|
|
167
167
|
}
|
|
168
168
|
});
|
|
169
169
|
|
|
170
|
+
it("validates and surfaces a valid #(authorize) model through the worker", async () => {
  // Fixture: a single source gated by one #(authorize) expression over the
  // declared given ROLE.
  const expectedGate = ["$ROLE = 'analyst'"];
  const gatedModelText = `##! experimental.givens

given:
ROLE :: string

#(authorize) "$ROLE = 'analyst'"
source: gated is duckdb.sql("select 1 as id")`;

  writeManifest();
  fs.writeFileSync(path.join(tempDir, "gated.malloy"), gatedModelText);

  const { malloyConfig, duckdb } = await makeMalloyConfig();
  try {
    const loadedPackage = await Package.create(
      "env",
      "pkg",
      tempDir,
      malloyConfig,
    );
    const gatedModel = loadedPackage.getModel("gated.malloy");

    type DescribedSource = { name?: string; authorize?: string[] };
    const described = (await gatedModel!.getModel()) as {
      sources?: DescribedSource[];
    };

    // Package.create resolving means the worker compiled the authorize
    // probe without throwing; the expression list showing up on both the
    // API model and the Model accessor shows worker-path validation ran.
    expect(described.sources?.[0]?.authorize).toEqual(expectedGate);
    expect(gatedModel!.getAuthorize("gated")).toEqual(expectedGate);
  } finally {
    // Always release the DuckDB handle, pass or fail.
    await duckdb.close();
  }
});
|
|
200
|
+
|
|
201
|
+
it("rejects a package whose #(authorize) references an unknown given (worker validation)", async () => {
  // The gate references $NOPE, but the only declared given is ROLE.
  const badGateText = `##! experimental.givens

given:
ROLE :: string

#(authorize) "$NOPE = 'x'"
source: gated is duckdb.sql("select 1 as id")`;

  writeManifest();
  fs.writeFileSync(path.join(tempDir, "badgate.malloy"), badGateText);

  const { ModelCompilationError } = await import("../errors");
  const { malloyConfig, duckdb } = await makeMalloyConfig();
  try {
    // The failure has to cross the worker boundary as a 424
    // ModelCompilationError rather than a generic 500: the worker
    // serializes it with isCompilationError and the main thread re-wraps it.
    const creation = Package.create("env", "pkg", tempDir, malloyConfig);
    await expect(creation).rejects.toBeInstanceOf(ModelCompilationError);
  } finally {
    // Always release the DuckDB handle, pass or fail.
    await duckdb.close();
  }
});
|
|
227
|
+
|
|
228
|
+
it("validates a valid #(authorize) source in a .malloynb notebook through the worker", async () => {
  // Fixture: a notebook with one markdown cell and one malloy cell that
  // declares the gated source.
  const notebookText = `>>>markdown
# Gated notebook

>>>malloy
##! experimental.givens

given:
ROLE :: string

#(authorize) "$ROLE = 'analyst'"
source: gated is duckdb.sql("select 1 as id")`;

  writeManifest();
  fs.writeFileSync(path.join(tempDir, "gated.malloynb"), notebookText);

  const { malloyConfig, duckdb } = await makeMalloyConfig();
  try {
    const loadedPackage = await Package.create(
      "env",
      "pkg",
      tempDir,
      malloyConfig,
    );
    const notebookModel = loadedPackage.getModel("gated.malloynb");

    // No throw means compileNotebookModel ran authorize validation, and the
    // gate is surfaced. This covers the notebook compile path, which had no
    // test coverage before.
    expect(notebookModel!.getAuthorize("gated")).toEqual([
      "$ROLE = 'analyst'",
    ]);
  } finally {
    // Always release the DuckDB handle, pass or fail.
    await duckdb.close();
  }
});
|
|
257
|
+
|
|
258
|
+
it("rejects a .malloynb notebook whose #(authorize) references an unknown given", async () => {
  // The gate references $NOPE, but the only declared given is ROLE.
  const badNotebookText = `>>>malloy
##! experimental.givens

given:
ROLE :: string

#(authorize) "$NOPE = 'x'"
source: gated is duckdb.sql("select 1 as id")`;

  writeManifest();
  fs.writeFileSync(path.join(tempDir, "badgate.malloynb"), badNotebookText);

  const { ModelCompilationError } = await import("../errors");
  const { malloyConfig, duckdb } = await makeMalloyConfig();
  try {
    // Package creation must reject with the typed compilation error.
    const creation = Package.create("env", "pkg", tempDir, malloyConfig);
    await expect(creation).rejects.toBeInstanceOf(ModelCompilationError);
  } finally {
    // Always release the DuckDB handle, pass or fail.
    await duckdb.close();
  }
});
|
|
282
|
+
|
|
170
283
|
// NB: kept last in this describe — swapping the singleton for a
|
|
171
284
|
// pre-shutdown pool also tears down the shared `pool` (the swap
|
|
172
285
|
// implementation shuts down the outgoing singleton). Subsequent
|
package/src/service/quoting.ts
CHANGED
|
@@ -1,23 +1,3 @@
|
|
|
1
|
-
/**
 * The only dialect capability needed for identifier quoting. Every `Dialect`
 * in `@malloydata/malloy` implements it; accepting the duck type lets tests
 * pass a fake instead of constructing a full dialect.
 */
export interface Quoter {
  quoteTablePath(seg: string): string;
}

/**
 * Quote a table path that may be schema-qualified (e.g. "schema.table").
 *
 * The path is split on "." and each segment is quoted on its own with the
 * dialect's `quoteTablePath`; the quoted segments are re-joined with ".".
 *
 * @param path - Table path, optionally dot-qualified.
 * @param dialect - Object providing per-segment quoting.
 * @returns The path with every segment individually quoted.
 */
export function quoteTablePath(path: string, dialect: Quoter): string {
  const quotedSegments: string[] = [];
  for (const segment of path.split(".")) {
    quotedSegments.push(dialect.quoteTablePath(segment));
  }
  return quotedSegments.join(".");
}
|
|
20
|
-
|
|
21
1
|
/**
|
|
22
2
|
* Split a possibly schema-qualified table name into its schema prefix
|
|
23
3
|
* (including the trailing dot) and the bare table name.
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Restricted-mode containment for untrusted ad-hoc query text.
|
|
3
|
+
*
|
|
4
|
+
* The `query` text that reaches `execute_query` is authored by an untrusted
|
|
5
|
+
* caller (an MCP/LLM client, a UI field, an HTTP body), but it runs against a
|
|
6
|
+
* warehouse connection that can see far more than any one model curates.
|
|
7
|
+
* Compiling that text with `loadRestrictedQuery` keeps the caller inside the
|
|
8
|
+
* model's published surface — its sources, views, dimensions and measures —
|
|
9
|
+
* and stops it from using Malloy as a general-purpose handle to the underlying
|
|
10
|
+
* database or filesystem.
|
|
11
|
+
*
|
|
12
|
+
* These tests are written from the publisher's threat model: each is a way an
|
|
13
|
+
* untrusted query could try to reach data or compute the model never exposed,
|
|
14
|
+
* paired with the assertion that restricted mode blocks it. They are not a
|
|
15
|
+
* re-test of Malloy's per-construct rejection logic — the point is the misuse
|
|
16
|
+
* scenario, not the grammar.
|
|
17
|
+
*
|
|
18
|
+
* The setup: the connection holds a `secrets` table that the `catalog` model
|
|
19
|
+
* never references. The model only exposes `widgets`. Any query that manages to
|
|
20
|
+
* return a row of `secrets` has escaped the curated surface.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { DuckDBConnection } from "@malloydata/db-duckdb";
|
|
24
|
+
import { Connection } from "@malloydata/malloy";
|
|
25
|
+
import { afterAll, beforeAll, describe, expect, it } from "bun:test";
|
|
26
|
+
import fs from "fs/promises";
|
|
27
|
+
import os from "os";
|
|
28
|
+
import path from "path";
|
|
29
|
+
import { Model } from "./model";
|
|
30
|
+
|
|
31
|
+
const TEST_DIR = path.join(os.tmpdir(), "restricted-mode-tests");
|
|
32
|
+
const TEST_DB_DIR = path.join(TEST_DIR, "db");
|
|
33
|
+
const TEST_DB_PATH = path.join(TEST_DB_DIR, "test.duckdb");
|
|
34
|
+
const TEST_PKG_DIR = path.join(TEST_DIR, "pkg");
|
|
35
|
+
|
|
36
|
+
let duckdbConnection: DuckDBConnection;
|
|
37
|
+
|
|
38
|
+
// `widgets` is the curated, model-exposed table. `secrets` lives in the same
|
|
39
|
+
// connection but is never referenced by the model the caller queries — it
|
|
40
|
+
// stands in for any table the deployment did not mean to publish.
|
|
41
|
+
const SEED_SQL = `
|
|
42
|
+
CREATE TABLE IF NOT EXISTS widgets (
|
|
43
|
+
region VARCHAR,
|
|
44
|
+
name VARCHAR
|
|
45
|
+
);
|
|
46
|
+
INSERT INTO widgets VALUES
|
|
47
|
+
('US', 'Alpha'),
|
|
48
|
+
('EU', 'Beta'),
|
|
49
|
+
('APAC', 'Gamma');
|
|
50
|
+
|
|
51
|
+
CREATE TABLE IF NOT EXISTS secrets (
|
|
52
|
+
id VARCHAR,
|
|
53
|
+
ssn VARCHAR
|
|
54
|
+
);
|
|
55
|
+
INSERT INTO secrets VALUES
|
|
56
|
+
('1', '111-11-1111'),
|
|
57
|
+
('2', '222-22-2222');
|
|
58
|
+
`;
|
|
59
|
+
|
|
60
|
+
// The model the ad-hoc queries are issued against. It publishes `widgets` and
|
|
61
|
+
// nothing else — `secrets` is deliberately absent.
|
|
62
|
+
const CATALOG_MODEL = `
|
|
63
|
+
source: widgets is duckdb.table('widgets') extend {
|
|
64
|
+
measure: n is count()
|
|
65
|
+
view: by_region is {
|
|
66
|
+
group_by: region
|
|
67
|
+
aggregate: n
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
`;
|
|
71
|
+
|
|
72
|
+
// A second model that DOES expose secrets. It is never loaded by the caller;
|
|
73
|
+
// it exists only as the target of an `import` escalation attempt.
|
|
74
|
+
const VAULT_MODEL = `
|
|
75
|
+
source: vault is duckdb.table('secrets') extend {
|
|
76
|
+
measure: n is count()
|
|
77
|
+
}
|
|
78
|
+
`;
|
|
79
|
+
|
|
80
|
+
beforeAll(async () => {
  // Create the fixture directories before DuckDB opens its database file
  // and before the model files are written.
  await fs.mkdir(TEST_DB_DIR, { recursive: true });
  await fs.mkdir(TEST_PKG_DIR, { recursive: true });

  duckdbConnection = new DuckDBConnection("duckdb", TEST_DB_PATH, TEST_DB_DIR);

  // Run the seed script one statement at a time: split on ";", drop the
  // empty fragment, and put the terminator back on each statement.
  const statements = SEED_SQL.trim()
    .split(";")
    .filter(Boolean)
    .map((fragment) => fragment.trim() + ";");
  for (const statement of statements) {
    await duckdbConnection.runSQL(statement);
  }

  // Write both model files into the package directory.
  const modelFiles: Array<[string, string]> = [
    ["catalog.malloy", CATALOG_MODEL],
    ["vault.malloy", VAULT_MODEL],
  ];
  for (const [fileName, contents] of modelFiles) {
    await fs.writeFile(path.join(TEST_PKG_DIR, fileName), contents, "utf-8");
  }
});
|
|
98
|
+
|
|
99
|
+
afterAll(async () => {
  // Best-effort teardown: close the connection, wait briefly, then delete
  // the fixture tree. Any failure here is deliberately ignored.
  // NOTE(review): the 100 ms wait presumably lets DuckDB release its file
  // handles before the directory is removed — confirm.
  const pause = (ms: number): Promise<void> =>
    new Promise<void>((done) => {
      setTimeout(done, ms);
    });

  try {
    await duckdbConnection.close();
    await pause(100);
    await fs.rm(TEST_DIR, { recursive: true, force: true });
  } catch {
    // Ignore cleanup errors
  }
});
|
|
108
|
+
|
|
109
|
+
/** Build a fresh connection map holding the shared DuckDB connection under "duckdb". */
function getConnections(): Map<string, Connection> {
  return new Map<string, Connection>([["duckdb", duckdbConnection]]);
}
|
|
114
|
+
|
|
115
|
+
/** One result row: column name to value. */
type Row = Record<string, unknown>;

/**
 * View an untyped compact result as an array of rows.
 *
 * This is a compile-time cast only; the value is returned as-is with no
 * runtime validation.
 */
function asRows(compactResult: unknown): Row[] {
  const rows = compactResult as Row[];
  return rows;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Create a `Model` for a file in the test package directory, wired to the
 * shared DuckDB connection.
 *
 * @param modelPath - Model file name, passed through to `Model.create`.
 */
async function makeModel(modelPath: string): Promise<Model> {
  const connections = getConnections();
  const model = await Model.create(
    "test-pkg",
    TEST_PKG_DIR,
    modelPath,
    connections,
  );
  return model;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Execute free-form query text against a model and return its rows.
 *
 * Source name and query name are left undefined so only the ad-hoc `query`
 * argument is supplied to `getQueryResults`.
 */
async function runAdHoc(model: Model, query: string): Promise<Row[]> {
  const result = await model.getQueryResults(undefined, undefined, query);
  return asRows(result.compactResult);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Decide whether a thrown value looks like a restricted-mode rejection.
 *
 * Restricted-mode rejections surface as a Malloy compile error: the message
 * quotes the offending source text and states the rule, and the underlying
 * `problems` carry `code: 'restricted-construct-forbidden'`. Either signal is
 * accepted so the check is robust to how `model.ts` re-wraps the error.
 *
 * The input is inspected defensively: a non-string `message`, or a `problems`
 * entry that is not an object with a string `code`, is ignored instead of
 * throwing a TypeError that would hide the real failure from the caller.
 *
 * @param error - Whatever was caught; may be any value.
 * @returns `true` when the message contains "restricted" (case-insensitive),
 *   or any `problems[].code` contains "restricted" (case-sensitive).
 */
function looksRestricted(error: unknown): boolean {
  const candidate = error as
    | { message?: unknown; problems?: unknown }
    | null
    | undefined;

  // Use `message` only when it is a string; otherwise fall back to the
  // stringified error, as happens when no message is present.
  const rawMessage = candidate?.message;
  const message = (
    typeof rawMessage === "string" ? rawMessage : String(error)
  ).toLowerCase();
  if (message.includes("restricted")) return true;

  const problems = candidate?.problems;
  if (!Array.isArray(problems)) return false;

  return problems.some((problem: unknown) => {
    if (typeof problem !== "object" || problem === null) return false;
    const code = (problem as { code?: unknown }).code;
    return typeof code === "string" && code.includes("restricted");
  });
}
|
|
151
|
+
|
|
152
|
+
/**
 * Assert that an untrusted ad-hoc query is rejected by restricted mode.
 *
 * If the query throws, the thrown value must look like a restricted-mode
 * rejection. If it succeeds, the helper throws and reports how many rows came
 * back, because a successful run means the caller escaped the curated surface.
 */
async function expectBlocked(model: Model, query: string): Promise<void> {
  let rowCount: number | undefined;
  try {
    rowCount = (await runAdHoc(model, query)).length;
  } catch (error) {
    // The query was rejected; check it was rejected for the right reason.
    expect(looksRestricted(error)).toBe(true);
    return;
  }

  // Reaching this point means the query ran to completion.
  throw new Error(
    `Expected the query to be blocked by restricted mode, but it succeeded and returned ${rowCount} rows (escaped the curated surface).`,
  );
}
|
|
171
|
+
|
|
172
|
+
/**
 * Named-request counterpart of `expectBlocked`: the request is issued with
 * `sourceName`/`queryName` instead of free-form query text. Those identifiers
 * are concatenated into a `run: …` string, so they are equally
 * caller-controlled and must be rejected the same way.
 *
 * Passes when the request throws something that looks like a restricted-mode
 * rejection; throws, reporting the row count, when the request succeeds.
 */
async function expectNamedBlocked(
  model: Model,
  sourceName: string | undefined,
  queryName: string,
): Promise<void> {
  let rowCount: number | undefined;
  try {
    const result = await model.getQueryResults(sourceName, queryName);
    rowCount = asRows(result.compactResult).length;
  } catch (error) {
    // The request was rejected; check it was rejected for the right reason.
    expect(looksRestricted(error)).toBe(true);
    return;
  }

  // Reaching this point means the request ran to completion.
  throw new Error(
    `Expected the named-path request to be blocked by restricted mode, but it succeeded and returned ${rowCount} rows (escaped the curated surface).`,
  );
}
|
|
199
|
+
|
|
200
|
+
// ===========================================================================
|
|
201
|
+
// The published surface stays fully usable — restriction must not break the
|
|
202
|
+
// legitimate path it is wrapped around.
|
|
203
|
+
// ===========================================================================
|
|
204
|
+
|
|
205
|
+
describe("the curated model surface stays usable under restriction", () => {
  // Each case is a legitimate query over the published `widgets` source; each
  // yields one row per seeded region (US, EU, APAC).
  const legitimateQueries: ReadonlyArray<readonly [string, string]> = [
    [
      "runs an ad-hoc query over a published source",
      "run: widgets -> { group_by: region; aggregate: n is count() }",
    ],
    ["runs a published named view", "run: widgets -> by_region"],
  ];

  for (const [title, query] of legitimateQueries) {
    it(title, async () => {
      const model = await makeModel("catalog.malloy");
      const rows = await runAdHoc(model, query);
      expect(rows.length).toBe(3); // US, EU, APAC
    });
  }
});
|
|
221
|
+
|
|
222
|
+
// ===========================================================================
|
|
223
|
+
// Misuse vectors: an untrusted query trying to read `secrets`, which the
|
|
224
|
+
// catalog model never published. Each must be blocked.
|
|
225
|
+
// ===========================================================================
|
|
226
|
+
|
|
227
|
+
describe("an untrusted query cannot reach data the model never published", () => {
  // Each entry is one way ad-hoc text could try to read `secrets` through
  // the catalog model; every one must be rejected.
  const escapeAttempts: ReadonlyArray<{ title: string; query: string }> = [
    {
      // The connection sees every table but the model curated only
      // `widgets`; naming another table directly would make the model a
      // handle on the whole warehouse.
      title: "cannot point a source at an arbitrary warehouse table",
      query: "run: duckdb.table('secrets') -> { aggregate: c is count() }",
    },
    {
      // Raw SQL would allow anything the connection's credentials permit:
      // arbitrary reads, cross-table joins, even writes on a writable role.
      title: "cannot execute arbitrary SQL against the connection",
      query:
        'run: duckdb.sql("SELECT id, ssn FROM secrets") -> { group_by: ssn }',
    },
    {
      // An import would pull in surfaces the queried model chose not to
      // expose, and the imported path is caller-controlled.
      title: "cannot import another model to borrow its surface",
      query: [
        'import "vault.malloy"',
        "run: vault -> { aggregate: c is count() }",
      ].join("\n"),
    },
    {
      // Joining `secrets` onto the published `widgets` must not slip a raw
      // table past the restriction.
      title:
        "cannot smuggle a raw table in through a join on a published source",
      query: [
        "source: x is widgets extend {",
        " join_cross: s is duckdb.table('secrets')",
        "}",
        "run: x -> { group_by: s.ssn }",
      ].join("\n"),
    },
  ];

  for (const { title, query } of escapeAttempts) {
    it(title, async () => {
      const model = await makeModel("catalog.malloy");
      await expectBlocked(model, query);
    });
  }
});
|
|
273
|
+
|
|
274
|
+
// ===========================================================================
|
|
275
|
+
// The named `sourceName`/`queryName` request shape reaches the same compiler
|
|
276
|
+
// path as ad-hoc text, so it must inherit the same restriction. A real name is
|
|
277
|
+
// a bare identifier; anything that smuggles in a disallowed construct must be
|
|
278
|
+
// blocked, while legitimate published names keep working.
|
|
279
|
+
// ===========================================================================
|
|
280
|
+
|
|
281
|
+
describe("the named source/view path is restricted too", () => {
  it("blocks a disallowed construct supplied through the sourceName/queryName fields", async () => {
    // Neither value is a bare identifier: the "source name" is a raw table
    // reference and the "query name" is an inline query body.
    const smuggledSource = "duckdb.table('secrets')";
    const smuggledQuery = "{ group_by: ssn }";

    const model = await makeModel("catalog.malloy");
    await expectNamedBlocked(model, smuggledSource, smuggledQuery);
  });

  it("still runs a legitimate published source and view by name", async () => {
    const model = await makeModel("catalog.malloy");
    const result = await model.getQueryResults("widgets", "by_region");

    // The published view returns one row per seeded region.
    const rows = asRows(result.compactResult);
    expect(rows.length).toBe(3);
  });
});
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared source / query introspection extracted from a compiled `ModelDef`.
|
|
3
|
+
*
|
|
4
|
+
* Both the in-process `Model.create` path (`service/model.ts`) and the
|
|
5
|
+
* package-load worker (`package_load/package_load_worker.ts`, which runs in a
|
|
6
|
+
* separate bundle and serializes the result over the worker protocol) need to
|
|
7
|
+
* walk a `ModelDef` and produce the same `sources` / `queries` shapes plus the
|
|
8
|
+
* `#(filter)` `filterMap`. These two call sites used to carry byte-for-byte
|
|
9
|
+
* copies of this logic; keeping them in lockstep by hand was a standing hazard
|
|
10
|
+
* (a change to one silently diverged from the other). This module is the single
|
|
11
|
+
* source of truth — the two callers differ only in how they type the result
|
|
12
|
+
* (generated API types vs. worker wire types — structurally identical, so each
|
|
13
|
+
* casts at its boundary) and in how they report a filter parse failure (the
|
|
14
|
+
* service logs a warning; the worker has no logger and stays silent), which is
|
|
15
|
+
* threaded through the optional `onParseError` callback.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
isSourceDef,
|
|
20
|
+
ModelDef,
|
|
21
|
+
NamedModelObject,
|
|
22
|
+
NamedQueryDef,
|
|
23
|
+
StructDef,
|
|
24
|
+
TurtleDef,
|
|
25
|
+
} from "@malloydata/malloy";
|
|
26
|
+
import { annotationTexts, modelAnnotations } from "./annotations";
|
|
27
|
+
import { collectAuthorizeExprs, type AuthorizeMap } from "./authorize";
|
|
28
|
+
import { parseFilters, type FilterDefinition } from "./filter";
|
|
29
|
+
|
|
30
|
+
/** A `#(filter)` definition enriched with the dimension's Malloy type. */
|
|
31
|
+
export interface ExtractedFilter {
|
|
32
|
+
name: string;
|
|
33
|
+
dimension: string;
|
|
34
|
+
type: string;
|
|
35
|
+
implicit: boolean;
|
|
36
|
+
required: boolean;
|
|
37
|
+
dimensionType: string | undefined;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export interface ExtractedView {
|
|
41
|
+
name: string;
|
|
42
|
+
annotations: string[] | undefined;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Structural source shape both callers cast to their own typed view
|
|
47
|
+
* (`ApiSource` in the service, `ApiSourceWire` in the worker). `givens` is
|
|
48
|
+
* attached verbatim from the caller-supplied list, so it stays `unknown` here.
|
|
49
|
+
*/
|
|
50
|
+
export interface ExtractedSource {
|
|
51
|
+
name: string;
|
|
52
|
+
annotations: string[] | undefined;
|
|
53
|
+
views: ExtractedView[];
|
|
54
|
+
filters: ExtractedFilter[] | undefined;
|
|
55
|
+
givens: unknown;
|
|
56
|
+
/**
|
|
57
|
+
* Effective `#(authorize)` / `##(authorize)` expressions gating this source:
|
|
58
|
+
* file-level expressions first, then the source's own. Undefined when the
|
|
59
|
+
* source carries no authorize annotations. Surfaced for introspection;
|
|
60
|
+
* enforcement happens server-side.
|
|
61
|
+
*/
|
|
62
|
+
authorize: string[] | undefined;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export interface ExtractedQuery {
|
|
66
|
+
name: string;
|
|
67
|
+
sourceName: string | undefined;
|
|
68
|
+
annotations: string[] | undefined;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
 * Walk a compiled model and describe every source in it, parsing
 * `#(filter)` and `#(authorize)` annotations on the way.
 *
 * Filters: block notes are gathered along the `annotations.inherits` chain so
 * a filter declared on a base source reaches a source that extends it. The
 * chain is walked child → parent and the gathered notes are put in
 * parent-first order before parsing; `parseFilters` dedups "last wins", so a
 * child's `#(filter)` overrides the base's. If parsing throws, `onParseError`
 * (when supplied) is called with the source name and the error, and
 * extraction carries on with that source's `filters` left undefined.
 *
 * Authorize: read from the source's OWN `blockNotes` only; the `inherits`
 * chain is not walked. For `X is Y extend {...}`, Malloy's behavior is that
 * when X declares its own `#(authorize)`, X.blockNotes holds only X's gates
 * (Y's are dropped, the intended "curated re-exposure"), and when X declares
 * none, Malloy surfaces Y's blockNotes on X, so a locked base stays locked
 * unless an extension explicitly re-exposes itself. That carry happens
 * through `blockNotes`, which is why reading own-blockNotes is enough. Joins
 * are a separate concern and are not gated. The effective list per source is
 * the file-level `##(authorize)` expressions (from the model-level notes)
 * followed by the source's own, evaluated as one OR disjunction at request
 * time. Authorize parse errors are NOT routed to `onParseError`: they
 * propagate so a malformed gate fails model load and is never silently
 * dropped.
 *
 * `givens` is attached unchanged to every source (Malloy exposes givens at
 * the model level, not per source).
 *
 * @param modelDef - The compiled model to inspect.
 * @param givens - Caller-supplied givens list, copied verbatim onto each source.
 * @param onParseError - Optional sink for `#(filter)` parse failures.
 * @returns The extracted sources, plus per-source-name maps of parsed filters
 *   and effective authorize expressions (only sources that have any appear).
 */
export function extractSourcesFromModelDef(
  modelDef: ModelDef,
  givens: unknown,
  onParseError?: (sourceName: string, err: unknown) => void,
): {
  sources: ExtractedSource[];
  filterMap: Map<string, FilterDefinition[]>;
  authorizeMap: AuthorizeMap;
} {
  const filterMap = new Map<string, FilterDefinition[]>();
  const authorizeMap: AuthorizeMap = new Map();

  // File-level ##(authorize): collected once, prepended to every source.
  // A malformed annotation is deliberately not caught here. The throw
  // propagates so the model fails to load loudly (it is caught per model
  // upstream and turned into a compilationError). Swallowing it could leave
  // a source the author meant to lock looking unrestricted, and in the
  // worker path, which has no onParseError callback, nobody would be told.
  const fileNoteTexts = (modelAnnotations(modelDef).notes ?? []).map(
    (note) => note.text,
  );
  const fileLevelAuthorize = collectAuthorizeExprs(fileNoteTexts);

  const sourceObjects = Object.values(modelDef.contents).filter((obj) =>
    isSourceDef(obj),
  );

  const sources: ExtractedSource[] = [];
  for (const sourceObj of sourceObjects) {
    const struct = sourceObj as StructDef;
    const sourceName = struct.as || struct.name;
    const annotations = annotationTexts(struct.annotations);

    // Gather block notes along the inherits chain. The walk goes child →
    // parent; unshifting each level yields parent-first order.
    const noteLevels: string[][] = [];
    for (let node = struct.annotations; node; node = node.inherits) {
      if (node.blockNotes) {
        noteLevels.unshift(node.blockNotes.map((note) => note.text));
      }
    }
    const inheritedNoteTexts = noteLevels.flat();

    // Malloy type of the field whose alias or name matches a filter's
    // dimension; undefined when no field matches.
    const dimensionTypeOf = (dimension: string): string | undefined =>
      struct.fields.find((fd) => (fd.as || fd.name) === dimension)?.type as
        | string
        | undefined;

    let filters: ExtractedFilter[] | undefined;
    if (inheritedNoteTexts.length > 0) {
      try {
        const parsed = parseFilters(inheritedNoteTexts);
        if (parsed.length > 0) {
          filterMap.set(sourceName, parsed);
          filters = parsed.map((def) => ({
            name: def.name,
            dimension: def.dimension,
            type: def.type,
            implicit: def.implicit,
            required: def.required,
            dimensionType: dimensionTypeOf(def.dimension),
          }));
        }
      } catch (err) {
        // Filter failures are reported, not fatal.
        onParseError?.(sourceName, err);
      }
    }

    // Authorize: own #(authorize) notes only, with the file-level gates in
    // front so both form one OR disjunction. As with the file-level gates,
    // a malformed annotation propagates instead of dropping the gate.
    const ownNoteTexts = (struct.annotations?.blockNotes ?? []).map(
      (note) => note.text,
    );
    const effective = [
      ...fileLevelAuthorize,
      ...collectAuthorizeExprs(ownNoteTexts),
    ];
    const authorize = effective.length > 0 ? effective : undefined;
    if (authorize) {
      authorizeMap.set(sourceName, authorize);
    }

    // Views: turtle fields whose pipeline stages are all "reduce"; anything
    // else (e.g. indexes) is left out.
    const views: ExtractedView[] = struct.fields
      .filter(
        (field) =>
          field.type === "turtle" &&
          (field as TurtleDef).pipeline.every(
            (stage) => stage.type === "reduce",
          ),
      )
      .map((turtle) => ({
        name: turtle.as || turtle.name,
        annotations: annotationTexts(turtle.annotations),
      }));

    sources.push({
      name: sourceName,
      annotations,
      views,
      filters,
      givens,
      authorize,
    });
  }

  return { sources, filterMap, authorizeMap };
}
|
|
209
|
+
|
|
210
|
+
/**
 * List the named queries of a compiled model.
 *
 * Each entry of `modelDef.contents` whose `type` is "query" becomes one
 * result. `sourceName` is set only when the query's `structRef` is a string;
 * otherwise it is undefined.
 *
 * @param modelDef - The compiled model to inspect.
 * @returns One entry per named query, in `Object.values` order.
 */
export function extractQueriesFromModelDef(
  modelDef: ModelDef,
): ExtractedQuery[] {
  const isNamedQuery = (obj: NamedModelObject): obj is NamedQueryDef =>
    obj.type === "query";

  const queries: ExtractedQuery[] = [];
  for (const obj of Object.values(modelDef.contents)) {
    if (!isNamedQuery(obj)) continue;

    const { structRef } = obj;
    queries.push({
      name: obj.as || obj.name,
      sourceName: typeof structRef === "string" ? structRef : undefined,
      annotations: annotationTexts(obj.annotations),
    });
  }
  return queries;
}
|