@malloy-publisher/server 0.0.199 → 0.0.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/api-doc.yaml +76 -111
- package/dist/app/assets/{EnvironmentPage-Dpee_Kn6.js → EnvironmentPage-CgKNjySu.js} +1 -1
- package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
- package/dist/app/assets/{MainPage-DsVt5QGM.js → MainPage-CAwb8U82.js} +2 -2
- package/dist/app/assets/{ModelPage-AwAugZ37.js → ModelPage-C0Uevsw9.js} +1 -1
- package/dist/app/assets/{PackagePage-XQ-EWGTC.js → PackagePage-Cu-u9k1g.js} +1 -1
- package/dist/app/assets/{RouteError-3Mv8JQw7.js → RouteError-DVwPh2Ql.js} +1 -1
- package/dist/app/assets/{WorkbookPage-DHYYpcYc.js → WorkbookPage-DW38R2Zv.js} +1 -1
- package/dist/app/assets/{core-DfcpQGVP.es-DQggNOdX.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
- package/dist/app/assets/{index-D1pdwrUW.js → index-BGdcKsFF.js} +1 -1
- package/dist/app/assets/{index-BUp81Qdm.js → index-CTx4v4_3.js} +1 -1
- package/dist/app/assets/index-DE6d5jEy.js +452 -0
- package/dist/app/assets/{index.umd-CQH4LZU8.js → index.umd-C1Mi1uRm.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/package_load_worker.mjs +1 -1
- package/dist/server.mjs +1482 -1010
- package/package.json +1 -1
- package/src/config.spec.ts +246 -0
- package/src/config.ts +121 -1
- package/src/constants.ts +84 -1
- package/src/controller/connection.controller.spec.ts +803 -0
- package/src/controller/connection.controller.ts +207 -20
- package/src/controller/model.controller.ts +16 -5
- package/src/controller/query.controller.ts +20 -7
- package/src/controller/watch-mode.controller.ts +11 -2
- package/src/errors.spec.ts +44 -0
- package/src/errors.ts +34 -0
- package/src/heap_check.spec.ts +144 -0
- package/src/heap_check.ts +144 -0
- package/src/mcp/handler_utils.ts +14 -0
- package/src/mcp/tools/execute_query_tool.ts +44 -14
- package/src/oom_guards.integration.spec.ts +261 -0
- package/src/path_safety.ts +9 -3
- package/src/query_cap_metrics.spec.ts +89 -0
- package/src/query_cap_metrics.ts +115 -0
- package/src/query_concurrency.spec.ts +247 -0
- package/src/query_concurrency.ts +236 -0
- package/src/query_timeout.spec.ts +224 -0
- package/src/query_timeout.ts +178 -0
- package/src/server-old.ts +20 -0
- package/src/server.ts +25 -47
- package/src/service/connection.ts +8 -2
- package/src/service/environment.ts +82 -2
- package/src/service/environment_admission.spec.ts +165 -1
- package/src/service/environment_store.spec.ts +103 -0
- package/src/service/environment_store.ts +74 -23
- package/src/service/model.spec.ts +193 -3
- package/src/service/model.ts +80 -12
- package/src/service/model_limits.spec.ts +181 -0
- package/src/service/model_limits.ts +110 -0
- package/src/service/package.spec.ts +2 -6
- package/src/service/package.ts +6 -1
- package/src/service/path_injection.spec.ts +39 -0
- package/src/stream_helpers.spec.ts +280 -0
- package/src/stream_helpers.ts +162 -0
- package/src/test_helpers/metrics_harness.ts +126 -0
- package/dist/app/assets/HomePage-DLRWTNoL.js +0 -1
- package/dist/app/assets/index-Dv5bF4Ii.js +0 -451
package/src/service/package.ts
CHANGED
|
@@ -28,6 +28,7 @@ import {
|
|
|
28
28
|
ServiceUnavailableError,
|
|
29
29
|
} from "../errors";
|
|
30
30
|
import { formatDuration, logger } from "../logger";
|
|
31
|
+
import { assertSafeEnvironmentPath, safeJoinUnderRoot } from "../path_safety";
|
|
31
32
|
import { BuildManifest } from "../storage/DatabaseInterface";
|
|
32
33
|
import { ignoreDotfiles } from "../utils";
|
|
33
34
|
import { Model } from "./model";
|
|
@@ -90,6 +91,7 @@ export class Package {
|
|
|
90
91
|
packagePath: string,
|
|
91
92
|
environmentMalloyConfig: PackageConnectionInput,
|
|
92
93
|
): Promise<Package> {
|
|
94
|
+
assertSafeEnvironmentPath(packagePath);
|
|
93
95
|
const startTime = performance.now();
|
|
94
96
|
await Package.validatePackageManifestExistsOrThrowError(packagePath);
|
|
95
97
|
const manifestValidationTime = performance.now();
|
|
@@ -515,7 +517,10 @@ export class Package {
|
|
|
515
517
|
private static async validatePackageManifestExistsOrThrowError(
|
|
516
518
|
packagePath: string,
|
|
517
519
|
) {
|
|
518
|
-
const packageConfigPath =
|
|
520
|
+
const packageConfigPath = safeJoinUnderRoot(
|
|
521
|
+
packagePath,
|
|
522
|
+
PACKAGE_MANIFEST_NAME,
|
|
523
|
+
);
|
|
519
524
|
try {
|
|
520
525
|
await fs.stat(packageConfigPath);
|
|
521
526
|
} catch {
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import { BadRequestError } from "../errors";
|
|
3
|
+
import { deleteDuckLakeConnectionFile } from "./connection";
|
|
4
|
+
|
|
5
|
+
/**
 * Table of `[label, connectionName]` pairs fed to `it.each` below. Every
 * value is a connection name the path-injection guard is expected to
 * reject: traversal segments, path separators, dot-prefixed or dot-only
 * names, the empty string, an embedded NUL, a 256-character name, and an
 * absolute path.
 */
const TRAVERSAL_NAMES: readonly (readonly [string, string])[] = [
   ["leading traversal", "../etc"],
   ["embedded traversal", "foo/../../bar"],
   ["slash in name", "foo/bar"],
   ["backslash in name", "foo\\bar"],
   ["leading dot", ".staging"],
   ["bare dot-dot", ".."],
   ["bare dot", "."],
   ["empty", ""],
   ["NUL byte", "foo\0bar"],
   ["oversized", "a".repeat(256)],
   ["absolute", "/etc/passwd"],
] as const;
|
|
18
|
+
|
|
19
|
+
describe("deleteDuckLakeConnectionFile path-injection guards", () => {
   // Environment paths the guard must refuse; the connection name passed
   // alongside them ("conn") is a plain, valid-looking name.
   const unsafeEnvironmentPaths = [
      ["relative", "relative/path"],
      ["traversal", "/var/lib/../../etc"],
      ["NUL byte", "/var/lib/env\0"],
      ["bare dot-dot", ".."],
   ];

   // Each hostile connection name is paired with a fixed absolute
   // environment path, so only the name can be the reason for rejection.
   it.each(TRAVERSAL_NAMES)(
      "rejects %s as connectionName (%p)",
      async (_caseLabel, hostileName) => {
         const attempt = deleteDuckLakeConnectionFile(hostileName, "/tmp/env");
         await expect(attempt).rejects.toBeInstanceOf(BadRequestError);
      },
   );

   it.each(unsafeEnvironmentPaths)(
      "rejects %s as environmentPath (%p)",
      async (_caseLabel, hostilePath) => {
         const attempt = deleteDuckLakeConnectionFile("conn", hostilePath);
         await expect(attempt).rejects.toBeInstanceOf(BadRequestError);
      },
   );
});
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
QueryRecord,
|
|
3
|
+
RunSQLOptions,
|
|
4
|
+
StreamingConnection,
|
|
5
|
+
} from "@malloydata/malloy";
|
|
6
|
+
import { describe, expect, it } from "bun:test";
|
|
7
|
+
|
|
8
|
+
import { PayloadTooLargeError } from "./errors";
|
|
9
|
+
import { isStreamingConnection, streamSqlWithBudget } from "./stream_helpers";
|
|
10
|
+
|
|
11
|
+
/**
 * Test double for a `StreamingConnection` that replays a fixed array of
 * rows through `runSQLStream`.
 *
 * Besides yielding rows, the fake can report back two things to the test:
 * the options object it was called with, and whether the abort signal it
 * was given fired while a listener was attached. The latter lets tests
 * confirm that `streamSqlWithBudget` signaled the driver to stop instead
 * of merely discarding rows on the client side.
 */
function fakeStreamingConnection(opts: {
   /** Rows replayed, in order, by `runSQLStream`. */
   rows: QueryRecord[];
   /**
    * Defaults to `true`: a numeric `RunSQLOptions.rowLimit` truncates the
    * replay. Pass `false` to ignore `rowLimit`, which exercises the
    * helper's own (cap + 1)-th row overflow detection.
    */
   honorRowLimit?: boolean;
   /**
    * Test hook: `value` is set to `true` when the "abort" listener that
    * the fake installs at the start of streaming fires.
    */
   abortObserved?: { value: boolean };
   /**
    * Test hook: `value` receives the exact options object passed to
    * `runSQLStream` (including the `abortSignal` the helper wired in).
    */
   capturedOptions?: { value: RunSQLOptions | undefined };
}): StreamingConnection {
   const {
      rows: sourceRows,
      honorRowLimit: respectLimit = true,
      abortObserved: abortFlag,
      capturedOptions: optionsSink,
   } = opts;

   const fake = {
      canStream(): true {
         return true;
      },
      async *runSQLStream(
         _sql: string,
         options?: RunSQLOptions,
      ): AsyncIterableIterator<QueryRecord> {
         if (optionsSink) {
            optionsSink.value = options;
         }
         if (abortFlag) {
            options?.abortSignal?.addEventListener("abort", () => {
               abortFlag.value = true;
            });
         }

         // Only a numeric rowLimit truncates the replay, and only when the
         // fake was asked to honor it.
         let limit = sourceRows.length;
         if (respectLimit && typeof options?.rowLimit === "number") {
            limit = options.rowLimit;
         }

         let index = 0;
         while (index < Math.min(sourceRows.length, limit)) {
            yield sourceRows[index];
            index += 1;
         }
      },
   };

   return fake as unknown as StreamingConnection;
}
|
|
63
|
+
|
|
64
|
+
describe("isStreamingConnection", () => {
   it("returns true for a connection whose canStream() returns true", () => {
      const streaming = fakeStreamingConnection({ rows: [] });
      const verdict = isStreamingConnection(streaming);
      expect(verdict).toBe(true);
   });

   it("returns false for a connection without canStream", () => {
      // An object with no `canStream` member at all.
      const verdict = isStreamingConnection({} as never);
      expect(verdict).toBe(false);
   });

   it("returns false for a connection whose canStream() returns false", () => {
      // `canStream` exists and is callable, but reports no streaming support.
      const nonStreaming = { canStream: () => false } as never;
      expect(isStreamingConnection(nonStreaming)).toBe(false);
   });
});
|
|
83
|
+
|
|
84
|
+
describe("streamSqlWithBudget", () => {
   /** Fresh holder for the options the fake connection receives. */
   const newOptionsSink = (): { value: RunSQLOptions | undefined } => ({
      value: undefined as RunSQLOptions | undefined,
   });

   /**
    * Runs a one-row stream with a caller-supplied abort signal and hands
    * back both the caller's controller and the signal the driver saw.
    */
   const runWithCallerSignal = async () => {
      const sink = newOptionsSink();
      const callerAc = new AbortController();
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }],
         capturedOptions: sink,
      });
      await streamSqlWithBudget(
         conn,
         "SELECT 1",
         { rowLimit: 10, abortSignal: callerAc.signal },
         { maxRows: 5, maxBytes: 0 },
      );
      return { callerAc, observed: sink.value?.abortSignal };
   };

   it("returns all rows when both budgets are comfortably above the stream", async () => {
      const expected: QueryRecord[] = [{ a: 1 }, { a: 2 }, { a: 3 }];
      const { rows, totalRows } = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: expected }),
         "SELECT a FROM t",
         { rowLimit: 10 },
         { maxRows: 10, maxBytes: 1_000_000 },
      );
      expect(rows).toEqual(expected);
      expect(totalRows).toBe(3);
   });

   it("forwards caller-supplied runSQLOptions (rowLimit) to the driver", async () => {
      const sink = newOptionsSink();
      await streamSqlWithBudget(
         fakeStreamingConnection({
            rows: [{ a: 1 }, { a: 2 }],
            capturedOptions: sink,
         }),
         "SELECT 1",
         { rowLimit: 6 },
         { maxRows: 5, maxBytes: 0 },
      );
      expect(sink.value?.rowLimit).toBe(6);
      // The helper needs its own abort signal in the options so it can
      // stop the iterator on overflow.
      expect(sink.value?.abortSignal).toBeDefined();
   });

   it("composes the caller-supplied abortSignal with its internal cap-abort signal", async () => {
      // The caller's signal stands in for the query timeout. The driver
      // must receive a signal that is distinct from the caller's own
      // object and that has not fired while nothing has been aborted.
      const { callerAc, observed } = await runWithCallerSignal();
      expect(observed).toBeInstanceOf(AbortSignal);
      expect(observed).not.toBe(callerAc.signal);
      expect(observed?.aborted).toBe(false);
   });

   it("composed signal aborts when the caller's signal aborts", async () => {
      // Only meaningful where `AbortSignal.any` exists; skip elsewhere.
      if (typeof AbortSignal.any !== "function") return;
      // Abort the caller's controller by hand and confirm the signal the
      // driver received follows it.
      const { callerAc, observed } = await runWithCallerSignal();
      expect(observed?.aborted).toBe(false);
      callerAc.abort();
      expect(observed?.aborted).toBe(true);
   });

   it("throws PayloadTooLargeError and aborts the iterator on the (cap+1)-th row", async () => {
      const abortObserved = { value: false };
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }, { a: 2 }, { a: 3 }, { a: 4 }, { a: 5 }],
         abortObserved,
         honorRowLimit: false,
      });
      const attempt = () =>
         streamSqlWithBudget(
            conn,
            "SELECT a FROM t",
            {},
            { maxRows: 2, maxBytes: 1_000_000 },
         );

      // Two separate runs: one checks the error class, one the message.
      await expect(attempt()).rejects.toBeInstanceOf(PayloadTooLargeError);
      await expect(attempt()).rejects.toThrow("more than 2 rows");
      // The abort signal must actually have fired, not just the rows
      // been dropped after the fact.
      expect(abortObserved.value).toBe(true);
   });

   it("throws PayloadTooLargeError when summed JSON byte size exceeds the cap", async () => {
      const wide = "x".repeat(40);
      const abortObserved = { value: false };
      const conn = fakeStreamingConnection({
         rows: [{ s: wide }, { s: wide }, { s: wide }],
         abortObserved,
         honorRowLimit: false,
      });
      const attempt = streamSqlWithBudget(
         conn,
         "SELECT s FROM t",
         {},
         { maxRows: 100, maxBytes: 60 },
      );
      await expect(attempt).rejects.toThrow("exceeded 60 bytes");
      expect(abortObserved.value).toBe(true);
   });

   it("returns all rows when the byte cap is disabled (maxBytes = 0)", async () => {
      const wide = "x".repeat(10_000);
      const conn = fakeStreamingConnection({
         rows: [{ s: wide }, { s: wide }, { s: wide }],
         honorRowLimit: false,
      });
      const { rows } = await streamSqlWithBudget(
         conn,
         "SELECT s FROM t",
         {},
         { maxRows: 100, maxBytes: 0 },
      );
      expect(rows.length).toBe(3);
   });

   it("returns all rows when the row cap is disabled (maxRows = 0)", async () => {
      const fifty: QueryRecord[] = Array.from({ length: 50 }, (_, i) => ({
         a: i,
      }));
      const conn = fakeStreamingConnection({
         rows: fifty,
         honorRowLimit: false,
      });
      const { rows } = await streamSqlWithBudget(
         conn,
         "SELECT a FROM t",
         {},
         { maxRows: 0, maxBytes: 1_000_000 },
      );
      expect(rows.length).toBe(50);
   });

   it("returns rows when count equals the cap exactly (not an overflow)", async () => {
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }, { a: 2 }, { a: 3 }],
         honorRowLimit: false,
      });
      const { rows } = await streamSqlWithBudget(
         conn,
         "SELECT a FROM t",
         {},
         { maxRows: 3, maxBytes: 1_000_000 },
      );
      expect(rows.length).toBe(3);
   });

   it("re-throws non-overflow errors from the driver", async () => {
      // A connection whose stream fails before producing any row.
      const failing = {
         canStream(): true {
            return true;
         },
         // eslint-disable-next-line require-yield
         async *runSQLStream(): AsyncIterableIterator<QueryRecord> {
            throw new Error("connection reset");
         },
      } as unknown as StreamingConnection;

      const attempt = streamSqlWithBudget(
         failing,
         "SELECT 1",
         {},
         { maxRows: 10, maxBytes: 1_000 },
      );
      await expect(attempt).rejects.toThrow("connection reset");
   });
});
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helpers for streaming the ad-hoc connection SQL endpoints
|
|
3
|
+
* (`/environments/.../connections/.../sqlQuery`) so the publisher
|
|
4
|
+
* process never has to hold a whole result set in memory before
|
|
5
|
+
* returning it.
|
|
6
|
+
*
|
|
7
|
+
* The Step 1 row cap (`PUBLISHER_MAX_QUERY_ROWS`) is necessary but
|
|
8
|
+
* not sufficient: row count is a poor proxy for memory pressure
|
|
9
|
+
* because a single 10 MB JSON column blows past the 100k-row cap's
|
|
10
|
+
* safe envelope. The byte cap (`PUBLISHER_MAX_RESPONSE_BYTES`) is
|
|
11
|
+
* the actual memory bound, and bytes can only be enforced by
|
|
12
|
+
* iterating row-at-a-time on `runSQLStream` — `runSQL` returns a
|
|
13
|
+
* fully-buffered result, so by the time we'd count bytes the
|
|
14
|
+
* connector has already done the damage.
|
|
15
|
+
*
|
|
16
|
+
* On streaming-capable connections (Postgres, DuckDB, ...) the
|
|
17
|
+
* controller routes here. On other connections it stays on the
|
|
18
|
+
* Step 1 path; client-side byte counting after the fact would be
|
|
19
|
+
* security theatre.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type {
|
|
23
|
+
Connection,
|
|
24
|
+
MalloyQueryData,
|
|
25
|
+
QueryRecord,
|
|
26
|
+
RunSQLOptions,
|
|
27
|
+
StreamingConnection,
|
|
28
|
+
} from "@malloydata/malloy";
|
|
29
|
+
|
|
30
|
+
import { PayloadTooLargeError } from "./errors";
|
|
31
|
+
import { recordQueryCapExceeded } from "./query_cap_metrics";
|
|
32
|
+
|
|
33
|
+
/**
 * Type guard: reports whether `connection` can be used with
 * `runSQLStream`.
 *
 * The answer is `true` only when the object exposes a callable
 * `canStream` member and invoking it returns a truthy value. A missing
 * or non-function `canStream` yields `false` without calling anything.
 *
 * @param connection - The connection to probe.
 * @returns Whether `connection` may be treated as a `StreamingConnection`.
 */
export function isStreamingConnection(
   connection: Connection,
): connection is StreamingConnection {
   const candidate = connection as { canStream?: () => boolean };
   if (typeof candidate.canStream !== "function") {
      return false;
   }
   // Called as a method so `this` is still the connection itself.
   return candidate.canStream();
}
|
|
47
|
+
|
|
48
|
+
/** Limits that `streamSqlWithBudget` enforces while draining a stream. */
export interface StreamBudget {
   /**
    * Row cap. The helper raises once more than `maxRows` rows have been
    * received. `0` turns the row check off entirely; any `rowLimit` the
    * caller placed in `runSQLOptions` is still forwarded to the driver
    * in that case, but the helper itself will not raise on row count.
    */
   maxRows: number;
   /**
    * Byte cap. The helper keeps a running total of
    * `Buffer.byteLength(JSON.stringify(row), "utf8")` over every row
    * received and raises once that total is greater than `maxBytes`.
    * `0` turns the byte check (and the per-row serialization) off.
    */
   maxBytes: number;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Drain `connection.runSQLStream` into a `MalloyQueryData`-shaped result
 * (`{ rows, totalRows }`) while enforcing a row cap and a byte cap.
 *
 * Cancellation: the driver receives a signal that fires when EITHER the
 * caller-supplied `runSQLOptions.abortSignal` (the query timeout) OR the
 * helper's internal cap-abort controller fires. `AbortSignal.any` is
 * used where the runtime provides it. On runtimes without it, the
 * caller's abort is forwarded into the internal controller by hand, so
 * the caller's signal is never silently dropped.
 *
 * Row cap: each row is buffered first and counted afterwards, so the
 * overflow is raised when row number `maxRows + 1` arrives. The original
 * design notes say the controller clamps `runSQLOptions.rowLimit` to
 * `min(callerLimit, cap + 1)` before calling; that clamp is not visible
 * from this function and is not re-applied here.
 *
 * Byte cap: the running sum of
 * `Buffer.byteLength(JSON.stringify(row), "utf8")` over received rows.
 *
 * @param connection - Streaming-capable connection to read from.
 * @param sql - SQL text passed through to `runSQLStream` unchanged.
 * @param runSQLOptions - Caller options; forwarded as-is except that
 *   `abortSignal` is replaced by the composed signal described above.
 * @param budget - Row and byte caps; a value of `0` disables that cap.
 * @returns The buffered rows and their count.
 * @throws PayloadTooLargeError when either cap is exceeded. The message
 *   names the environment variable that controls the breached cap.
 * @throws Any error raised by the driver while no cap has been breached
 *   is rethrown unchanged (this includes errors caused by the caller's
 *   own abort).
 */
export async function streamSqlWithBudget(
   connection: StreamingConnection,
   sql: string,
   runSQLOptions: RunSQLOptions,
   budget: StreamBudget,
): Promise<MalloyQueryData> {
   const { maxRows, maxBytes } = budget;
   const capAc = new AbortController();
   const rows: QueryRecord[] = [];
   let byteTotal = 0;
   let overflowMessage: string | undefined;

   // Build the signal handed to the driver. Default: cap-abort only.
   const externalSignal = runSQLOptions.abortSignal;
   let composedSignal: AbortSignal = capAc.signal;
   // Set only on the manual-forwarding path; removes the listener we
   // attached to the caller's signal once streaming is over.
   let detachExternal: (() => void) | undefined;
   if (externalSignal) {
      if (typeof AbortSignal.any === "function") {
         composedSignal = AbortSignal.any([externalSignal, capAc.signal]);
      } else if (externalSignal.aborted) {
         // No `AbortSignal.any`, and the caller already aborted: make the
         // driver see an aborted signal from the start.
         capAc.abort(externalSignal.reason);
      } else {
         // No `AbortSignal.any`: forward the caller's abort into the
         // internal controller so a timeout still cancels the stream.
         const forwardAbort = (): void => {
            capAc.abort(externalSignal.reason);
         };
         externalSignal.addEventListener("abort", forwardAbort, {
            once: true,
         });
         detachExternal = () => {
            externalSignal.removeEventListener("abort", forwardAbort);
         };
      }
   }

   try {
      for await (const row of connection.runSQLStream(sql, {
         ...runSQLOptions,
         abortSignal: composedSignal,
      })) {
         rows.push(row);
         if (maxBytes > 0) {
            // Serializing each row here duplicates work the eventual
            // response serialization does again; it is the price of being
            // able to stop as soon as the byte budget is blown.
            byteTotal += Buffer.byteLength(JSON.stringify(row), "utf8");
            if (byteTotal > maxBytes) {
               recordQueryCapExceeded("bytes", "connection_sql");
               overflowMessage = `Query response exceeded ${maxBytes} bytes (had at least ${byteTotal}). Refine the query (project fewer columns, add a LIMIT, or filter wide values) or raise PUBLISHER_MAX_RESPONSE_BYTES.`;
               capAc.abort();
               break;
            }
         }
         if (maxRows > 0 && rows.length > maxRows) {
            recordQueryCapExceeded("rows", "connection_sql");
            overflowMessage = `Query returned more than ${maxRows} rows. Refine the query (add a LIMIT or more selective WHERE) or raise PUBLISHER_MAX_QUERY_ROWS.`;
            capAc.abort();
            break;
         }
      }
   } catch (err) {
      // Per the original author's note, some `pg-query-stream` versions
      // surface the teardown triggered by our abort as an error. Swallow
      // an error only when we caused the abort ourselves (an overflow
      // message is set); anything else is a genuine driver failure or the
      // caller's abort and must reach the caller.
      if (!overflowMessage) throw err;
   } finally {
      // Do not leave a listener hanging on a signal the caller may reuse.
      detachExternal?.();
   }

   if (overflowMessage) throw new PayloadTooLargeError(overflowMessage);

   return { rows, totalRows: rows.length };
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test helper: spin up an in-memory OpenTelemetry MeterProvider so
|
|
3
|
+
* unit tests can assert that the new guardrails (admission gate,
|
|
4
|
+
* query timeout, query concurrency, heap check) emit the expected
|
|
5
|
+
* counters and gauges.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
*
|
|
9
|
+
* const harness = await startMetricsHarness();
|
|
10
|
+
* // ... exercise production code ...
|
|
11
|
+
* const sums = await harness.collectCounter("publisher_query_timeout_total");
|
|
12
|
+
* expect(sums).toBe(1);
|
|
13
|
+
* await harness.shutdown();
|
|
14
|
+
*
|
|
15
|
+
* The OTel JS API resolves `metrics.getMeter(name)` lazily through a
|
|
16
|
+
* `ProxyMeter`, so registering the MeterProvider here AFTER the
|
|
17
|
+
* production modules have already cached their meter handles works
|
|
18
|
+
* — subsequent `.add()` / observable callbacks route to this
|
|
19
|
+
* harness's provider.
|
|
20
|
+
*
|
|
21
|
+
* The harness drains all of the provider's metrics via
|
|
22
|
+
* `MetricReader.collect()` and reads the cumulative data points
|
|
23
|
+
* directly, bypassing the exporter pipeline so test code doesn't
|
|
24
|
+
* have to thread async callbacks.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { metrics } from "@opentelemetry/api";
|
|
28
|
+
import { MeterProvider, MetricReader } from "@opentelemetry/sdk-metrics";
|
|
29
|
+
|
|
30
|
+
/**
 * Minimal `MetricReader` subclass used by the harness. Both lifecycle
 * hooks do nothing: the reader never exports anywhere, and tests pull
 * data on demand by calling the inherited `collect()`.
 */
class CollectingMetricReader extends MetricReader {
   /** Nothing is buffered, so there is nothing to flush. */
   protected override onForceFlush(): Promise<void> {
      return Promise.resolve();
   }

   /** No resources are held, so shutdown has nothing to release. */
   protected override onShutdown(): Promise<void> {
      return Promise.resolve();
   }
}
|
|
44
|
+
|
|
45
|
+
/** Handle returned by `startMetricsHarness` for inspecting emitted metrics. */
export interface MetricsHarness {
   /** The `MeterProvider` installed as the global provider by the harness. */
   readonly provider: MeterProvider;
   /**
    * Runs a collection cycle and adds up the values of every data point
    * belonging to metrics called `name`. The result is `0` when no such
    * metric has been emitted.
    *
    * When `attributeFilter` is given, a data point only counts if each
    * listed key is present on the data point with a strictly equal value
    * (for example, only the `environment: "test-env"` data point).
    */
   collectCounter(
      name: string,
      attributeFilter?: Record<string, string | number | boolean>,
   ): Promise<number>;
   /**
    * Runs a collection cycle and returns the value of the last data point
    * of the first metric called `name`. The result is `undefined` when no
    * such metric exists or it has no data points.
    */
   collectGauge(name: string): Promise<number | undefined>;
   /**
    * Shuts the harness's reader down and disables the global metrics API
    * so a later harness can register its own provider.
    */
   shutdown(): Promise<void>;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Installs a fresh in-memory `MeterProvider` as the global provider and
 * returns a handle for reading back what production code emitted.
 *
 * @returns A harness whose `collectCounter` / `collectGauge` pull data
 *   straight from the reader, and whose `shutdown` always clears the
 *   global provider slot, even if shutting the reader down fails.
 */
export async function startMetricsHarness(): Promise<MetricsHarness> {
   const reader = new CollectingMetricReader();
   const provider = new MeterProvider({ readers: [reader] });
   // Per the original author's note, `setGlobalMeterProvider` does not
   // replace a provider that is already registered. Disable first so a
   // leftover provider from an earlier test cannot keep receiving this
   // test's metrics.
   metrics.disable();
   metrics.setGlobalMeterProvider(provider);

   /**
    * Runs one collection cycle and returns every metric called `name`,
    * keeping scope order and, within a scope, metric order.
    */
   const collectNamed = async (name: string) => {
      const { resourceMetrics } = await reader.collect();
      return resourceMetrics.scopeMetrics
         .flatMap((scope) => scope.metrics)
         .filter((metric) => metric.descriptor.name === name);
   };

   return {
      provider,
      async collectCounter(
         name: string,
         attributeFilter?: Record<string, string | number | boolean>,
      ): Promise<number> {
         let total = 0;
         for (const metric of await collectNamed(name)) {
            for (const point of metric.dataPoints) {
               // With a filter, skip data points where any listed
               // attribute is missing or different.
               const matches =
                  !attributeFilter ||
                  Object.entries(attributeFilter).every(
                     ([key, wanted]) => point.attributes?.[key] === wanted,
                  );
               if (!matches) continue;
               total += point.value as number;
            }
         }
         return total;
      },
      async collectGauge(name: string): Promise<number | undefined> {
         // Only the first metric with this name is consulted; its last
         // data point is reported, so unlabeled gauges need no attribute
         // handling.
         const [metric] = await collectNamed(name);
         if (metric === undefined) return undefined;
         const last = metric.dataPoints[metric.dataPoints.length - 1];
         return last?.value as number | undefined;
      },
      async shutdown(): Promise<void> {
         try {
            await reader.shutdown();
         } finally {
            // Always free the global slot, even when the reader failed to
            // shut down, so the next harness's provider is accepted.
            metrics.disable();
         }
      },
   };
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import { S as t, j as o, L as a } from "./index-Dv5bF4Ii.js";

/**
 * Default export of this bundled chunk. Calls the imported `S` once and
 * passes its result as the `onClickEnvironment` prop of the imported `L`
 * component, rendered through `j.jsx`.
 * NOTE(review): minified build output; what `S`, `j` and `L` are is not
 * visible from this chunk.
 */
function s() {
   const onClickEnvironment = t();
   return o.jsx(a, { onClickEnvironment: onClickEnvironment });
}

export { s as default };
|