@malloy-publisher/server 0.0.198 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +127 -111
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
  4. package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
  11. package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
  12. package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
  13. package/dist/app/assets/index-DE6d5jEy.js +452 -0
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +4198 -3648
  19. package/package.json +2 -3
  20. package/src/config.spec.ts +246 -0
  21. package/src/config.ts +121 -1
  22. package/src/constants.ts +84 -1
  23. package/src/controller/compile.controller.ts +3 -1
  24. package/src/controller/connection.controller.spec.ts +803 -0
  25. package/src/controller/connection.controller.ts +207 -20
  26. package/src/controller/model.controller.ts +19 -1
  27. package/src/controller/query.controller.ts +22 -6
  28. package/src/controller/watch-mode.controller.ts +11 -2
  29. package/src/errors.spec.ts +44 -0
  30. package/src/errors.ts +34 -0
  31. package/src/health.spec.ts +90 -0
  32. package/src/health.ts +88 -45
  33. package/src/heap_check.spec.ts +144 -0
  34. package/src/heap_check.ts +144 -0
  35. package/src/instrumentation.ts +50 -0
  36. package/src/mcp/handler_utils.ts +14 -0
  37. package/src/mcp/tools/execute_query_tool.ts +52 -10
  38. package/src/oom_guards.integration.spec.ts +261 -0
  39. package/src/package_load/package_load_pool.spec.ts +252 -0
  40. package/src/package_load/package_load_pool.ts +920 -0
  41. package/src/package_load/package_load_worker.ts +980 -0
  42. package/src/package_load/protocol.ts +336 -0
  43. package/src/path_safety.ts +9 -3
  44. package/src/query_cap_metrics.spec.ts +89 -0
  45. package/src/query_cap_metrics.ts +115 -0
  46. package/src/query_concurrency.spec.ts +247 -0
  47. package/src/query_concurrency.ts +236 -0
  48. package/src/query_param_utils.ts +18 -0
  49. package/src/query_timeout.spec.ts +224 -0
  50. package/src/query_timeout.ts +178 -0
  51. package/src/server-old.ts +21 -1
  52. package/src/server.ts +61 -57
  53. package/src/service/connection.ts +8 -2
  54. package/src/service/db_utils.spec.ts +1 -1
  55. package/src/service/environment.ts +85 -4
  56. package/src/service/environment_admission.spec.ts +165 -1
  57. package/src/service/environment_store.spec.ts +103 -0
  58. package/src/service/environment_store.ts +98 -26
  59. package/src/service/filter_integration.spec.ts +110 -0
  60. package/src/service/given.ts +80 -0
  61. package/src/service/givens_integration.spec.ts +192 -0
  62. package/src/service/model.spec.ts +298 -3
  63. package/src/service/model.ts +362 -23
  64. package/src/service/model_limits.spec.ts +181 -0
  65. package/src/service/model_limits.ts +110 -0
  66. package/src/service/package.spec.ts +12 -6
  67. package/src/service/package.ts +263 -146
  68. package/src/service/package_worker_path.spec.ts +196 -0
  69. package/src/service/path_injection.spec.ts +39 -0
  70. package/src/stream_helpers.spec.ts +280 -0
  71. package/src/stream_helpers.ts +162 -0
  72. package/src/test_helpers/metrics_harness.ts +126 -0
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
  75. package/dist/app/assets/index-U38AyjJL.js +0 -451
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Integration test: exercise `Package.create` with the package-load
3
+ * worker pool enabled (PACKAGE_LOAD_WORKERS=1).
4
+ *
5
+ * Validates that the worker-load path:
6
+ * - reads the manifest, probes embedded databases, and compiles
7
+ * every model in a single off-thread job
8
+ * - produces a live `Package` whose `Model`s have populated
9
+ * `modelDef` / `sources` / `queries`
10
+ * - hydrates the `ModelMaterializer` from `modelDef` on first
11
+ * query (no recompile) — verified end-to-end by running a
12
+ * query through the resulting Model and getting a result
13
+ *
14
+ * Kept separate from `package.spec.ts` so the existing tests keep
15
+ * running on the in-process path without paying worker startup cost.
16
+ *
17
+ * Pool reuse strategy: one `PackageLoadPool` shared across all
18
+ * cases in this file. Spawning a fresh worker per test crashes Bun
19
+ * (segfault) because DuckDB's native bindings don't tolerate being
20
+ * loaded concurrently into multiple worker isolates of the same Bun
21
+ * process. Production uses one pool; this matches.
22
+ */
23
+ import {
24
+ afterAll,
25
+ afterEach,
26
+ beforeAll,
27
+ beforeEach,
28
+ describe,
29
+ expect,
30
+ it,
31
+ } from "bun:test";
32
+ import * as fs from "fs";
33
+ import * as os from "os";
34
+ import * as path from "path";
35
+ import {
36
+ PackageLoadPool,
37
+ __setPackageLoadPoolForTests,
38
+ } from "../package_load/package_load_pool";
39
+ import { Package } from "./package";
40
+
41
// Value of PACKAGE_LOAD_WORKERS at module load, captured so the suite can
// put the environment back the way it found it once it finishes.
const ORIGINAL_ENV = process.env.PACKAGE_LOAD_WORKERS;

describe("Package.create via worker pool", () => {
   let tempDir: string;
   let pool: PackageLoadPool;

   /**
    * Build a MalloyConfig whose only connection is a fresh in-memory
    * DuckDB named "duckdb". The connection is returned as well so the
    * caller can close it.
    */
   async function makeMalloyConfig(): Promise<{
      malloyConfig: import("@malloydata/malloy").MalloyConfig;
      duckdb: { close: () => Promise<void> };
   }> {
      const malloy = await import("@malloydata/malloy");
      const { DuckDBConnection } = await import("@malloydata/db-duckdb");
      const duckdb = new DuckDBConnection("duckdb", ":memory:");
      const connections = new malloy.FixedConnectionMap(
         new Map([["duckdb", duckdb]]),
         "duckdb",
      );
      const malloyConfig = new malloy.MalloyConfig({ connections: {} });
      malloyConfig.wrapConnections(() => connections);
      return { malloyConfig, duckdb };
   }

   /**
    * Run `body` with a freshly built MalloyConfig and always close the
    * backing DuckDB connection afterwards, pass or fail.
    */
   async function withMalloyConfig(
      body: (
         malloyConfig: import("@malloydata/malloy").MalloyConfig,
      ) => Promise<void>,
   ): Promise<void> {
      const { malloyConfig, duckdb } = await makeMalloyConfig();
      try {
         await body(malloyConfig);
      } finally {
         await duckdb.close();
      }
   }

   /** Write the minimal `publisher.json` manifest into the temp package dir. */
   function writeManifest(): void {
      const manifest = { name: "pkg", description: "test package" };
      fs.writeFileSync(
         path.join(tempDir, "publisher.json"),
         JSON.stringify(manifest),
      );
   }

   /** Write one Malloy model file into the temp package dir. */
   function writeModel(fileName: string, malloySource: string): void {
      fs.writeFileSync(path.join(tempDir, fileName), malloySource);
   }

   beforeAll(async () => {
      process.env.PACKAGE_LOAD_WORKERS = "1";
      pool = new PackageLoadPool(1);
      // Install our pool as the module-level singleton; Package.create
      // finds it through getPackageLoadPool().
      await __setPackageLoadPoolForTests(pool);
   });

   afterAll(async () => {
      await __setPackageLoadPoolForTests(null);
      if (ORIGINAL_ENV !== undefined) {
         process.env.PACKAGE_LOAD_WORKERS = ORIGINAL_ENV;
      } else {
         delete process.env.PACKAGE_LOAD_WORKERS;
      }
   });

   beforeEach(() => {
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "publisher-pkg-worker-"));
   });

   afterEach(() => {
      if (!tempDir) return;
      // Package.create wipes tempDir itself when it fails (that is its
      // staging-cleanup path), so a missing directory is expected here.
      try {
         fs.rmSync(tempDir, { recursive: true, force: true });
      } catch {
         /* already gone */
      }
      tempDir = "";
   });

   it("loads a package end-to-end and serves a query through the hydrated materializer", async () => {
      writeManifest();
      // `total_v` is declared as a *view* on the source so that the
      // query builder's `run: nums -> total_v` form resolves. Top-level
      // queries (`run: total_q`) are a separate path that the in-process
      // tests already exercise.
      writeModel(
         "trivial.malloy",
         `source: nums is duckdb.sql("select 1 as a, 2 as b") extend {
            measure: total is a.sum()
            view: total_v is { aggregate: total }
         }`,
      );

      await withMalloyConfig(async (malloyConfig) => {
         const pkg = await Package.create("env", "pkg", tempDir, malloyConfig);
         expect(pkg).toBeInstanceOf(Package);
         expect(pkg.getModelPaths()).toEqual(["trivial.malloy"]);

         const model = pkg.getModel("trivial.malloy");
         expect(model).toBeDefined();
         const apiModel = (await model!.getModel()) as {
            modelDef?: string;
            sources?: { name?: string }[];
         };
         expect(apiModel.modelDef).toBeDefined();
         expect(apiModel.sources?.[0]?.name).toBe("nums");

         // The first query hydrates the ModelMaterializer from the
         // worker-produced modelDef (no recompile) and then executes the
         // SQL on the *main thread's* DuckDB connection, which is the
         // only one holding the in-memory `nums` source from duckdb.sql().
         const { result } = await model!.getQueryResults(
            "nums",
            "total_v",
            undefined,
         );
         expect(result.data).toBeDefined();
      });
   });

   it("propagates a per-model compile failure as a thrown error from Package.create", async () => {
      writeManifest();
      writeModel(
         "broken.malloy",
         `source: bad is duckdb.sql("select 1 as a") extend {
            measure: oops is THIS_FUNC_DOES_NOT_EXIST(a)
         }`,
      );

      await withMalloyConfig(async (malloyConfig) => {
         await expect(
            Package.create("env", "pkg", tempDir, malloyConfig),
         ).rejects.toBeInstanceOf(Error);
      });
   });

   // NB: this case must stay last in the describe. Swapping the singleton
   // for an already-shut-down pool also shuts down the shared `pool`
   // (the swap implementation shuts down the outgoing singleton), so any
   // test placed after it would run against a dead pool. afterAll merely
   // resets the singleton to null, which makes the tail position safe.
   it("rewraps pool-infrastructure failures as ServiceUnavailableError (HTTP 503)", async () => {
      writeManifest();
      writeModel("trivial.malloy", `source: nums is duckdb.sql("select 1 as a")`);

      const deadPool = new PackageLoadPool(1);
      await deadPool.shutdown();
      await __setPackageLoadPoolForTests(deadPool);

      const { ServiceUnavailableError } = await import("../errors");
      await withMalloyConfig(async (malloyConfig) => {
         await expect(
            Package.create("env", "pkg", tempDir, malloyConfig),
         ).rejects.toBeInstanceOf(ServiceUnavailableError);
      });
   });
});
@@ -0,0 +1,39 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import { BadRequestError } from "../errors";
3
+ import { deleteDuckLakeConnectionFile } from "./connection";
4
+
5
/**
 * Connection names that the path-injection guard is expected to refuse,
 * as `[label, value]` pairs. The label only feeds the generated test
 * title; the value is what gets passed as the connection name.
 */
const TRAVERSAL_NAMES: ReadonlyArray<readonly [string, string]> = [
   // Directory traversal, at the start and in the middle of the name.
   ["leading traversal", "../etc"],
   ["embedded traversal", "foo/../../bar"],
   // Path separators of either flavour.
   ["slash in name", "foo/bar"],
   ["backslash in name", "foo\\bar"],
   // Dot-prefixed and pure-dot names.
   ["leading dot", ".staging"],
   ["bare dot-dot", ".."],
   ["bare dot", "."],
   // Degenerate or malformed values.
   ["empty", ""],
   ["NUL byte", "foo\0bar"],
   ["oversized", "a".repeat(256)],
   ["absolute", "/etc/passwd"],
];
18
+
19
describe("deleteDuckLakeConnectionFile path-injection guards", () => {
   // Environment paths that must be refused, as [label, value] pairs.
   const rejectedEnvironmentPaths = [
      ["relative", "relative/path"],
      ["traversal", "/var/lib/../../etc"],
      ["NUL byte", "/var/lib/env\0"],
      ["bare dot-dot", ".."],
   ];

   /** Expect the call to reject with a BadRequestError. */
   const expectBadRequest = (
      connectionName: string,
      environmentPath: string,
   ): Promise<void> =>
      expect(
         deleteDuckLakeConnectionFile(connectionName, environmentPath),
      ).rejects.toBeInstanceOf(BadRequestError);

   // Hostile connection name, well-formed environment path.
   it.each(TRAVERSAL_NAMES)(
      "rejects %s as connectionName (%p)",
      async (_label, connectionName) => {
         await expectBadRequest(connectionName, "/tmp/env");
      },
   );

   // Well-formed connection name, hostile environment path.
   it.each(rejectedEnvironmentPaths)(
      "rejects %s as environmentPath (%p)",
      async (_label, environmentPath) => {
         await expectBadRequest("conn", environmentPath);
      },
   );
});
@@ -0,0 +1,280 @@
1
+ import type {
2
+ QueryRecord,
3
+ RunSQLOptions,
4
+ StreamingConnection,
5
+ } from "@malloydata/malloy";
6
+ import { describe, expect, it } from "bun:test";
7
+
8
+ import { PayloadTooLargeError } from "./errors";
9
+ import { isStreamingConnection, streamSqlWithBudget } from "./stream_helpers";
10
+
11
/**
 * Test double for a StreamingConnection that replays a fixed array of
 * rows. It can record whether the abort signal it was handed ever fired,
 * which lets tests verify that `streamSqlWithBudget` really told the
 * driver to stop instead of merely discarding rows on the client side.
 */
function fakeStreamingConnection(opts: {
   /** Rows replayed, in order, by `runSQLStream`. */
   rows: QueryRecord[];
   /**
    * Defaults to true: a numeric `RunSQLOptions.rowLimit` truncates the
    * replay, as real Postgres/DuckDB would. Pass false to ignore the
    * limit and exercise the helper's own (cap+1)-th row overflow check.
    */
   honorRowLimit?: boolean;
   /**
    * Test hook: `value` becomes true once the "abort" listener that the
    * fake installs when streaming starts has fired.
    */
   abortObserved?: { value: boolean };
   /**
    * Test hook: receives the options object handed to `runSQLStream`, so
    * tests can check the forwarded `rowLimit` and the wired `abortSignal`.
    */
   capturedOptions?: { value: RunSQLOptions | undefined };
}): StreamingConnection {
   const obeyLimit = opts.honorRowLimit ?? true;
   const source = opts.rows;
   const abortFlag = opts.abortObserved;
   const optionsSink = opts.capturedOptions;

   const fake = {
      canStream(): true {
         return true;
      },
      async *runSQLStream(
         _sql: string,
         options?: RunSQLOptions,
      ): AsyncIterableIterator<QueryRecord> {
         if (optionsSink) {
            optionsSink.value = options;
         }
         if (abortFlag) {
            options?.abortSignal?.addEventListener("abort", () => {
               abortFlag.value = true;
            });
         }
         let ceiling = source.length;
         if (obeyLimit && typeof options?.rowLimit === "number") {
            ceiling = options.rowLimit;
         }
         let index = 0;
         while (index < Math.min(source.length, ceiling)) {
            yield source[index];
            index += 1;
         }
      },
   };
   return fake as unknown as StreamingConnection;
}
63
+
64
describe("isStreamingConnection", () => {
   it("returns true for a connection whose canStream() returns true", () => {
      const streaming = fakeStreamingConnection({ rows: [] });
      const verdict = isStreamingConnection(streaming);
      expect(verdict).toBe(true);
   });

   it("returns false for a connection without canStream", () => {
      // A bare object has no canStream member at all.
      const bare = {} as never;
      expect(isStreamingConnection(bare)).toBe(false);
   });

   it("returns false for a connection whose canStream() returns false", () => {
      // canStream exists but reports that streaming is unavailable.
      const nonStreaming = { canStream: () => false } as never;
      expect(isStreamingConnection(nonStreaming)).toBe(false);
   });
});
83
+
84
describe("streamSqlWithBudget", () => {
   /** Fresh holder for the options object the fake driver receives. */
   const newOptionsHolder = (): { value: RunSQLOptions | undefined } => ({
      value: undefined,
   });

   /** Fresh flag flipped by the fake when it sees the abort signal fire. */
   const newAbortFlag = (): { value: boolean } => ({ value: false });

   it("returns all rows when both budgets are comfortably above the stream", async () => {
      const expected: QueryRecord[] = [{ a: 1 }, { a: 2 }, { a: 3 }];
      const outcome = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: expected }),
         "SELECT a FROM t",
         { rowLimit: 10 },
         { maxRows: 10, maxBytes: 1_000_000 },
      );
      expect(outcome.rows).toEqual(expected);
      expect(outcome.totalRows).toBe(3);
   });

   it("forwards caller-supplied runSQLOptions (rowLimit) to the driver", async () => {
      const seen = newOptionsHolder();
      const driver = fakeStreamingConnection({
         rows: [{ a: 1 }, { a: 2 }],
         capturedOptions: seen,
      });
      await streamSqlWithBudget(
         driver,
         "SELECT 1",
         { rowLimit: 6 },
         { maxRows: 5, maxBytes: 0 },
      );
      expect(seen.value?.rowLimit).toBe(6);
      // Without an abortSignal in the forwarded options the helper would
      // have no way to stop the iterator on overflow.
      expect(seen.value?.abortSignal).toBeDefined();
   });

   it("composes the caller-supplied abortSignal with its internal cap-abort signal", async () => {
      // Step 5: the caller's signal carries the query timeout. With both
      // sources composed, EITHER an external timeout OR an internal cap
      // overflow ends the iterator. The signal the driver receives must
      // be a fresh AbortSignal (neither input by reference), and aborting
      // either input must mark it aborted.
      const seen = newOptionsHolder();
      const caller = new AbortController();
      const driver = fakeStreamingConnection({
         rows: [{ a: 1 }],
         capturedOptions: seen,
      });
      await streamSqlWithBudget(
         driver,
         "SELECT 1",
         { rowLimit: 10, abortSignal: caller.signal },
         { maxRows: 5, maxBytes: 0 },
      );
      const forwarded = seen.value?.abortSignal;
      expect(forwarded).toBeInstanceOf(AbortSignal);
      // `AbortSignal.any` hands back a new signal object, so identity
      // with the caller's signal must not hold.
      expect(forwarded).not.toBe(caller.signal);
      expect(forwarded?.aborted).toBe(false);
   });

   it("composed signal aborts when the caller's signal aborts", async () => {
      // Fire the external (caller) signal by hand and check that the
      // signal which reached the driver follows it. runWithQueryTimeout
      // relies on this half of the composition for a 504 to actually
      // cancel an in-flight query.
      if (typeof AbortSignal.any !== "function") return;
      const seen = newOptionsHolder();
      const caller = new AbortController();
      const driver = fakeStreamingConnection({
         rows: [{ a: 1 }],
         capturedOptions: seen,
      });
      await streamSqlWithBudget(
         driver,
         "SELECT 1",
         { rowLimit: 10, abortSignal: caller.signal },
         { maxRows: 5, maxBytes: 0 },
      );
      const forwarded = seen.value?.abortSignal;
      expect(forwarded?.aborted).toBe(false);
      caller.abort();
      expect(forwarded?.aborted).toBe(true);
   });

   it("throws PayloadTooLargeError and aborts the iterator on the (cap+1)-th row", async () => {
      const fiveRows: QueryRecord[] = [
         { a: 1 },
         { a: 2 },
         { a: 3 },
         { a: 4 },
         { a: 5 },
      ];
      const abortFlag = newAbortFlag();
      const driver = fakeStreamingConnection({
         rows: fiveRows,
         abortObserved: abortFlag,
         honorRowLimit: false,
      });
      const overCap = () =>
         streamSqlWithBudget(
            driver,
            "SELECT a FROM t",
            {},
            { maxRows: 2, maxBytes: 1_000_000 },
         );
      await expect(overCap()).rejects.toBeInstanceOf(PayloadTooLargeError);
      await expect(overCap()).rejects.toThrow("more than 2 rows");
      // Firing the abort signal is what makes a real driver
      // (pg-query-stream / duckdb) stop producing rows server-side;
      // dropping them client-side alone would not be enough.
      expect(abortFlag.value).toBe(true);
   });

   it("throws PayloadTooLargeError when summed JSON byte size exceeds the cap", async () => {
      const wide = "x".repeat(40);
      const abortFlag = newAbortFlag();
      const driver = fakeStreamingConnection({
         rows: [{ s: wide }, { s: wide }, { s: wide }],
         abortObserved: abortFlag,
         honorRowLimit: false,
      });
      await expect(
         streamSqlWithBudget(
            driver,
            "SELECT s FROM t",
            {},
            { maxRows: 100, maxBytes: 60 },
         ),
      ).rejects.toThrow("exceeded 60 bytes");
      expect(abortFlag.value).toBe(true);
   });

   it("returns all rows when the byte cap is disabled (maxBytes = 0)", async () => {
      const huge = "x".repeat(10_000);
      const threeHuge: QueryRecord[] = Array.from({ length: 3 }, () => ({
         s: huge,
      }));
      const outcome = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: threeHuge, honorRowLimit: false }),
         "SELECT s FROM t",
         {},
         { maxRows: 100, maxBytes: 0 },
      );
      expect(outcome.rows.length).toBe(3);
   });

   it("returns all rows when the row cap is disabled (maxRows = 0)", async () => {
      const fifty: QueryRecord[] = Array.from({ length: 50 }, (_, i) => ({
         a: i,
      }));
      const outcome = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: fifty, honorRowLimit: false }),
         "SELECT a FROM t",
         {},
         { maxRows: 0, maxBytes: 1_000_000 },
      );
      expect(outcome.rows.length).toBe(50);
   });

   it("returns rows when count equals the cap exactly (not an overflow)", async () => {
      const exactlyAtCap: QueryRecord[] = [{ a: 1 }, { a: 2 }, { a: 3 }];
      const outcome = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: exactlyAtCap, honorRowLimit: false }),
         "SELECT a FROM t",
         {},
         { maxRows: 3, maxBytes: 1_000_000 },
      );
      expect(outcome.rows.length).toBe(3);
   });

   it("re-throws non-overflow errors from the driver", async () => {
      // A driver whose stream fails before yielding a single row.
      const failingDriver = {
         canStream(): true {
            return true;
         },
         // eslint-disable-next-line require-yield
         async *runSQLStream(): AsyncIterableIterator<QueryRecord> {
            throw new Error("connection reset");
         },
      } as unknown as StreamingConnection;
      const attempt = streamSqlWithBudget(
         failingDriver,
         "SELECT 1",
         {},
         { maxRows: 10, maxBytes: 1_000 },
      );
      await expect(attempt).rejects.toThrow("connection reset");
   });
});
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Helpers for streaming the ad-hoc connection SQL endpoints
3
+ * (`/environments/.../connections/.../sqlQuery`) so the publisher
4
+ * process never has to hold a whole result set in memory before
5
+ * returning it.
6
+ *
7
+ * The Step 1 row cap (`PUBLISHER_MAX_QUERY_ROWS`) is necessary but
8
+ * not sufficient: row count is a poor proxy for memory pressure
9
+ * because a single 10 MB JSON column blows past the 100k-row cap's
10
+ * safe envelope. The byte cap (`PUBLISHER_MAX_RESPONSE_BYTES`) is
11
+ * the actual memory bound, and bytes can only be enforced by
12
+ * iterating row-at-a-time on `runSQLStream` — `runSQL` returns a
13
+ * fully-buffered result, so by the time we'd count bytes the
14
+ * connector has already done the damage.
15
+ *
16
+ * On streaming-capable connections (Postgres, DuckDB, ...) the
17
+ * controller routes here. On other connections it stays on the
18
+ * Step 1 path; client-side byte counting after the fact would be
19
+ * security theatre.
20
+ */
21
+
22
+ import type {
23
+ Connection,
24
+ MalloyQueryData,
25
+ QueryRecord,
26
+ RunSQLOptions,
27
+ StreamingConnection,
28
+ } from "@malloydata/malloy";
29
+
30
+ import { PayloadTooLargeError } from "./errors";
31
+ import { recordQueryCapExceeded } from "./query_cap_metrics";
32
+
33
/**
 * Type guard that reports whether `connection` supports streaming.
 *
 * The Malloy SDK declares `Connection.canStream` with the return type
 * `this is StreamingConnection`, so a positive answer is sufficient to
 * call `runSQLStream` safely.
 *
 * @param connection - The connection to probe.
 * @returns `false` when `canStream` is absent or not a function;
 *   otherwise whatever `connection.canStream()` reports.
 */
export function isStreamingConnection(
   connection: Connection,
): connection is StreamingConnection {
   const candidate = connection as { canStream?: () => boolean };
   if (typeof candidate.canStream !== "function") {
      return false;
   }
   return (connection as StreamingConnection).canStream();
}
47
+
48
/** Limits applied while draining a streamed SQL result. */
export interface StreamBudget {
   /**
    * Row cap. `0` switches the cap off: the helper then never raises on
    * row overflow, although a caller-supplied `rowLimit` in
    * `runSQLOptions` may still bound the stream.
    */
   maxRows: number;
   /**
    * Byte cap on the combined JSON-serialized size of the returned rows.
    * `0` switches the cap off. The size is the running sum of
    * `Buffer.byteLength(JSON.stringify(row))` over the yielded rows,
    * i.e. the bytes that will end up inside `result.rows` in the
    * eventual response.
    */
   maxBytes: number;
}
65
+
66
/**
 * Combine the optional caller-supplied abort signal with the internal
 * cap controller into the single signal handed to the driver.
 *
 * - No caller signal: the cap controller's own signal is used.
 * - `AbortSignal.any` available: a composed signal is returned that
 *   aborts when either input aborts.
 * - `AbortSignal.any` missing (older runtime): the caller's abort is
 *   forwarded into the cap controller with an event listener, so the
 *   caller's signal still cancels the stream instead of being dropped.
 *
 * @returns The signal to pass to the driver and a `dispose` callback
 *   that detaches any listener this function attached.
 */
function linkAbortSignals(
   externalSignal: AbortSignal | undefined,
   capAc: AbortController,
): { signal: AbortSignal; dispose: () => void } {
   const nothingToDispose = (): void => {};

   if (!externalSignal) {
      return { signal: capAc.signal, dispose: nothingToDispose };
   }
   if (typeof AbortSignal.any === "function") {
      return {
         signal: AbortSignal.any([externalSignal, capAc.signal]),
         dispose: nothingToDispose,
      };
   }

   // Fallback for runtimes without `AbortSignal.any`.
   if (externalSignal.aborted) {
      // Mirror `AbortSignal.any`: an already-aborted input yields an
      // already-aborted signal.
      capAc.abort(externalSignal.reason);
      return { signal: capAc.signal, dispose: nothingToDispose };
   }
   const forwardAbort = (): void => capAc.abort(externalSignal.reason);
   externalSignal.addEventListener("abort", forwardAbort, { once: true });
   return {
      signal: capAc.signal,
      dispose: () => externalSignal.removeEventListener("abort", forwardAbort),
   };
}

/**
 * Drain `runSQLStream` into a `MalloyQueryData`-shaped buffer while
 * enforcing a row cap and a byte cap. As soon as either cap is breached
 * the underlying iterator is aborted through an internal
 * `AbortController`, so the driver can stop producing rows right away.
 *
 * Row overflow uses the `cap + 1` sentinel pattern: the controller is
 * expected to have clamped `runSQLOptions.rowLimit` to
 * `min(callerLimit, cap + 1)`, so receiving more than `maxRows` rows
 * means the request would have overflowed.
 *
 * The caller's `runSQLOptions.abortSignal` (the query timeout) is
 * combined with the internal cap signal, so either source cancels the
 * stream. This also holds on runtimes that lack `AbortSignal.any`.
 *
 * @param connection - Streaming-capable connection to query.
 * @param sql - SQL text passed through to `runSQLStream`.
 * @param runSQLOptions - Caller options; forwarded unchanged except for
 *   `abortSignal`, which is replaced by the combined signal.
 * @param budget - Row and byte caps; `0` disables the respective cap.
 * @returns The collected rows and their count as `totalRows`.
 * @throws PayloadTooLargeError when a cap is exceeded. The message names
 *   the relevant environment variable as a tuning hint.
 * @throws Any error raised by the driver that was not caused by this
 *   helper's own cap abort (including a caller-triggered abort).
 */
export async function streamSqlWithBudget(
   connection: StreamingConnection,
   sql: string,
   runSQLOptions: RunSQLOptions,
   budget: StreamBudget,
): Promise<MalloyQueryData> {
   const { maxRows, maxBytes } = budget;
   const capAc = new AbortController();
   const rows: QueryRecord[] = [];
   let byteTotal = 0;
   let overflowMessage: string | undefined;

   // Two abort sources feed the driver:
   //
   //   - the caller's signal (the publisher's query timeout), which the
   //     controller's `runWithQueryTimeout` reports as a 504;
   //   - the internal cap abort, which surfaces below as
   //     `PayloadTooLargeError` (413).
   const { signal, dispose } = linkAbortSignals(
      runSQLOptions.abortSignal,
      capAc,
   );

   try {
      for await (const row of connection.runSQLStream(sql, {
         ...runSQLOptions,
         abortSignal: signal,
      })) {
         rows.push(row);
         if (maxBytes > 0) {
            // Measure what the response body will contain for this row.
            // This duplicates work done by the final
            // `JSON.stringify(result)`, but it is what makes an early
            // abort on overflow possible.
            byteTotal += Buffer.byteLength(JSON.stringify(row), "utf8");
            if (byteTotal > maxBytes) {
               recordQueryCapExceeded("bytes", "connection_sql");
               overflowMessage = `Query response exceeded ${maxBytes} bytes (had at least ${byteTotal}). Refine the query (project fewer columns, add a LIMIT, or filter wide values) or raise PUBLISHER_MAX_RESPONSE_BYTES.`;
               capAc.abort();
               break;
            }
         }
         if (maxRows > 0 && rows.length > maxRows) {
            recordQueryCapExceeded("rows", "connection_sql");
            overflowMessage = `Query returned more than ${maxRows} rows. Refine the query (add a LIMIT or more selective WHERE) or raise PUBLISHER_MAX_QUERY_ROWS.`;
            capAc.abort();
            break;
         }
      }
   } catch (err) {
      // `pg-query-stream` surfaces `query.destroy()` (which our abort
      // handler triggers) as a synthetic error in some versions. Swallow
      // it only when this helper triggered the abort itself; anything
      // else is a real connection error or the caller's timeout, and the
      // controller must see it.
      if (!overflowMessage) throw err;
   } finally {
      // Detach the fallback listener so a long-lived caller signal does
      // not keep a reference to this call's controller.
      dispose();
   }

   if (overflowMessage) throw new PayloadTooLargeError(overflowMessage);

   return { rows, totalRows: rows.length };
}