@malloy-publisher/server 0.0.198 → 0.0.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +30 -1
- package/dist/app/api-doc.yaml +127 -111
- package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
- package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
- package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
- package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
- package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
- package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
- package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
- package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
- package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
- package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
- package/dist/app/assets/index-DE6d5jEy.js +452 -0
- package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/instrumentation.mjs +57 -36
- package/dist/package_load_worker.mjs +12213 -0
- package/dist/server.mjs +4198 -3648
- package/package.json +2 -3
- package/src/config.spec.ts +246 -0
- package/src/config.ts +121 -1
- package/src/constants.ts +84 -1
- package/src/controller/compile.controller.ts +3 -1
- package/src/controller/connection.controller.spec.ts +803 -0
- package/src/controller/connection.controller.ts +207 -20
- package/src/controller/model.controller.ts +19 -1
- package/src/controller/query.controller.ts +22 -6
- package/src/controller/watch-mode.controller.ts +11 -2
- package/src/errors.spec.ts +44 -0
- package/src/errors.ts +34 -0
- package/src/health.spec.ts +90 -0
- package/src/health.ts +88 -45
- package/src/heap_check.spec.ts +144 -0
- package/src/heap_check.ts +144 -0
- package/src/instrumentation.ts +50 -0
- package/src/mcp/handler_utils.ts +14 -0
- package/src/mcp/tools/execute_query_tool.ts +52 -10
- package/src/oom_guards.integration.spec.ts +261 -0
- package/src/package_load/package_load_pool.spec.ts +252 -0
- package/src/package_load/package_load_pool.ts +920 -0
- package/src/package_load/package_load_worker.ts +980 -0
- package/src/package_load/protocol.ts +336 -0
- package/src/path_safety.ts +9 -3
- package/src/query_cap_metrics.spec.ts +89 -0
- package/src/query_cap_metrics.ts +115 -0
- package/src/query_concurrency.spec.ts +247 -0
- package/src/query_concurrency.ts +236 -0
- package/src/query_param_utils.ts +18 -0
- package/src/query_timeout.spec.ts +224 -0
- package/src/query_timeout.ts +178 -0
- package/src/server-old.ts +21 -1
- package/src/server.ts +61 -57
- package/src/service/connection.ts +8 -2
- package/src/service/db_utils.spec.ts +1 -1
- package/src/service/environment.ts +85 -4
- package/src/service/environment_admission.spec.ts +165 -1
- package/src/service/environment_store.spec.ts +103 -0
- package/src/service/environment_store.ts +98 -26
- package/src/service/filter_integration.spec.ts +110 -0
- package/src/service/given.ts +80 -0
- package/src/service/givens_integration.spec.ts +192 -0
- package/src/service/model.spec.ts +298 -3
- package/src/service/model.ts +362 -23
- package/src/service/model_limits.spec.ts +181 -0
- package/src/service/model_limits.ts +110 -0
- package/src/service/package.spec.ts +12 -6
- package/src/service/package.ts +263 -146
- package/src/service/package_worker_path.spec.ts +196 -0
- package/src/service/path_injection.spec.ts +39 -0
- package/src/stream_helpers.spec.ts +280 -0
- package/src/stream_helpers.ts +162 -0
- package/src/test_helpers/metrics_harness.ts +126 -0
- package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
- package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
- package/dist/app/assets/index-U38AyjJL.js +0 -451
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test: exercise `Package.create` with the package-load
|
|
3
|
+
* worker pool enabled (PACKAGE_LOAD_WORKERS=1).
|
|
4
|
+
*
|
|
5
|
+
* Validates that the worker-load path:
|
|
6
|
+
* - reads the manifest, probes embedded databases, and compiles
|
|
7
|
+
* every model in a single off-thread job
|
|
8
|
+
* - produces a live `Package` whose `Model`s have populated
|
|
9
|
+
* `modelDef` / `sources` / `queries`
|
|
10
|
+
* - hydrates the `ModelMaterializer` from `modelDef` on first
|
|
11
|
+
* query (no recompile) — verified end-to-end by running a
|
|
12
|
+
* query through the resulting Model and getting a result
|
|
13
|
+
*
|
|
14
|
+
* Kept separate from `package.spec.ts` so the existing tests keep
|
|
15
|
+
* running on the in-process path without paying worker startup cost.
|
|
16
|
+
*
|
|
17
|
+
* Pool reuse strategy: one `PackageLoadPool` shared across all
|
|
18
|
+
* cases in this file. Spawning a fresh worker per test crashes Bun
|
|
19
|
+
* (segfault) because DuckDB's native bindings don't tolerate being
|
|
20
|
+
* loaded concurrently into multiple worker isolates of the same Bun
|
|
21
|
+
* process. Production uses one pool; this matches.
|
|
22
|
+
*/
|
|
23
|
+
import {
|
|
24
|
+
afterAll,
|
|
25
|
+
afterEach,
|
|
26
|
+
beforeAll,
|
|
27
|
+
beforeEach,
|
|
28
|
+
describe,
|
|
29
|
+
expect,
|
|
30
|
+
it,
|
|
31
|
+
} from "bun:test";
|
|
32
|
+
import * as fs from "fs";
|
|
33
|
+
import * as os from "os";
|
|
34
|
+
import * as path from "path";
|
|
35
|
+
import {
|
|
36
|
+
PackageLoadPool,
|
|
37
|
+
__setPackageLoadPoolForTests,
|
|
38
|
+
} from "../package_load/package_load_pool";
|
|
39
|
+
import { Package } from "./package";
|
|
40
|
+
|
|
41
|
+
const ORIGINAL_ENV = process.env.PACKAGE_LOAD_WORKERS;
|
|
42
|
+
|
|
43
|
+
/**
 * Suite: `Package.create` with the package-load worker pool enabled.
 * A single `PackageLoadPool` is created once and installed as the
 * module-level singleton for every case in the suite.
 */
describe("Package.create via worker pool", () => {
   // What `makeMalloyConfig` hands back: the config under test plus the
   // DuckDB handle that the calling test must close.
   type ConfigFixture = {
      malloyConfig: import("@malloydata/malloy").MalloyConfig;
      duckdb: { close: () => Promise<void> };
   };

   let tempDir: string;
   let pool: PackageLoadPool;

   beforeAll(async () => {
      process.env.PACKAGE_LOAD_WORKERS = "1";
      pool = new PackageLoadPool(1);
      // Install our pool as the module-level singleton; Package.create
      // finds it through getPackageLoadPool().
      await __setPackageLoadPoolForTests(pool);
   });

   afterAll(async () => {
      await __setPackageLoadPoolForTests(null);
      // Leave PACKAGE_LOAD_WORKERS exactly as it was before the suite ran.
      if (ORIGINAL_ENV !== undefined) {
         process.env.PACKAGE_LOAD_WORKERS = ORIGINAL_ENV;
         return;
      }
      delete process.env.PACKAGE_LOAD_WORKERS;
   });

   beforeEach(() => {
      const prefix = path.join(os.tmpdir(), "publisher-pkg-worker-");
      tempDir = fs.mkdtempSync(prefix);
   });

   afterEach(() => {
      if (!tempDir) return;
      // Package.create wipes tempDir itself on failure (the
      // staging-cleanup path), so the directory may already be missing.
      try {
         fs.rmSync(tempDir, { recursive: true, force: true });
      } catch {
         /* already gone */
      }
      tempDir = "";
   });

   /**
    * Builds a `MalloyConfig` whose connections are replaced by a fixed
    * map containing one in-memory DuckDB connection named "duckdb".
    */
   async function makeMalloyConfig(): Promise<ConfigFixture> {
      const malloy = await import("@malloydata/malloy");
      const duckdbModule = await import("@malloydata/db-duckdb");
      const duckdb = new duckdbModule.DuckDBConnection("duckdb", ":memory:");
      const byName = new Map([["duckdb", duckdb]]);
      const connections = new malloy.FixedConnectionMap(byName, "duckdb");
      const malloyConfig = new malloy.MalloyConfig({ connections: {} });
      malloyConfig.wrapConnections(() => connections);
      return { malloyConfig, duckdb };
   }

   /** Writes a minimal `publisher.json` manifest into the current tempDir. */
   function writeManifest(): void {
      const manifestPath = path.join(tempDir, "publisher.json");
      const manifest = { name: "pkg", description: "test package" };
      fs.writeFileSync(manifestPath, JSON.stringify(manifest));
   }

   it("loads a package end-to-end and serves a query through the hydrated materializer", async () => {
      writeManifest();
      // `total_v` is declared as a *view* on the source so that the
      // query builder's `run: nums -> total_v` form resolves. (Top-level
      // queries use the `run: total_q` form, an orthogonal path that the
      // in-process tests already cover.)
      const modelSource = [
         `source: nums is duckdb.sql("select 1 as a, 2 as b") extend {`,
         `measure: total is a.sum()`,
         `view: total_v is { aggregate: total }`,
         `}`,
      ].join("\n");
      fs.writeFileSync(path.join(tempDir, "trivial.malloy"), modelSource);

      const fixture = await makeMalloyConfig();
      try {
         const pkg = await Package.create(
            "env",
            "pkg",
            tempDir,
            fixture.malloyConfig,
         );
         expect(pkg).toBeInstanceOf(Package);
         expect(pkg.getModelPaths()).toEqual(["trivial.malloy"]);

         const model = pkg.getModel("trivial.malloy");
         expect(model).toBeDefined();
         const apiModel = (await model!.getModel()) as {
            modelDef?: string;
            sources?: { name?: string }[];
         };
         expect(apiModel.modelDef).toBeDefined();
         expect(apiModel.sources?.[0]?.name).toBe("nums");

         // First query against the package: the ModelMaterializer is
         // hydrated from the worker's modelDef without a recompile, and
         // the SQL runs on the *main thread's* DuckDB connection (the
         // only one holding the in-memory `nums` source from duckdb.sql()).
         const queryOutcome = await model!.getQueryResults(
            "nums",
            "total_v",
            undefined,
         );
         expect(queryOutcome.result.data).toBeDefined();
      } finally {
         await fixture.duckdb.close();
      }
   });

   it("propagates a per-model compile failure as a thrown error from Package.create", async () => {
      writeManifest();
      const brokenSource = [
         `source: bad is duckdb.sql("select 1 as a") extend {`,
         `measure: oops is THIS_FUNC_DOES_NOT_EXIST(a)`,
         `}`,
      ].join("\n");
      fs.writeFileSync(path.join(tempDir, "broken.malloy"), brokenSource);

      const fixture = await makeMalloyConfig();
      try {
         const creation = Package.create(
            "env",
            "pkg",
            tempDir,
            fixture.malloyConfig,
         );
         await expect(creation).rejects.toBeInstanceOf(Error);
      } finally {
         await fixture.duckdb.close();
      }
   });

   // NB: this case must stay last in the describe. Swapping the
   // singleton for a pre-shutdown pool also tears down the shared `pool`
   // (the swap implementation shuts down the outgoing singleton), so any
   // later test here would see a dead pool. afterAll only resets the
   // singleton to null, which makes this safe at the tail.
   it("rewraps pool-infrastructure failures as ServiceUnavailableError (HTTP 503)", async () => {
      writeManifest();
      fs.writeFileSync(
         path.join(tempDir, "trivial.malloy"),
         `source: nums is duckdb.sql("select 1 as a")`,
      );

      const deadPool = new PackageLoadPool(1);
      await deadPool.shutdown();
      await __setPackageLoadPoolForTests(deadPool);

      const errors = await import("../errors");
      const fixture = await makeMalloyConfig();
      try {
         const creation = Package.create(
            "env",
            "pkg",
            tempDir,
            fixture.malloyConfig,
         );
         await expect(creation).rejects.toBeInstanceOf(
            errors.ServiceUnavailableError,
         );
      } finally {
         await fixture.duckdb.close();
      }
   });
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import { BadRequestError } from "../errors";
|
|
3
|
+
import { deleteDuckLakeConnectionFile } from "./connection";
|
|
4
|
+
|
|
5
|
+
// Table of [case label, candidate connection name]. Every candidate is
// expected to be rejected by deleteDuckLakeConnectionFile.
const TRAVERSAL_NAMES: ReadonlyArray<readonly [string, string]> = [
   ["leading traversal", "../etc"],
   ["embedded traversal", "foo/../../bar"],
   ["slash in name", "foo/bar"],
   ["backslash in name", "foo\\bar"],
   ["leading dot", ".staging"],
   ["bare dot-dot", ".."],
   ["bare dot", "."],
   ["empty", ""],
   ["NUL byte", "foo\0bar"],
   ["oversized", "a".repeat(256)],
   ["absolute", "/etc/passwd"],
] as const;

describe("deleteDuckLakeConnectionFile path-injection guards", () => {
   // Unsafe connection names paired with a well-formed environment path.
   it.each(TRAVERSAL_NAMES)(
      "rejects %s as connectionName (%p)",
      async (_label, connectionName) => {
         const attempt = deleteDuckLakeConnectionFile(
            connectionName,
            "/tmp/env",
         );
         await expect(attempt).rejects.toBeInstanceOf(BadRequestError);
      },
   );

   // Well-formed connection name paired with unsafe environment paths.
   it.each([
      ["relative", "relative/path"],
      ["traversal", "/var/lib/../../etc"],
      ["NUL byte", "/var/lib/env\0"],
      ["bare dot-dot", ".."],
   ])("rejects %s as environmentPath (%p)", async (_label, environmentPath) => {
      const attempt = deleteDuckLakeConnectionFile("conn", environmentPath);
      await expect(attempt).rejects.toBeInstanceOf(BadRequestError);
   });
});
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
QueryRecord,
|
|
3
|
+
RunSQLOptions,
|
|
4
|
+
StreamingConnection,
|
|
5
|
+
} from "@malloydata/malloy";
|
|
6
|
+
import { describe, expect, it } from "bun:test";
|
|
7
|
+
|
|
8
|
+
import { PayloadTooLargeError } from "./errors";
|
|
9
|
+
import { isStreamingConnection, streamSqlWithBudget } from "./stream_helpers";
|
|
10
|
+
|
|
11
|
+
/**
 * Test double for a `StreamingConnection` that replays a fixed row
 * array. It can record the options it was called with and whether the
 * supplied abort signal fired, so tests can check that
 * `streamSqlWithBudget` really signalled the driver to stop rather than
 * only counting rows client-side.
 *
 * Options:
 *  - `rows`: the rows to replay, in order.
 *  - `honorRowLimit` (default `true`): when true and
 *    `RunSQLOptions.rowLimit` is a number, at most that many rows are
 *    yielded. Pass `false` to exercise the helper's own overflow
 *    detection (the (cap+1)-th row check).
 *  - `abortObserved`: test hook; `value` is set to `true` when the
 *    "abort" listener installed at the start of streaming fires.
 *  - `capturedOptions`: test hook; `value` receives the options passed
 *    to `runSQLStream`, so tests can assert on the forwarded `rowLimit`
 *    and the wired-in `abortSignal`.
 */
function fakeStreamingConnection(opts: {
   rows: QueryRecord[];
   honorRowLimit?: boolean;
   abortObserved?: { value: boolean };
   capturedOptions?: { value: RunSQLOptions | undefined };
}): StreamingConnection {
   const {
      rows: sourceRows,
      abortObserved: abortFlag,
      capturedOptions: optionsSink,
   } = opts;
   const honorRowLimit =
      opts.honorRowLimit === undefined ? true : opts.honorRowLimit;

   const fake = {
      canStream(): true {
         return true;
      },
      async *runSQLStream(
         _sql: string,
         options?: RunSQLOptions,
      ): AsyncIterableIterator<QueryRecord> {
         if (optionsSink) {
            optionsSink.value = options;
         }
         if (abortFlag) {
            options?.abortSignal?.addEventListener("abort", () => {
               abortFlag.value = true;
            });
         }

         // Without an honored numeric rowLimit, replay everything.
         let limit = sourceRows.length;
         if (honorRowLimit && typeof options?.rowLimit === "number") {
            limit = options.rowLimit;
         }

         let index = 0;
         while (index < Math.min(sourceRows.length, limit)) {
            yield sourceRows[index];
            index += 1;
         }
      },
   };
   return fake as unknown as StreamingConnection;
}
|
|
63
|
+
|
|
64
|
+
// Covers the three shapes the guard distinguishes: canStream() true,
// no canStream at all, and canStream() false.
describe("isStreamingConnection", () => {
   it("returns true for a connection whose canStream() returns true", () => {
      const streaming = fakeStreamingConnection({ rows: [] });
      const verdict = isStreamingConnection(streaming);
      expect(verdict).toBe(true);
   });

   it("returns false for a connection without canStream", () => {
      const verdict = isStreamingConnection({} as never);
      expect(verdict).toBe(false);
   });

   it("returns false for a connection whose canStream() returns false", () => {
      const nonStreaming = {
         canStream() {
            return false;
         },
      } as never;
      const verdict = isStreamingConnection(nonStreaming);
      expect(verdict).toBe(false);
   });
});
|
|
83
|
+
|
|
84
|
+
// Behavioral spec for streamSqlWithBudget: option forwarding, abort
// signal composition, row/byte cap enforcement, disabled caps, and
// driver error propagation.
describe("streamSqlWithBudget", () => {
   it("returns all rows when both budgets are comfortably above the stream", async () => {
      const expected: QueryRecord[] = [{ a: 1 }, { a: 2 }, { a: 3 }];
      const result = await streamSqlWithBudget(
         fakeStreamingConnection({ rows: expected }),
         "SELECT a FROM t",
         { rowLimit: 10 },
         { maxRows: 10, maxBytes: 1_000_000 },
      );
      expect(result.rows).toEqual(expected);
      expect(result.totalRows).toBe(3);
   });

   it("forwards caller-supplied runSQLOptions (rowLimit) to the driver", async () => {
      const captured: { value: RunSQLOptions | undefined } = {
         value: undefined,
      };
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }, { a: 2 }],
         capturedOptions: captured,
      });
      await streamSqlWithBudget(
         conn,
         "SELECT 1",
         { rowLimit: 6 },
         { maxRows: 5, maxBytes: 0 },
      );
      expect(captured.value?.rowLimit).toBe(6);
      // The helper can only abort the iterator on overflow if it wired
      // an abortSignal into the options.
      expect(captured.value?.abortSignal).toBeDefined();
   });

   it("composes the caller-supplied abortSignal with its internal cap-abort signal", async () => {
      // Step 5: the caller's signal is the query timeout. With both
      // sources composed, EITHER an external timeout OR an internal cap
      // overflow terminates the iterator. The combined signal must be a
      // fresh AbortSignal (not either input by reference), and aborting
      // either input must mark the composed signal aborted.
      const captured: { value: RunSQLOptions | undefined } = {
         value: undefined,
      };
      const callerAc = new AbortController();
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }],
         capturedOptions: captured,
      });
      await streamSqlWithBudget(
         conn,
         "SELECT 1",
         { rowLimit: 10, abortSignal: callerAc.signal },
         { maxRows: 5, maxBytes: 0 },
      );
      const observed = captured.value?.abortSignal;
      expect(observed).toBeInstanceOf(AbortSignal);
      // `AbortSignal.any` returns a fresh signal, so what reached the
      // driver must not be the caller's signal by reference.
      expect(observed).not.toBe(callerAc.signal);
      expect(observed?.aborted).toBe(false);
   });

   it("composed signal aborts when the caller's signal aborts", async () => {
      // Fire the external (caller) signal by hand and confirm the
      // composed signal that reached the driver follows it. This is the
      // half of composition that runWithQueryTimeout relies on for a 504
      // to actually cancel an in-flight query.
      if (typeof AbortSignal.any !== "function") return;
      const captured: { value: RunSQLOptions | undefined } = {
         value: undefined,
      };
      const callerAc = new AbortController();
      const conn = fakeStreamingConnection({
         rows: [{ a: 1 }],
         capturedOptions: captured,
      });
      await streamSqlWithBudget(
         conn,
         "SELECT 1",
         { rowLimit: 10, abortSignal: callerAc.signal },
         { maxRows: 5, maxBytes: 0 },
      );
      const observed = captured.value?.abortSignal;
      expect(observed?.aborted).toBe(false);
      callerAc.abort();
      expect(observed?.aborted).toBe(true);
   });

   it("throws PayloadTooLargeError and aborts the iterator on the (cap+1)-th row", async () => {
      const rows: QueryRecord[] = [
         { a: 1 },
         { a: 2 },
         { a: 3 },
         { a: 4 },
         { a: 5 },
      ];
      const abortObserved = { value: false };
      const conn = fakeStreamingConnection({
         rows,
         abortObserved,
         honorRowLimit: false,
      });
      // Each assertion below drains a fresh stream from the same fake.
      const overflow = () =>
         streamSqlWithBudget(
            conn,
            "SELECT a FROM t",
            {},
            { maxRows: 2, maxBytes: 1_000_000 },
         );
      await expect(overflow()).rejects.toBeInstanceOf(PayloadTooLargeError);
      await expect(overflow()).rejects.toThrow("more than 2 rows");
      // The abort signal must have fired, so that a real driver
      // (pg-query-stream / duckdb) would stop producing rows server-side
      // instead of having them discarded client-side.
      expect(abortObserved.value).toBe(true);
   });

   it("throws PayloadTooLargeError when summed JSON byte size exceeds the cap", async () => {
      const big = "x".repeat(40);
      const rows: QueryRecord[] = [{ s: big }, { s: big }, { s: big }];
      const abortObserved = { value: false };
      const conn = fakeStreamingConnection({
         rows,
         abortObserved,
         honorRowLimit: false,
      });
      const attempt = streamSqlWithBudget(
         conn,
         "SELECT s FROM t",
         {},
         { maxRows: 100, maxBytes: 60 },
      );
      await expect(attempt).rejects.toThrow("exceeded 60 bytes");
      expect(abortObserved.value).toBe(true);
   });

   it("returns all rows when the byte cap is disabled (maxBytes = 0)", async () => {
      const big = "x".repeat(10_000);
      const rows: QueryRecord[] = Array.from({ length: 3 }).map(() => ({
         s: big,
      }));
      const conn = fakeStreamingConnection({ rows, honorRowLimit: false });
      const result = await streamSqlWithBudget(
         conn,
         "SELECT s FROM t",
         {},
         { maxRows: 100, maxBytes: 0 },
      );
      expect(result.rows.length).toBe(3);
   });

   it("returns all rows when the row cap is disabled (maxRows = 0)", async () => {
      const rows: QueryRecord[] = [];
      for (let i = 0; i < 50; i += 1) {
         rows.push({ a: i });
      }
      const conn = fakeStreamingConnection({ rows, honorRowLimit: false });
      const result = await streamSqlWithBudget(
         conn,
         "SELECT a FROM t",
         {},
         { maxRows: 0, maxBytes: 1_000_000 },
      );
      expect(result.rows.length).toBe(50);
   });

   it("returns rows when count equals the cap exactly (not an overflow)", async () => {
      const rows: QueryRecord[] = [{ a: 1 }, { a: 2 }, { a: 3 }];
      const conn = fakeStreamingConnection({ rows, honorRowLimit: false });
      const result = await streamSqlWithBudget(
         conn,
         "SELECT a FROM t",
         {},
         { maxRows: 3, maxBytes: 1_000_000 },
      );
      expect(result.rows.length).toBe(3);
   });

   it("re-throws non-overflow errors from the driver", async () => {
      // A driver whose stream fails before yielding anything.
      const failing = {
         canStream(): true {
            return true;
         },
         // eslint-disable-next-line require-yield
         async *runSQLStream(): AsyncIterableIterator<QueryRecord> {
            throw new Error("connection reset");
         },
      } as unknown as StreamingConnection;
      const attempt = streamSqlWithBudget(
         failing,
         "SELECT 1",
         {},
         { maxRows: 10, maxBytes: 1_000 },
      );
      await expect(attempt).rejects.toThrow("connection reset");
   });
});
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helpers for streaming the ad-hoc connection SQL endpoints
|
|
3
|
+
* (`/environments/.../connections/.../sqlQuery`) so the publisher
|
|
4
|
+
* process never has to hold a whole result set in memory before
|
|
5
|
+
* returning it.
|
|
6
|
+
*
|
|
7
|
+
* The Step 1 row cap (`PUBLISHER_MAX_QUERY_ROWS`) is necessary but
|
|
8
|
+
* not sufficient: row count is a poor proxy for memory pressure
|
|
9
|
+
* because a single 10 MB JSON column blows past the 100k-row cap's
|
|
10
|
+
* safe envelope. The byte cap (`PUBLISHER_MAX_RESPONSE_BYTES`) is
|
|
11
|
+
* the actual memory bound, and bytes can only be enforced by
|
|
12
|
+
* iterating row-at-a-time on `runSQLStream` — `runSQL` returns a
|
|
13
|
+
* fully-buffered result, so by the time we'd count bytes the
|
|
14
|
+
* connector has already done the damage.
|
|
15
|
+
*
|
|
16
|
+
* On streaming-capable connections (Postgres, DuckDB, ...) the
|
|
17
|
+
* controller routes here. On other connections it stays on the
|
|
18
|
+
* Step 1 path; client-side byte counting after the fact would be
|
|
19
|
+
* security theatre.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type {
|
|
23
|
+
Connection,
|
|
24
|
+
MalloyQueryData,
|
|
25
|
+
QueryRecord,
|
|
26
|
+
RunSQLOptions,
|
|
27
|
+
StreamingConnection,
|
|
28
|
+
} from "@malloydata/malloy";
|
|
29
|
+
|
|
30
|
+
import { PayloadTooLargeError } from "./errors";
|
|
31
|
+
import { recordQueryCapExceeded } from "./query_cap_metrics";
|
|
32
|
+
|
|
33
|
+
/**
 * Type guard for streaming-capable connections.
 *
 * Returns false when `connection` has no callable `canStream` member;
 * otherwise returns whatever `canStream()` reports. A positive result
 * narrows the argument to `StreamingConnection`, so the caller may call
 * `runSQLStream` on it.
 *
 * @param connection - The connection to probe.
 * @returns `true` only when `canStream` exists, is a function, and
 *   returns a truthy result.
 */
export function isStreamingConnection(
   connection: Connection,
): connection is StreamingConnection {
   const probe = connection as { canStream?: () => boolean };
   if (typeof probe.canStream !== "function") {
      return false;
   }
   return (connection as StreamingConnection).canStream();
}
|
|
47
|
+
|
|
48
|
+
/** Limits applied by `streamSqlWithBudget` while it drains a stream. */
export interface StreamBudget {
   /**
    * Row cap. The helper raises once it has collected more than this
    * many rows. `0` turns the row cap off: a caller-supplied `rowLimit`
    * in `runSQLOptions` may still bound the stream, but the helper
    * itself will not raise on row count.
    */
   maxRows: number;
   /**
    * Byte cap on the running total of
    * `Buffer.byteLength(JSON.stringify(row))` over the yielded rows,
    * i.e. the JSON-serialized size of the rows that end up in
    * `result.rows`. `0` turns the byte cap off.
    */
   maxBytes: number;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Drain `runSQLStream` into a `MalloyQueryData`-shaped buffer while
 * enforcing a row cap and a byte cap. As soon as either cap is breached
 * the helper aborts its internal `AbortController` and stops iterating,
 * so a driver that honors `abortSignal` can stop producing rows instead
 * of having them discarded client-side.
 *
 * Row overflow uses the `cap + 1` sentinel pattern: collecting more
 * than `maxRows` rows means the request would have overflowed. The
 * caller is expected to have clamped `runSQLOptions.rowLimit` to
 * `min(callerLimit, cap + 1)` beforehand (that clamping is not done
 * here).
 *
 * @param connection - Streaming-capable connection to read from.
 * @param sql - SQL text handed to `runSQLStream` unchanged.
 * @param runSQLOptions - Caller options; forwarded as-is except for
 *   `abortSignal`, which is replaced by a signal that fires when either
 *   the caller's signal or the internal cap-abort fires.
 * @param budget - Row and byte caps; `0` disables the respective cap.
 * @returns The collected rows and their count as `totalRows`.
 * @throws PayloadTooLargeError when a cap is exceeded. The message names
 *   the relevant env var so the operator gets a self-contained tuning
 *   hint.
 * @throws Any other error raised by the driver while no cap has been
 *   breached (including errors caused by the caller's own abort).
 */
export async function streamSqlWithBudget(
   connection: StreamingConnection,
   sql: string,
   runSQLOptions: RunSQLOptions,
   budget: StreamBudget,
): Promise<MalloyQueryData> {
   const { maxRows, maxBytes } = budget;
   const capAc = new AbortController();
   const rows: QueryRecord[] = [];
   let byteTotal = 0;
   let overflowMessage: string | undefined;

   // Two abort sources must both be able to cancel the iterator:
   //
   //  - the caller's signal (the publisher's query timeout), after
   //    which the controller's `runWithQueryTimeout` is expected to
   //    surface `QueryTimeoutError` → 504;
   //  - the internal cap-abort, after which we throw
   //    `PayloadTooLargeError` → 413.
   //
   // `AbortSignal.any` does the composition where it exists. On a
   // runtime without it, calling it would throw at call time, so we
   // forward the caller's abort onto `capAc` by hand instead. The
   // caller's signal is therefore never silently dropped.
   const externalSignal = runSQLOptions.abortSignal;
   let composedSignal: AbortSignal = capAc.signal;
   let detachExternal: (() => void) | undefined;
   if (externalSignal) {
      if (typeof AbortSignal.any === "function") {
         composedSignal = AbortSignal.any([externalSignal, capAc.signal]);
      } else if (externalSignal.aborted) {
         // Mirror `AbortSignal.any`: an already-aborted input yields an
         // already-aborted signal.
         capAc.abort(externalSignal.reason);
      } else {
         const forwardAbort = (): void => capAc.abort(externalSignal.reason);
         externalSignal.addEventListener("abort", forwardAbort, {
            once: true,
         });
         detachExternal = () =>
            externalSignal.removeEventListener("abort", forwardAbort);
      }
   }

   try {
      for await (const row of connection.runSQLStream(sql, {
         ...runSQLOptions,
         abortSignal: composedSignal,
      })) {
         rows.push(row);
         if (maxBytes > 0) {
            // Count this row at its JSON-serialized size. That repeats
            // work the final `JSON.stringify(result)` will do, but lets
            // an oversized result be abandoned early.
            byteTotal += Buffer.byteLength(JSON.stringify(row), "utf8");
            if (byteTotal > maxBytes) {
               recordQueryCapExceeded("bytes", "connection_sql");
               overflowMessage = `Query response exceeded ${maxBytes} bytes (had at least ${byteTotal}). Refine the query (project fewer columns, add a LIMIT, or filter wide values) or raise PUBLISHER_MAX_RESPONSE_BYTES.`;
               capAc.abort();
               break;
            }
         }
         if (maxRows > 0 && rows.length > maxRows) {
            recordQueryCapExceeded("rows", "connection_sql");
            overflowMessage = `Query returned more than ${maxRows} rows. Refine the query (add a LIMIT or more selective WHERE) or raise PUBLISHER_MAX_QUERY_ROWS.`;
            capAc.abort();
            break;
         }
      }
   } catch (err) {
      // Tearing the stream down after our own abort can surface as a
      // synthetic driver error (noted for some `pg-query-stream`
      // versions). Swallow it only when we triggered the abort
      // ourselves; anything else is a real connection error or the
      // caller's timeout, and the controller must see it.
      if (!overflowMessage) throw err;
   } finally {
      // Do not leave the fallback listener attached to a signal that
      // may outlive this call.
      detachExternal?.();
   }

   if (overflowMessage) throw new PayloadTooLargeError(overflowMessage);

   return { rows, totalRows: rows.length };
}
|