@malloy-publisher/server 0.0.199 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/app/api-doc.yaml +76 -111
  2. package/dist/app/assets/{EnvironmentPage-Dpee_Kn6.js → EnvironmentPage-CgKNjySu.js} +1 -1
  3. package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
  4. package/dist/app/assets/{MainPage-DsVt5QGM.js → MainPage-CAwb8U82.js} +2 -2
  5. package/dist/app/assets/{ModelPage-AwAugZ37.js → ModelPage-C0Uevsw9.js} +1 -1
  6. package/dist/app/assets/{PackagePage-XQ-EWGTC.js → PackagePage-Cu-u9k1g.js} +1 -1
  7. package/dist/app/assets/{RouteError-3Mv8JQw7.js → RouteError-DVwPh2Ql.js} +1 -1
  8. package/dist/app/assets/{WorkbookPage-DHYYpcYc.js → WorkbookPage-DW38R2Zv.js} +1 -1
  9. package/dist/app/assets/{core-DfcpQGVP.es-DQggNOdX.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
  10. package/dist/app/assets/{index-D1pdwrUW.js → index-BGdcKsFF.js} +1 -1
  11. package/dist/app/assets/{index-BUp81Qdm.js → index-CTx4v4_3.js} +1 -1
  12. package/dist/app/assets/index-DE6d5jEy.js +452 -0
  13. package/dist/app/assets/{index.umd-CQH4LZU8.js → index.umd-C1Mi1uRm.js} +1 -1
  14. package/dist/app/index.html +1 -1
  15. package/dist/package_load_worker.mjs +1 -1
  16. package/dist/server.mjs +1482 -1010
  17. package/package.json +1 -1
  18. package/src/config.spec.ts +246 -0
  19. package/src/config.ts +121 -1
  20. package/src/constants.ts +84 -1
  21. package/src/controller/connection.controller.spec.ts +803 -0
  22. package/src/controller/connection.controller.ts +207 -20
  23. package/src/controller/model.controller.ts +16 -5
  24. package/src/controller/query.controller.ts +20 -7
  25. package/src/controller/watch-mode.controller.ts +11 -2
  26. package/src/errors.spec.ts +44 -0
  27. package/src/errors.ts +34 -0
  28. package/src/heap_check.spec.ts +144 -0
  29. package/src/heap_check.ts +144 -0
  30. package/src/mcp/handler_utils.ts +14 -0
  31. package/src/mcp/tools/execute_query_tool.ts +44 -14
  32. package/src/oom_guards.integration.spec.ts +261 -0
  33. package/src/path_safety.ts +9 -3
  34. package/src/query_cap_metrics.spec.ts +89 -0
  35. package/src/query_cap_metrics.ts +115 -0
  36. package/src/query_concurrency.spec.ts +247 -0
  37. package/src/query_concurrency.ts +236 -0
  38. package/src/query_timeout.spec.ts +224 -0
  39. package/src/query_timeout.ts +178 -0
  40. package/src/server-old.ts +20 -0
  41. package/src/server.ts +25 -47
  42. package/src/service/connection.ts +8 -2
  43. package/src/service/environment.ts +82 -2
  44. package/src/service/environment_admission.spec.ts +165 -1
  45. package/src/service/environment_store.spec.ts +103 -0
  46. package/src/service/environment_store.ts +74 -23
  47. package/src/service/model.spec.ts +193 -3
  48. package/src/service/model.ts +80 -12
  49. package/src/service/model_limits.spec.ts +181 -0
  50. package/src/service/model_limits.ts +110 -0
  51. package/src/service/package.spec.ts +2 -6
  52. package/src/service/package.ts +6 -1
  53. package/src/service/path_injection.spec.ts +39 -0
  54. package/src/stream_helpers.spec.ts +280 -0
  55. package/src/stream_helpers.ts +162 -0
  56. package/src/test_helpers/metrics_harness.ts +126 -0
  57. package/dist/app/assets/HomePage-DLRWTNoL.js +0 -1
  58. package/dist/app/assets/index-Dv5bF4Ii.js +0 -451
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Centralized telemetry for row-cap / byte-cap rejections (HTTP 413).
3
+ *
4
+ * Without this, operators tuning {@link PUBLISHER_MAX_QUERY_ROWS} /
5
+ * {@link PUBLISHER_MAX_RESPONSE_BYTES} can only see undifferentiated
6
+ * `http_server_requests_total{status_code="413"}` — they can't tell
7
+ * which cap is firing or which query surface is hottest. The counter
8
+ * here carries `cap_type` (`rows` / `bytes`) and `source`
9
+ * (`connection_sql` / `model_query` / `notebook_cell`) so a single
10
+ * dashboard panel can answer "what should I tune and on which
11
+ * endpoint?".
12
+ *
13
+ * Observable gauges expose the current effective caps so dashboards
14
+ * can render `actual_rows_returned / max_rows` utilization without
15
+ * a separate config feed — same pattern the memory governor uses
16
+ * for high/low water bytes and the concurrency middleware uses for
17
+ * its slot cap.
18
+ *
19
+ * Lazy init for the same reason as `query_timeout.ts` /
20
+ * `query_concurrency.ts`: instruments created before
21
+ * `setGlobalMeterProvider` bind to a NoOp meter
22
+ * (https://github.com/open-telemetry/opentelemetry-js/issues/3505).
23
+ * The first throw site initializes the counter; the gauges are
24
+ * installed alongside on the same call so the production hot path
25
+ * is one boolean check after the first 413.
26
+ */
27
+
28
+ import { metrics, type Counter } from "@opentelemetry/api";
29
+
30
+ import { getMaxQueryRows, getMaxResponseBytes } from "./config";
31
+
32
/** Which cap tripped: the row-count cap or the response-size cap. */
export type QueryCapType = "rows" | "bytes";

/** Query surface that detected the overflow. */
export type QueryCapSource =
   | "connection_sql"
   | "model_query"
   | "notebook_cell";

// Lazily-created instruments, populated by `ensureCapTelemetry` on
// first use and cleared by `resetQueryCapTelemetryForTesting`.
let capExceededCounter: Counter | null = null;
let configGaugesInstalled = false;
37
+
38
/**
 * Returns the shared cap-exceeded counter, creating it on first use
 * and registering the two observable config gauges alongside it.
 *
 * Instruments are requested from the global meter at call time
 * rather than at module load. Once both the counter and the gauges
 * exist, later calls return the cached counter immediately.
 */
function ensureCapTelemetry(): Counter {
   if (capExceededCounter !== null && configGaugesInstalled) {
      return capExceededCounter;
   }

   const publisherMeter = metrics.getMeter("publisher");

   // Cache the counter before touching the gauges so it is reused on
   // subsequent calls.
   const counter =
      capExceededCounter ??
      publisherMeter.createCounter("publisher_query_cap_exceeded_total", {
         description:
            "Queries rejected with 413 because the row or byte cap was exceeded. Labels: cap_type ('rows'|'bytes'), source ('connection_sql'|'model_query'|'notebook_cell').",
      });
   capExceededCounter = counter;

   if (!configGaugesInstalled) {
      // Both gauges read the config getter inside the scrape callback,
      // so the reported value is whatever the getter returns at scrape
      // time. A throwing getter is reported as -1 instead of failing
      // the scrape.
      const rowCapGauge = publisherMeter.createObservableGauge(
         "publisher_max_query_rows",
         {
            description:
               "Current effective PUBLISHER_MAX_QUERY_ROWS cap (0 = disabled, -1 = misconfigured)",
         },
      );
      rowCapGauge.addCallback((result) => {
         try {
            result.observe(getMaxQueryRows());
         } catch {
            result.observe(-1);
         }
      });

      const byteCapGauge = publisherMeter.createObservableGauge(
         "publisher_max_response_bytes",
         {
            description:
               "Current effective PUBLISHER_MAX_RESPONSE_BYTES cap (0 = disabled, -1 = misconfigured)",
            unit: "By",
         },
      );
      byteCapGauge.addCallback((result) => {
         try {
            result.observe(getMaxResponseBytes());
         } catch {
            result.observe(-1);
         }
      });

      configGaugesInstalled = true;
   }

   return counter;
}
89
+
90
/**
 * Adds one to the cap-exceeded counter, tagged with the cap that
 * tripped and the surface that noticed it.
 *
 * Intended to be called BEFORE `PayloadTooLargeError` is thrown, so
 * the event is counted even when a downstream `catch` swallows the
 * error instead of letting it reach the HTTP error mapper.
 *
 * @param capType - `rows` or `bytes`.
 * @param source - The query surface that detected the overflow.
 */
export function recordQueryCapExceeded(
   capType: QueryCapType,
   source: QueryCapSource,
): void {
   const counter = ensureCapTelemetry();
   counter.add(1, { cap_type: capType, source: source });
}
106
+
107
/**
 * Test-only hook. Forgets the cached counter and the "gauges
 * installed" flag so the next `ensureCapTelemetry` call builds fresh
 * instruments against whatever meter provider is current. Do NOT
 * call from production code.
 */
export function resetQueryCapTelemetryForTesting(): void {
   configGaugesInstalled = false;
   capExceededCounter = null;
}
@@ -0,0 +1,247 @@
1
+ import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
+ import { EventEmitter } from "events";
3
+ import type { NextFunction, Request, Response } from "express";
4
+
5
+ import { ServiceUnavailableError } from "./errors";
6
+ import {
7
+ getActiveQueryCount,
8
+ queryConcurrencyMiddleware,
9
+ resetActiveQueryCountForTesting,
10
+ resetQueryConcurrencyTelemetryForTesting,
11
+ } from "./query_concurrency";
12
+ import {
13
+ startMetricsHarness,
14
+ type MetricsHarness,
15
+ } from "./test_helpers/metrics_harness";
16
+
17
/**
 * Builds the smallest object the middleware needs from a request:
 * just a `path`. Cast to `Request` so it can be passed straight in.
 */
function makeReq(path = "/api/v0/test"): Request {
   const stub = { path };
   return stub as unknown as Request;
}
20
+
21
/**
 * Response stand-in backed by a real EventEmitter, so `on` behaves
 * like it does on an Express response. `fireFinish` / `fireClose`
 * let a test emit the `finish` and `close` events on demand.
 */
function makeRes(): Response & {
   fireFinish: () => void;
   fireClose: () => void;
} {
   const emitter = new EventEmitter();
   const stub = Object.assign(emitter, {
      fireFinish: (): void => {
         emitter.emit("finish");
      },
      fireClose: (): void => {
         emitter.emit("close");
      },
   });
   return stub as unknown as Response & {
      fireFinish: () => void;
      fireClose: () => void;
   };
}
44
+
45
/**
 * Runs the middleware once with a recording `next`. Whatever the
 * middleware passes to `next` (an error, or nothing) ends up in
 * `error.value`, which stays `undefined` when `next` is called bare.
 */
function callMiddleware(
   req: Request,
   res: Response,
): { next: NextFunction; error: { value: unknown } } {
   const captured: { value: unknown } = { value: undefined };
   const recordingNext: NextFunction = (err) => {
      captured.value = err;
   };
   queryConcurrencyMiddleware(req, res, recordingNext);
   return { next: recordingNext, error: captured };
}
56
+
57
describe("queryConcurrencyMiddleware", () => {
   const CAP_ENV = "PUBLISHER_MAX_CONCURRENT_QUERIES";

   /** Sets the concurrency cap env var for the current test. */
   const setCap = (value: string): void => {
      process.env[CAP_ENV] = value;
   };

   /** Removes the cap env var so no test inherits another's setting. */
   const clearCap = (): void => {
      delete process.env[CAP_ENV];
   };

   beforeEach(() => {
      // Start every case from zero in-flight queries and no cap set.
      resetActiveQueryCountForTesting();
      clearCap();
   });
   afterEach(() => {
      clearCap();
      resetActiveQueryCountForTesting();
   });

   it("passes through when the limit is 0 (opt-out)", () => {
      setCap("0");
      const outcome = callMiddleware(makeReq(), makeRes());
      expect(outcome.error.value).toBeUndefined();
      // Opt-out means no bookkeeping at all: the count must not move.
      expect(getActiveQueryCount()).toBe(0);
   });

   it("admits the first request under the cap and increments the gauge", () => {
      setCap("2");
      const outcome = callMiddleware(makeReq(), makeRes());
      expect(outcome.error.value).toBeUndefined();
      expect(getActiveQueryCount()).toBe(1);
   });

   it("rejects the (cap+1)-th in-flight request with ServiceUnavailableError", () => {
      setCap("2");
      // Fill both slots.
      callMiddleware(makeReq(), makeRes());
      callMiddleware(makeReq(), makeRes());
      // The third request has nowhere to go.
      const overflow = callMiddleware(makeReq(), makeRes());
      expect(overflow.error.value).toBeInstanceOf(ServiceUnavailableError);
      expect((overflow.error.value as Error).message).toContain(
         "PUBLISHER_MAX_CONCURRENT_QUERIES",
      );
      // A rejected request never claimed a slot, so the count stays
      // at the cap and later releases cannot drive it negative.
      expect(getActiveQueryCount()).toBe(2);
   });

   it("decrements on response 'finish' (normal completion)", () => {
      setCap("2");
      const response = makeRes();
      callMiddleware(makeReq(), response);
      expect(getActiveQueryCount()).toBe(1);
      response.fireFinish();
      expect(getActiveQueryCount()).toBe(0);
   });

   it("decrements on response 'close' (client disconnect)", () => {
      // A socket torn down before the response finishes only emits
      // 'close'; without handling it the slot would never be freed.
      setCap("2");
      const response = makeRes();
      callMiddleware(makeReq(), response);
      response.fireClose();
      expect(getActiveQueryCount()).toBe(0);
   });

   it("decrements only once even when both 'finish' and 'close' fire", () => {
      // Both events can fire for the same response. The release has
      // to be idempotent, otherwise the count underflows and the pod
      // hands out one more slot than the operator configured.
      setCap("1");
      const response = makeRes();
      callMiddleware(makeReq(), response);
      response.fireFinish();
      response.fireClose();
      expect(getActiveQueryCount()).toBe(0);

      // A follow-up request must still be admitted normally after
      // the double-fire.
      const followUp = callMiddleware(makeReq(), makeRes());
      expect(followUp.error.value).toBeUndefined();
      expect(getActiveQueryCount()).toBe(1);
   });

   it("re-admits after an in-flight request completes (gauge rolls forward)", () => {
      setCap("1");
      const firstResponse = makeRes();
      callMiddleware(makeReq(), firstResponse);
      // The only slot is taken, so the second request is rejected.
      const rejected = callMiddleware(makeReq(), makeRes());
      expect(rejected.error.value).toBeInstanceOf(ServiceUnavailableError);

      // Freeing the slot lets the next request in: the cap is not a
      // one-shot fuse.
      firstResponse.fireFinish();
      const readmitted = callMiddleware(makeReq(), makeRes());
      expect(readmitted.error.value).toBeUndefined();
      expect(getActiveQueryCount()).toBe(1);
   });

   it("reads the env var on every call so the limit can change without restart", () => {
      // The limit must be re-read per request rather than cached at
      // module load, so a runtime change takes effect immediately.
      setCap("1");
      callMiddleware(makeReq(), makeRes());
      // Now at the cap.
      const denied = callMiddleware(makeReq(), makeRes());
      expect(denied.error.value).toBeInstanceOf(ServiceUnavailableError);

      // Raise the cap; the very next request gets through.
      setCap("3");
      const admitted = callMiddleware(makeReq(), makeRes());
      expect(admitted.error.value).toBeUndefined();
      expect(getActiveQueryCount()).toBe(2);
   });

   describe("telemetry", () => {
      let harness: MetricsHarness;

      beforeEach(async () => {
         harness = await startMetricsHarness();
         resetActiveQueryCountForTesting();
         resetQueryConcurrencyTelemetryForTesting();
      });
      afterEach(async () => {
         clearCap();
         resetActiveQueryCountForTesting();
         resetQueryConcurrencyTelemetryForTesting();
         await harness.shutdown();
      });

      it("publisher_query_concurrency_rejections_total ticks on each 503", async () => {
         setCap("1");
         callMiddleware(makeReq("/api/v0/test"), makeRes());
         // Slot pool is full; this request must bounce.
         const denied = callMiddleware(makeReq("/api/v0/test"), makeRes());
         expect(denied.error.value).toBeInstanceOf(ServiceUnavailableError);
         const rejections = await harness.collectCounter(
            "publisher_query_concurrency_rejections_total",
         );
         expect(rejections).toBe(1);
      });

      it("publisher_query_active_slots gauge reflects the live in-flight count", async () => {
         setCap("5");
         callMiddleware(makeReq(), makeRes());
         callMiddleware(makeReq(), makeRes());
         // Telemetry is initialized lazily on each request, so the
         // gauge callback is attached by now and a scrape reads 2.
         const activeSlots = await harness.collectGauge(
            "publisher_query_active_slots",
         );
         expect(activeSlots).toBe(2);
      });

      it("publisher_query_active_slots gauge follows releases (decrements on res.finish)", async () => {
         setCap("5");
         const response = makeRes();
         callMiddleware(makeReq(), response);
         const beforeRelease = await harness.collectGauge(
            "publisher_query_active_slots",
         );
         expect(beforeRelease).toBe(1);
         response.fireFinish();
         // The scrape after the release must show the new value, or
         // a leaked slot would be indistinguishable from real load.
         const afterRelease = await harness.collectGauge(
            "publisher_query_active_slots",
         );
         expect(afterRelease).toBe(0);
      });

      it("publisher_query_max_slots gauge reports the current cap", async () => {
         setCap("17");
         callMiddleware(makeReq(), makeRes());
         const maxSlots = await harness.collectGauge(
            "publisher_query_max_slots",
         );
         expect(maxSlots).toBe(17);
      });

      it("publisher_query_max_slots gauge reports 0 when concurrency is opted out", async () => {
         setCap("0");
         callMiddleware(makeReq(), makeRes());
         const maxSlots = await harness.collectGauge(
            "publisher_query_max_slots",
         );
         expect(maxSlots).toBe(0);
      });
   });
});
@@ -0,0 +1,236 @@
1
+ import { metrics, type Counter } from "@opentelemetry/api";
2
+ import type { NextFunction, Request, RequestHandler, Response } from "express";
3
+
4
+ import { getMaxConcurrentQueries } from "./config";
5
+ import { ServiceUnavailableError } from "./errors";
6
+ import { logger } from "./logger";
7
+
8
+ /**
9
+ * Process-wide query gauge — the only state owned by this module.
10
+ * Incremented when a gated handler accepts a request; decremented
11
+ * exactly once when the response finishes (whether the handler
12
+ * succeeded, errored, or the client disconnected mid-flight).
13
+ *
14
+ * Module-scoped so all gated routes share one slot pool: the
15
+ * concurrency cap bounds aggregate query load, not per-route load.
16
+ */
17
+ let active = 0;
18
+
19
+ /**
20
+ * OpenTelemetry instruments. Lazy-initialized on first use so unit
21
+ * tests that install a real MeterProvider AFTER the module is
22
+ * imported still receive their data — OTel JS's `ProxyMeter`
23
+ * binds counters created before `setGlobalMeterProvider` to NoOp
24
+ * instruments. Module-scoped state so all gated routes share one
25
+ * counter / one slot pool.
26
+ */
27
+ let queryConcurrencyRejectionsCounter: Counter | null = null;
28
+ let concurrencyTelemetryInitialized = false;
29
/**
 * Returns the rejection counter, creating it and registering the
 * `publisher_query_active_slots` / `publisher_query_max_slots`
 * observable gauges the first time it is called.
 *
 * The counter and the gauges are guarded independently: if a
 * previous call cached the counter but failed before the gauges were
 * marked installed, the retry reuses the cached counter instead of
 * asking the meter for a second instrument with the same name.
 *
 * @returns The shared `publisher_query_concurrency_rejections_total`
 *   counter.
 */
function ensureConcurrencyTelemetry(): Counter {
   if (queryConcurrencyRejectionsCounter && concurrencyTelemetryInitialized) {
      return queryConcurrencyRejectionsCounter;
   }
   const meter = metrics.getMeter("publisher");
   if (!queryConcurrencyRejectionsCounter) {
      queryConcurrencyRejectionsCounter = meter.createCounter(
         "publisher_query_concurrency_rejections_total",
         {
            description:
               "Queries rejected with 503 because the per-pod PUBLISHER_MAX_CONCURRENT_QUERIES cap was reached",
         },
      );
   }
   if (!concurrencyTelemetryInitialized) {
      // Live count of in-flight queries holding a slot. Observable
      // gauge so a scrape always sees the current value, not the
      // value at the last admission/release event.
      meter
         .createObservableGauge("publisher_query_active_slots", {
            description:
               "In-flight queries currently holding a per-pod concurrency slot",
         })
         .addCallback((observation) => observation.observe(active));

      // Configured cap, exposed so dashboards can render utilization
      // (`active / max`) without a separate config feed. Read on
      // every scrape so a runtime env-var change is reflected
      // immediately. A `0` cap means "opt-out" and is reported
      // verbatim.
      meter
         .createObservableGauge("publisher_query_max_slots", {
            description:
               "Current effective PUBLISHER_MAX_CONCURRENT_QUERIES (0 = disabled)",
         })
         .addCallback((observation) => {
            try {
               observation.observe(getMaxConcurrentQueries());
            } catch {
               // A misconfigured env var should fail the next request
               // that observes it, not the metric scrape. Surface as
               // -1 so the misconfig is visible.
               observation.observe(-1);
            }
         });
      concurrencyTelemetryInitialized = true;
   }
   return queryConcurrencyRejectionsCounter;
}
77
+
78
/**
 * Test-only hook. Clears the cached rejection counter and the
 * "initialized" flag so the next `ensureConcurrencyTelemetry` call
 * rebuilds the instruments against the current meter provider. Do
 * NOT call from production code.
 */
export function resetQueryConcurrencyTelemetryForTesting(): void {
   concurrencyTelemetryInitialized = false;
   queryConcurrencyRejectionsCounter = null;
}
87
+
88
/**
 * Read-only view of the number of slots currently held. Exposed for
 * tests and metrics; callers must not use it to mutate the count.
 *
 * @returns The current value of the module-level `active` counter.
 */
export function getActiveQueryCount(): number {
   const inFlight = active;
   return inFlight;
}
94
+
95
/**
 * Test-only hook. Forces the in-flight count back to zero so a test
 * that left slots unreleased cannot affect the next case.
 */
export function resetActiveQueryCountForTesting(): void {
   active = 0;
}
102
+
103
+ /**
104
+ * Express middleware that bounds the number of concurrently
105
+ * in-flight query requests per pod.
106
+ *
107
+ * Defense-in-depth on top of the per-request caps from Steps 1–5:
108
+ * - Row/byte caps bound a single response.
109
+ * - Memory governor (Step 4) sheds load when RSS crosses the
110
+ * high-water mark.
111
+ * - Query timeout (Step 5) prevents one query from monopolising a
112
+ * slot indefinitely.
113
+ * This middleware caps the *number of slots in flight* at any one
114
+ * moment so a burst of well-behaved but expensive queries can't all
115
+ * land simultaneously and stampede aggregate memory.
116
+ *
117
+ * Behavior:
118
+ * - When `active >= limit`, the request is rejected with HTTP 503
119
+ * and the response body identifies the cap so an operator's
120
+ * grep finds the rationale immediately.
121
+ * - When admitted, a single-shot decrement is registered on both
122
+ * `finish` (normal completion) and `close` (client disconnect).
123
+ * The handler must release exactly once even if both events
124
+ * fire.
125
+ * - `limit === 0` opts out (the middleware becomes a pass-through);
126
+ * `getMaxConcurrentQueries()` is read per-request so config
127
+ * changes propagate without a server restart. The per-request
128
+ * read is a single env-var parse — cheap.
129
+ *
130
+ * Failure-mode notes:
131
+ * - If the response never emits `finish`/`close` for some reason
132
+ * (e.g. a runtime crash that bypasses Express' normal
133
+ * teardown), the slot leaks until process restart. This is the
134
+ * same failure mode as any active-request counter; in practice
135
+ * `close` always fires on socket teardown.
136
+ * - We do NOT queue. A backed-up queue would hide load and inflate
137
+ * p99 latency; failing fast lets the upstream LB retry against
138
+ * a less-loaded pod.
139
+ */
140
/**
 * Token returned when a concurrency slot is granted.
 *
 * The holder is expected to call `release()` when its work ends,
 * whether by success, error, or cancellation. Calling it more than
 * once is safe: only the first call gives the slot back, so it can
 * be wired to both the `finish` and `close` events of a response.
 */
export interface QuerySlotHandle {
   release: () => void;
}
150
+
151
/**
 * Attempts to claim one concurrency slot, synchronously.
 *
 * Shared by the HTTP middleware and direct callers. HTTP routes
 * should normally go through {@link queryConcurrencyMiddleware},
 * which wires the release to the response events; a direct caller
 * owns the release itself (typically in a try/finally).
 *
 * @param routeLabel - Attached to the rejection counter as
 *   `http.route` and to the warning log. Keep its cardinality
 *   bounded (route patterns or fixed strings, not concrete URLs).
 * @returns A handle whose `release()` gives the slot back. When the
 *   configured limit is `<= 0` the handle is a no-op and no slot is
 *   counted.
 * @throws ServiceUnavailableError when the number of held slots has
 *   already reached the limit.
 */
export function tryAcquireQuerySlot(routeLabel: string): QuerySlotHandle {
   // Initialize telemetry on every call, not only on rejection, so
   // the active/max gauges exist even if the cap is never hit.
   const rejections = ensureConcurrencyTelemetry();

   const limit = getMaxConcurrentQueries();

   // Opt-out: hand back a handle that does nothing and skip all
   // slot bookkeeping.
   if (limit <= 0) {
      return { release: () => undefined };
   }

   const atCapacity = active >= limit;
   if (atCapacity) {
      rejections.add(1, {
         "http.route": routeLabel,
         limit,
      });
      logger.warn(
         `Rejecting query: ${active}/${limit} slots in use (PUBLISHER_MAX_CONCURRENT_QUERIES).`,
         { route: routeLabel },
      );
      throw new ServiceUnavailableError(
         `Publisher pod is at its maximum concurrent query cap (${limit}). Retry after in-flight queries complete, or raise PUBLISHER_MAX_CONCURRENT_QUERIES.`,
      );
   }

   active += 1;

   // One-shot release: the first call returns the slot, later calls
   // are ignored. The clamp keeps the count from going below zero.
   let held = true;
   const release = (): void => {
      if (!held) {
         return;
      }
      held = false;
      active = Math.max(0, active - 1);
   };
   return { release };
}
202
+
203
/**
 * Express middleware that gates a route on the shared slot pool.
 *
 * Claims a slot via `tryAcquireQuerySlot`. If that throws, the error
 * is forwarded to `next` and nothing else happens. Otherwise the
 * slot's `release` is attached to both the `finish` and `close`
 * events of the response and the request proceeds.
 *
 * @param req - Incoming request; only `route?.path` / `path` are read.
 * @param res - Response whose `finish` / `close` events free the slot.
 * @param next - Called with the error on rejection, bare on admission.
 */
export function queryConcurrencyMiddleware(
   req: Request,
   res: Response,
   next: NextFunction,
): void {
   let slot: QuerySlotHandle;
   try {
      // Prefer the registered route pattern over the concrete URL so
      // the metric label stays low-cardinality; fall back to the raw
      // path when no route is attached.
      const routeLabel = req.route?.path ?? req.path;
      slot = tryAcquireQuerySlot(routeLabel);
   } catch (error) {
      next(error);
      return;
   }

   // `finish` covers normal completion, `close` covers a client that
   // disconnected. Either may arrive first, or both may fire;
   // `release` ignores every call after the first.
   for (const event of ["finish", "close"] as const) {
      res.on(event, slot.release);
   }
   next();
}
228
+
229
/**
 * Factory-style accessor for route registration. Returns the
 * concurrency middleware typed as a plain `RequestHandler`, so call
 * sites can write `queryConcurrency()` in a middleware list.
 *
 * @returns The `queryConcurrencyMiddleware` function itself.
 */
export function queryConcurrency(): RequestHandler {
   const handler: RequestHandler = queryConcurrencyMiddleware;
   return handler;
}