npm - @malloy-publisher/server - Versions diffs - 0.0.199 → 0.0.200 - Mend

@malloy-publisher/server 0.0.199 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/dist/app/api-doc.yaml +76 -111
package/dist/app/assets/{EnvironmentPage-Dpee_Kn6.js → EnvironmentPage-CgKNjySu.js} +1 -1
package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
package/dist/app/assets/{MainPage-DsVt5QGM.js → MainPage-CAwb8U82.js} +2 -2
package/dist/app/assets/{ModelPage-AwAugZ37.js → ModelPage-C0Uevsw9.js} +1 -1
package/dist/app/assets/{PackagePage-XQ-EWGTC.js → PackagePage-Cu-u9k1g.js} +1 -1
package/dist/app/assets/{RouteError-3Mv8JQw7.js → RouteError-DVwPh2Ql.js} +1 -1
package/dist/app/assets/{WorkbookPage-DHYYpcYc.js → WorkbookPage-DW38R2Zv.js} +1 -1
package/dist/app/assets/{core-DfcpQGVP.es-DQggNOdX.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
package/dist/app/assets/{index-D1pdwrUW.js → index-BGdcKsFF.js} +1 -1
package/dist/app/assets/{index-BUp81Qdm.js → index-CTx4v4_3.js} +1 -1
package/dist/app/assets/index-DE6d5jEy.js +452 -0
package/dist/app/assets/{index.umd-CQH4LZU8.js → index.umd-C1Mi1uRm.js} +1 -1
package/dist/app/index.html +1 -1
package/dist/package_load_worker.mjs +1 -1
package/dist/server.mjs +1482 -1010
package/package.json +1 -1
package/src/config.spec.ts +246 -0
package/src/config.ts +121 -1
package/src/constants.ts +84 -1
package/src/controller/connection.controller.spec.ts +803 -0
package/src/controller/connection.controller.ts +207 -20
package/src/controller/model.controller.ts +16 -5
package/src/controller/query.controller.ts +20 -7
package/src/controller/watch-mode.controller.ts +11 -2
package/src/errors.spec.ts +44 -0
package/src/errors.ts +34 -0
package/src/heap_check.spec.ts +144 -0
package/src/heap_check.ts +144 -0
package/src/mcp/handler_utils.ts +14 -0
package/src/mcp/tools/execute_query_tool.ts +44 -14
package/src/oom_guards.integration.spec.ts +261 -0
package/src/path_safety.ts +9 -3
package/src/query_cap_metrics.spec.ts +89 -0
package/src/query_cap_metrics.ts +115 -0
package/src/query_concurrency.spec.ts +247 -0
package/src/query_concurrency.ts +236 -0
package/src/query_timeout.spec.ts +224 -0
package/src/query_timeout.ts +178 -0
package/src/server-old.ts +20 -0
package/src/server.ts +25 -47
package/src/service/connection.ts +8 -2
package/src/service/environment.ts +82 -2
package/src/service/environment_admission.spec.ts +165 -1
package/src/service/environment_store.spec.ts +103 -0
package/src/service/environment_store.ts +74 -23
package/src/service/model.spec.ts +193 -3
package/src/service/model.ts +80 -12
package/src/service/model_limits.spec.ts +181 -0
package/src/service/model_limits.ts +110 -0
package/src/service/package.spec.ts +2 -6
package/src/service/package.ts +6 -1
package/src/service/path_injection.spec.ts +39 -0
package/src/stream_helpers.spec.ts +280 -0
package/src/stream_helpers.ts +162 -0
package/src/test_helpers/metrics_harness.ts +126 -0
package/dist/app/assets/HomePage-DLRWTNoL.js +0 -1
package/dist/app/assets/index-Dv5bF4Ii.js +0 -451

package/src/query_cap_metrics.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Centralized telemetry for row-cap / byte-cap rejections (HTTP 413).
+ *
+ * Without this, operators tuning {@link PUBLISHER_MAX_QUERY_ROWS} /
+ * {@link PUBLISHER_MAX_RESPONSE_BYTES} can only see undifferentiated
+ * `http_server_requests_total{status_code="413"}` — they can't tell
+ * which cap is firing or which query surface is hottest. The counter
+ * here carries `cap_type` (`rows` / `bytes`) and `source`
+ * (`connection_sql` / `model_query` / `notebook_cell`) so a single
+ * dashboard panel can answer "what should I tune and on which
+ * endpoint?".
+ *
+ * Observable gauges expose the current effective caps so dashboards
+ * can render `actual_rows_returned / max_rows` utilization without
+ * a separate config feed — same pattern the memory governor uses
+ * for high/low water bytes and the concurrency middleware uses for
+ * its slot cap.
+ *
+ * Lazy init for the same reason as `query_timeout.ts` /
+ * `query_concurrency.ts`: instruments created before
+ * `setGlobalMeterProvider` bind to a NoOp meter
+ * (https://github.com/open-telemetry/opentelemetry-js/issues/3505).
+ * The first throw site initializes the counter; the gauges are
+ * installed alongside on the same call so the production hot path
+ * is one boolean check after the first 413.
+ */
+import { metrics, type Counter } from "@opentelemetry/api";
+import { getMaxQueryRows, getMaxResponseBytes } from "./config";
+export type QueryCapType = "rows" | "bytes"; // which cap fired; emitted as the `cap_type` attribute
+export type QueryCapSource = "connection_sql" | "model_query" | "notebook_cell"; // query surface that detected the overflow; emitted as the `source` attribute
+let capExceededCounter: Counter | null = null; // cached counter; stays null until ensureCapTelemetry() first runs
+let configGaugesInstalled = false; // flips to true once both config gauges have their callbacks registered
+/**
+ * Hands back the shared cap-exceeded counter, building it — and
+ * registering the two config gauges — the first time it is needed.
+ *
+ * Nothing is created at import time: instruments come from
+ * `metrics.getMeter("publisher")` on demand and are cached in module
+ * state, so once both pieces exist every later call takes the early
+ * return at the top.
+ *
+ * @returns the counter used to record cap-exceeded rejections.
+ */
+function ensureCapTelemetry(): Counter {
+   if (capExceededCounter !== null && configGaugesInstalled) {
+      return capExceededCounter;
+   }
+   const publisherMeter = metrics.getMeter("publisher");
+   const counter =
+      capExceededCounter ??
+      publisherMeter.createCounter("publisher_query_cap_exceeded_total", {
+         description:
+            "Queries rejected with 413 because the row or byte cap was exceeded. Labels: cap_type ('rows'|'bytes'), source ('connection_sql'|'model_query'|'notebook_cell').",
+      });
+   capExceededCounter = counter;
+   if (configGaugesInstalled) {
+      return counter;
+   }
+   // Shared body for both scrape callbacks: report the value read at
+   // scrape time, or -1 if that attempt throws, so a bad setting is
+   // visible on the dashboard instead of the series going missing.
+   const reportCap = (
+      sink: { observe(value: number): void },
+      readCap: () => number,
+   ): void => {
+      try {
+         sink.observe(readCap());
+      } catch {
+         sink.observe(-1);
+      }
+   };
+   const rowsGauge = publisherMeter.createObservableGauge(
+      "publisher_max_query_rows",
+      {
+         description:
+            "Current effective PUBLISHER_MAX_QUERY_ROWS cap (0 = disabled, -1 = misconfigured)",
+      },
+   );
+   rowsGauge.addCallback((result) => reportCap(result, getMaxQueryRows));
+   const bytesGauge = publisherMeter.createObservableGauge(
+      "publisher_max_response_bytes",
+      {
+         description:
+            "Current effective PUBLISHER_MAX_RESPONSE_BYTES cap (0 = disabled, -1 = misconfigured)",
+         unit: "By",
+      },
+   );
+   bytesGauge.addCallback((result) => reportCap(result, getMaxResponseBytes));
+   configGaugesInstalled = true;
+   return counter;
+}
+/**
+ * Adds one to the cap-exceeded counter for a single 413 rejection.
+ *
+ * Invoke this BEFORE throwing `PayloadTooLargeError`: the metric then
+ * ticks even when something downstream catches the error (MCP tools
+ * report failures as content payloads instead of letting them reach
+ * the HTTP error mapper).
+ *
+ * @param capType which limit was hit — `rows` or `bytes`; recorded as
+ *   the `cap_type` attribute.
+ * @param source the query surface that detected the overflow; recorded
+ *   as the `source` attribute.
+ */
+export function recordQueryCapExceeded(
+   capType: QueryCapType,
+   source: QueryCapSource,
+): void {
+   const counter = ensureCapTelemetry();
+   const attributes = { cap_type: capType, source: source };
+   counter.add(1, attributes);
+}
+/**
+ * Test-only hook. Forgets the cached counter and clears the
+ * gauges-installed flag, so the next recorded event rebuilds the
+ * instruments against whatever `MeterProvider` is current (for
+ * example one installed via `startMetricsHarness`). Production code
+ * must NOT call this.
+ */
+export function resetQueryCapTelemetryForTesting(): void {
+   configGaugesInstalled = false;
+   capExceededCounter = null;
+}

package/src/query_concurrency.spec.ts ADDED Viewed

@@ -0,0 +1,247 @@
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+import { EventEmitter } from "events";
+import type { NextFunction, Request, Response } from "express";
+import { ServiceUnavailableError } from "./errors";
+import {
+   getActiveQueryCount,
+   queryConcurrencyMiddleware,
+   resetActiveQueryCountForTesting,
+   resetQueryConcurrencyTelemetryForTesting,
+} from "./query_concurrency";
+import {
+   startMetricsHarness,
+   type MetricsHarness,
+} from "./test_helpers/metrics_harness";
+/**
+ * Builds the smallest request stub the middleware can work with: an
+ * object carrying only `path`, cast to the Express `Request` type.
+ */
+function makeReq(path = "/api/v0/test"): Request {
+   const stub = { path };
+   return stub as unknown as Request;
+}
+/**
+ * Response stand-in backed by a plain EventEmitter. The middleware
+ * only subscribes to events on the response, so real `on`/`emit`
+ * semantics (several listeners, listener ordering) are all the stub
+ * has to provide. `fireFinish` / `fireClose` let a test emit the
+ * `finish` and `close` events on demand.
+ */
+function makeRes(): Response & {
+   fireFinish: () => void;
+   fireClose: () => void;
+} {
+   const emitter = new EventEmitter();
+   const triggers = {
+      fireFinish: (): void => {
+         emitter.emit("finish");
+      },
+      fireClose: (): void => {
+         emitter.emit("close");
+      },
+   };
+   return Object.assign(emitter, triggers) as unknown as Response & {
+      fireFinish: () => void;
+      fireClose: () => void;
+   };
+}
+/**
+ * Runs `queryConcurrencyMiddleware` once with a recording `next`.
+ *
+ * @returns the `next` function that was passed in, plus a box whose
+ *   `value` holds whatever argument the middleware gave to `next`
+ *   (`undefined` when it was called without one, or not at all).
+ */
+function callMiddleware(
+   req: Request,
+   res: Response,
+): { next: NextFunction; error: { value: unknown } } {
+   const error: { value: unknown } = { value: undefined };
+   const next: NextFunction = (forwarded) => {
+      error.value = forwarded;
+   };
+   queryConcurrencyMiddleware(req, res, next);
+   return { next, error };
+}
+describe("queryConcurrencyMiddleware", () => { // admission, rejection, release and telemetry of the concurrency cap
+   beforeEach(() => {
+      // Belt-and-suspenders: every test starts from a zeroed slot count and no env override.
+      resetActiveQueryCountForTesting();
+      delete process.env.PUBLISHER_MAX_CONCURRENT_QUERIES;
+   });
+   afterEach(() => { // same cleanup as beforeEach so no state outlives the case
+      delete process.env.PUBLISHER_MAX_CONCURRENT_QUERIES;
+      resetActiveQueryCountForTesting();
+   });
+   it("passes through when the limit is 0 (opt-out)", () => {
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "0";
+      const res = makeRes();
+      const { error } = callMiddleware(makeReq(), res);
+      expect(error.value).toBeUndefined();
+      // Crucially: the counter stays at zero so opt-out really is
+      // opt-out — not "still tracks, just never rejects".
+      expect(getActiveQueryCount()).toBe(0);
+   });
+   it("admits the first request under the cap and increments the gauge", () => {
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "2";
+      const { error } = callMiddleware(makeReq(), makeRes());
+      expect(error.value).toBeUndefined();
+      expect(getActiveQueryCount()).toBe(1);
+   });
+   it("rejects the (cap+1)-th in-flight request with ServiceUnavailableError", () => {
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "2";
+      callMiddleware(makeReq(), makeRes());
+      callMiddleware(makeReq(), makeRes());
+      // Two in flight; the third must be turned away.
+      const { error } = callMiddleware(makeReq(), makeRes());
+      expect(error.value).toBeInstanceOf(ServiceUnavailableError);
+      expect((error.value as Error).message).toContain(
+         "PUBLISHER_MAX_CONCURRENT_QUERIES",
+      );
+      // Gauge is unchanged by the rejection (we never claimed the
+      // slot), so subsequent legitimate completions don't go
+      // negative.
+      expect(getActiveQueryCount()).toBe(2);
+   });
+   it("decrements on response 'finish' (normal completion)", () => {
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "2";
+      const res = makeRes();
+      callMiddleware(makeReq(), res);
+      expect(getActiveQueryCount()).toBe(1);
+      res.fireFinish();
+      expect(getActiveQueryCount()).toBe(0);
+   });
+   it("decrements on response 'close' (client disconnect)", () => {
+      // A client tearing down the socket before the response
+      // finishes is the failure case that, without 'close'
+      // handling, would leak slots until the process restarts.
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "2";
+      const res = makeRes();
+      callMiddleware(makeReq(), res);
+      res.fireClose();
+      expect(getActiveQueryCount()).toBe(0);
+   });
+   it("decrements only once even when both 'finish' and 'close' fire", () => {
+      // Express + Node fire both events in some versions when a
+      // long-poll response wraps up just as the client disconnects.
+      // The release must be idempotent or the counter goes negative
+      // and we hand out one more slot than the operator configured.
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "1";
+      const res = makeRes();
+      callMiddleware(makeReq(), res);
+      res.fireFinish();
+      res.fireClose();
+      expect(getActiveQueryCount()).toBe(0);
+      // A second request after the double-fire must still be
+      // admitted (proving the counter didn't underflow into a
+      // permanently-rejecting state).
+      const second = makeRes();
+      const { error } = callMiddleware(makeReq(), second);
+      expect(error.value).toBeUndefined();
+      expect(getActiveQueryCount()).toBe(1);
+   });
+   it("re-admits after an in-flight request completes (gauge rolls forward)", () => {
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "1";
+      const firstRes = makeRes();
+      callMiddleware(makeReq(), firstRes);
+      // Second request is over the cap and rejected.
+      const second = callMiddleware(makeReq(), makeRes());
+      expect(second.error.value).toBeInstanceOf(ServiceUnavailableError);
+      // First request finishes; the next call should now be
+      // admitted — proving the cap is not a one-shot fuse.
+      firstRes.fireFinish();
+      const third = callMiddleware(makeReq(), makeRes());
+      expect(third.error.value).toBeUndefined();
+      expect(getActiveQueryCount()).toBe(1);
+   });
+   it("reads the env var on every call so the limit can change without restart", () => {
+      // Operators can adjust PUBLISHER_MAX_CONCURRENT_QUERIES at
+      // runtime (e.g. via a config-reload SIGHUP wired elsewhere).
+      // The middleware must respect the new value on the next
+      // request, not cache the original module-load value.
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "1";
+      callMiddleware(makeReq(), makeRes());
+      // At the cap.
+      const denied = callMiddleware(makeReq(), makeRes());
+      expect(denied.error.value).toBeInstanceOf(ServiceUnavailableError);
+      // Operator bumps the cap; the next request is admitted.
+      process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "3";
+      const admitted = callMiddleware(makeReq(), makeRes());
+      expect(admitted.error.value).toBeUndefined();
+      expect(getActiveQueryCount()).toBe(2);
+   });
+   describe("telemetry", () => { // these cases read metrics back through the MetricsHarness
+      let harness: MetricsHarness;
+      beforeEach(async () => {
+         harness = await startMetricsHarness();
+         resetActiveQueryCountForTesting();
+         resetQueryConcurrencyTelemetryForTesting(); // drop cached instruments so they are re-created after the harness is installed
+      });
+      afterEach(async () => {
+         delete process.env.PUBLISHER_MAX_CONCURRENT_QUERIES;
+         resetActiveQueryCountForTesting();
+         resetQueryConcurrencyTelemetryForTesting();
+         await harness.shutdown();
+      });
+      it("publisher_query_concurrency_rejections_total ticks on each 503", async () => {
+         process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "1";
+         callMiddleware(makeReq("/api/v0/test"), makeRes());
+         // At the cap; this one must be rejected.
+         const denied = callMiddleware(makeReq("/api/v0/test"), makeRes());
+         expect(denied.error.value).toBeInstanceOf(ServiceUnavailableError);
+         expect(
+            await harness.collectCounter(
+               "publisher_query_concurrency_rejections_total",
+            ),
+         ).toBe(1);
+      });
+      it("publisher_query_active_slots gauge reflects the live in-flight count", async () => {
+         process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "5";
+         callMiddleware(makeReq(), makeRes());
+         callMiddleware(makeReq(), makeRes());
+         // The middleware's lazy telemetry init runs on every
+         // request, so by now the gauge callback should be
+         // attached and the next scrape reads 2.
+         expect(
+            await harness.collectGauge("publisher_query_active_slots"),
+         ).toBe(2);
+      });
+      it("publisher_query_active_slots gauge follows releases (decrements on res.finish)", async () => {
+         process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "5";
+         const res = makeRes();
+         callMiddleware(makeReq(), res);
+         expect(
+            await harness.collectGauge("publisher_query_active_slots"),
+         ).toBe(1);
+         res.fireFinish();
+         // A scrape after release must reflect the new value;
+         // otherwise an operator can't tell "leaking slot" from
+         // "real load".
+         expect(
+            await harness.collectGauge("publisher_query_active_slots"),
+         ).toBe(0);
+      });
+      it("publisher_query_max_slots gauge reports the current cap", async () => {
+         process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "17";
+         callMiddleware(makeReq(), makeRes());
+         expect(await harness.collectGauge("publisher_query_max_slots")).toBe(
+            17,
+         );
+      });
+      it("publisher_query_max_slots gauge reports 0 when concurrency is opted out", async () => {
+         process.env.PUBLISHER_MAX_CONCURRENT_QUERIES = "0";
+         callMiddleware(makeReq(), makeRes());
+         expect(await harness.collectGauge("publisher_query_max_slots")).toBe(
+            0,
+         );
+      });
+   });
+});

package/src/query_concurrency.ts ADDED Viewed

@@ -0,0 +1,236 @@
+import { metrics, type Counter } from "@opentelemetry/api";
+import type { NextFunction, Request, RequestHandler, Response } from "express";
+import { getMaxConcurrentQueries } from "./config";
+import { ServiceUnavailableError } from "./errors";
+import { logger } from "./logger";
+/**
+ * Process-wide count of in-flight queries currently holding a slot.
+ * Incremented by `tryAcquireQuerySlot` when a request is admitted;
+ * decremented at most once per acquired slot through the handle's
+ * `release()` (the HTTP middleware wires that to the response's
+ * `finish` and `close` events, so it runs whether the handler
+ * succeeded, errored, or the client disconnected mid-flight).
+ *
+ * Module-scoped so all gated routes share one slot pool: the
+ * concurrency cap bounds aggregate query load, not per-route load.
+ * Apart from the lazily created telemetry instruments, this is the
+ * only mutable state this module owns.
+ */
+let active = 0;
+/**
+ * OpenTelemetry instruments. Lazy-initialized on first use so unit
+ * tests that install a real MeterProvider AFTER the module is
+ * imported still receive their data — OTel JS's `ProxyMeter`
+ * binds counters created before `setGlobalMeterProvider` to NoOp
+ * instruments. Module-scoped state so all gated routes share one
+ * counter / one slot pool.
+ */
+let queryConcurrencyRejectionsCounter: Counter | null = null;
+let concurrencyTelemetryInitialized = false;
+/**
+ * Returns the rejection counter, creating it and registering the
+ * `publisher_query_active_slots` / `publisher_query_max_slots`
+ * observable gauges the first time it is needed.
+ *
+ * The counter and the gauges are tracked separately: each piece is
+ * created only when it is actually missing, so a call that follows a
+ * partially completed initialization does not register the counter a
+ * second time.
+ *
+ * @returns the counter used to record 503 concurrency rejections.
+ */
+function ensureConcurrencyTelemetry(): Counter {
+   if (queryConcurrencyRejectionsCounter && concurrencyTelemetryInitialized) {
+      return queryConcurrencyRejectionsCounter;
+   }
+   const meter = metrics.getMeter("publisher");
+   // Reuse an existing counter rather than re-creating it on every
+   // slow-path entry.
+   const counter =
+      queryConcurrencyRejectionsCounter ??
+      meter.createCounter("publisher_query_concurrency_rejections_total", {
+         description:
+            "Queries rejected with 503 because the per-pod PUBLISHER_MAX_CONCURRENT_QUERIES cap was reached",
+      });
+   queryConcurrencyRejectionsCounter = counter;
+   if (!concurrencyTelemetryInitialized) {
+      // Live count of in-flight queries holding a slot. Observable
+      // gauge so a scrape always sees the current value, not the
+      // value at the last admission/release event.
+      meter
+         .createObservableGauge("publisher_query_active_slots", {
+            description:
+               "In-flight queries currently holding a per-pod concurrency slot",
+         })
+         .addCallback((observation) => observation.observe(active));
+      // Configured cap, exposed so dashboards can render
+      // utilization (`active / max`) without needing a separate
+      // config feed. Read on every scrape so a runtime env-var
+      // change is reflected immediately; the parse is cheap. A
+      // `0` cap means "opt-out" (the middleware is a pass-through)
+      // and is reported verbatim.
+      meter
+         .createObservableGauge("publisher_query_max_slots", {
+            description:
+               "Current effective PUBLISHER_MAX_CONCURRENT_QUERIES (0 = disabled)",
+         })
+         .addCallback((observation) => {
+            try {
+               observation.observe(getMaxConcurrentQueries());
+            } catch {
+               // A misconfigured env var should fail the next
+               // request that observes it, not the metric scrape.
+               // Surface as -1 so the misconfig is visible.
+               observation.observe(-1);
+            }
+         });
+      concurrencyTelemetryInitialized = true;
+   }
+   return counter;
+}
+/**
+ * Test-only hook. Clears the initialized flag and forgets the cached
+ * rejection counter, so the next slot acquisition rebuilds the
+ * instruments against whichever MeterProvider is current at that
+ * point. Production code must NOT call this.
+ */
+export function resetQueryConcurrencyTelemetryForTesting(): void {
+   concurrencyTelemetryInitialized = false;
+   queryConcurrencyRejectionsCounter = null;
+}
+/**
+ * Visible for tests / metrics. Don't mutate from outside.
+ *
+ * @returns the number of slots currently held, i.e. the module-level
+ *   `active` counter at the moment of the call.
+ */
+export function getActiveQueryCount(): number {
+   return active;
+}
+/**
+ * Visible for tests so a unit test that crashes mid-handler can
+ * reset between cases without spinning a fresh module loader.
+ *
+ * Sets the in-flight counter straight back to 0. Handles acquired
+ * before the reset are not invalidated: releasing one afterwards
+ * still decrements the counter, clamped so it never drops below zero.
+ */
+export function resetActiveQueryCountForTesting(): void {
+   active = 0;
+}
+/**
+ * Design notes for `queryConcurrencyMiddleware` (declared later in
+ * this file; this comment is not attached to the interface below).
+ *
+ * Express middleware that bounds the number of concurrently
+ * in-flight query requests per pod.
+ *
+ * Defense-in-depth on top of the per-request caps from Steps 1–5:
+ *   - Row/byte caps bound a single response.
+ *   - Memory governor (Step 4) sheds load when RSS crosses the
+ *     high-water mark.
+ *   - Query timeout (Step 5) prevents one query from monopolising a
+ *     slot indefinitely.
+ * This middleware caps the *number of slots in flight* at any one
+ * moment so a burst of well-behaved but expensive queries can't all
+ * land simultaneously and stampede aggregate memory.
+ *
+ * Behavior:
+ *   - When `active >= limit`, the request is rejected with HTTP 503
+ *     and the response body identifies the cap so an operator's
+ *     grep finds the rationale immediately.
+ *   - When admitted, a single-shot decrement is registered on both
+ *     `finish` (normal completion) and `close` (client disconnect).
+ *     The handler must release exactly once even if both events
+ *     fire.
+ *   - `limit === 0` opts out (the middleware becomes a pass-through);
+ *     `getMaxConcurrentQueries()` is read per-request so config
+ *     changes propagate without a server restart. The per-request
+ *     read is a single env-var parse — cheap.
+ *
+ * Failure-mode notes:
+ *   - If the response never emits `finish`/`close` for some reason
+ *     (e.g. a runtime crash that bypasses Express' normal
+ *     teardown), the slot leaks until process restart. This is the
+ *     same failure mode as any active-request counter; in practice
+ *     `close` always fires on socket teardown.
+ *   - We do NOT queue. A backed-up queue would hide load and inflate
+ *     p99 latency; failing fast lets the upstream LB retry against
+ *     a less-loaded pod.
+ */
+/**
+ * Handle on an acquired concurrency slot. The caller MUST invoke
+ * `release()` exactly once when the work is done (success, error,
+ * or cancellation). `release()` is idempotent — calling it twice
+ * is a no-op rather than a double-decrement, so wrappers that
+ * register both `finish` and `close` listeners stay safe.
+ *
+ * When the cap is disabled, `tryAcquireQuerySlot` returns a handle
+ * whose `release()` does nothing at all.
+ */
+export interface QuerySlotHandle {
+   release: () => void;
+}
+/**
+ * Claims one concurrency slot synchronously, or throws when the pod
+ * is already at its cap. Shared by the HTTP middleware and the MCP
+ * `executeQuery` tool.
+ *
+ * HTTP routes should normally go through
+ * {@link queryConcurrencyMiddleware}, which ties the release to the
+ * response's `finish`/`close` events. Callers that use this function
+ * directly (MCP) own the release themselves, typically in a
+ * try/finally.
+ *
+ * @param routeLabel label attached to the rejection counter
+ *   (`publisher_query_concurrency_rejections_total`) so dashboards
+ *   can tell which surface is hottest. Keep its cardinality bounded:
+ *   Express route patterns or fixed strings such as
+ *   `mcp:executeQuery`. It is not used for anything else except the
+ *   rejection log line.
+ * @returns a {@link QuerySlotHandle}; when the cap is disabled
+ *   (limit <= 0) its `release()` is a no-op and nothing is counted.
+ * @throws {ServiceUnavailableError} when every slot is taken
+ *   (controllers map this to HTTP 503).
+ */
+export function tryAcquireQuerySlot(routeLabel: string): QuerySlotHandle {
+   // Telemetry is touched on every call, not only on rejection, so
+   // the active/max gauges exist even on pods that never hit the cap.
+   const rejections = ensureConcurrencyTelemetry();
+   const cap = getMaxConcurrentQueries();
+   const gatingDisabled = cap <= 0;
+   if (gatingDisabled) {
+      // Opt-out: hand back an inert handle and keep no books. Handy
+      // for OSS deployments that are already bounded upstream.
+      return { release: () => undefined };
+   }
+   const atCapacity = active >= cap;
+   if (atCapacity) {
+      rejections.add(1, {
+         "http.route": routeLabel,
+         limit: cap,
+      });
+      logger.warn(
+         `Rejecting query: ${active}/${cap} slots in use (PUBLISHER_MAX_CONCURRENT_QUERIES).`,
+         { route: routeLabel },
+      );
+      throw new ServiceUnavailableError(
+         `Publisher pod is at its maximum concurrent query cap (${cap}). Retry after in-flight queries complete, or raise PUBLISHER_MAX_CONCURRENT_QUERIES.`,
+      );
+   }
+   active += 1;
+   // One-shot latch: only the first release() gives the slot back.
+   let pending = true;
+   const release = (): void => {
+      if (!pending) return;
+      pending = false;
+      active = Math.max(0, active - 1);
+   };
+   return { release };
+}
+/**
+ * Express middleware that admits a request only while the process is
+ * under its concurrent-query cap.
+ *
+ * On admission it acquires a slot via `tryAcquireQuerySlot` and
+ * arranges for that slot to be released when the response emits
+ * `finish` or `close`, whichever comes first; the handle's `release`
+ * is idempotent, so the second event is harmless. On rejection the
+ * error thrown by `tryAcquireQuerySlot` is forwarded to `next` and no
+ * slot is held.
+ *
+ * If the response has already been destroyed or finished by the time
+ * this middleware runs (for example the client went away while an
+ * earlier asynchronous middleware was still working), neither event
+ * will be emitted again. The slot is then released immediately
+ * instead of leaking until process restart; the downstream handler
+ * still runs.
+ *
+ * @param req incoming request; `req.route?.path` (falling back to
+ *   `req.path`) becomes the rejection-counter route label.
+ * @param res response whose `finish`/`close` events drive the release.
+ * @param next called with the acquisition error on rejection, or with
+ *   no argument once the slot is held.
+ */
+export function queryConcurrencyMiddleware(
+   req: Request,
+   res: Response,
+   next: NextFunction,
+): void {
+   let handle: QuerySlotHandle;
+   try {
+      // `req.route?.path` gives the Express-registered pattern
+      // (e.g. `/api/v0/environments/:environmentName/.../sqlQuery`)
+      // rather than the concrete URL, keeping label cardinality
+      // bounded.
+      handle = tryAcquireQuerySlot(req.route?.path ?? req.path);
+   } catch (error) {
+      next(error);
+      return;
+   }
+   if (res.destroyed || res.writableFinished) {
+      // Too late to subscribe: the response is already torn down, so
+      // `finish`/`close` will not fire for us. Give the slot back now
+      // rather than holding it forever.
+      handle.release();
+   } else {
+      // Both events fire on different code paths; release on
+      // whichever comes first and ignore the second:
+      //   - `finish`: normal completion (response fully flushed).
+      //   - `close`: client disconnected before completion (or after,
+      //     in some Express/Node versions; hence the idempotency).
+      // `once` drops each listener after its single useful call.
+      res.once("finish", handle.release);
+      res.once("close", handle.release);
+   }
+   next();
+}
+/**
+ * Factory used at route-registration call sites. It hands back the
+ * shared `queryConcurrencyMiddleware` typed as a `RequestHandler`, so
+ * registrations stay readable and Express' overloads resolve cleanly.
+ * Every call returns the same function reference.
+ */
+export function queryConcurrency(): RequestHandler {
+   const handler: RequestHandler = queryConcurrencyMiddleware;
+   return handler;
+}