@malloy-publisher/server 0.0.198 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +127 -111
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
  4. package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
  11. package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
  12. package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
  13. package/dist/app/assets/index-DE6d5jEy.js +452 -0
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +4198 -3648
  19. package/package.json +2 -3
  20. package/src/config.spec.ts +246 -0
  21. package/src/config.ts +121 -1
  22. package/src/constants.ts +84 -1
  23. package/src/controller/compile.controller.ts +3 -1
  24. package/src/controller/connection.controller.spec.ts +803 -0
  25. package/src/controller/connection.controller.ts +207 -20
  26. package/src/controller/model.controller.ts +19 -1
  27. package/src/controller/query.controller.ts +22 -6
  28. package/src/controller/watch-mode.controller.ts +11 -2
  29. package/src/errors.spec.ts +44 -0
  30. package/src/errors.ts +34 -0
  31. package/src/health.spec.ts +90 -0
  32. package/src/health.ts +88 -45
  33. package/src/heap_check.spec.ts +144 -0
  34. package/src/heap_check.ts +144 -0
  35. package/src/instrumentation.ts +50 -0
  36. package/src/mcp/handler_utils.ts +14 -0
  37. package/src/mcp/tools/execute_query_tool.ts +52 -10
  38. package/src/oom_guards.integration.spec.ts +261 -0
  39. package/src/package_load/package_load_pool.spec.ts +252 -0
  40. package/src/package_load/package_load_pool.ts +920 -0
  41. package/src/package_load/package_load_worker.ts +980 -0
  42. package/src/package_load/protocol.ts +336 -0
  43. package/src/path_safety.ts +9 -3
  44. package/src/query_cap_metrics.spec.ts +89 -0
  45. package/src/query_cap_metrics.ts +115 -0
  46. package/src/query_concurrency.spec.ts +247 -0
  47. package/src/query_concurrency.ts +236 -0
  48. package/src/query_param_utils.ts +18 -0
  49. package/src/query_timeout.spec.ts +224 -0
  50. package/src/query_timeout.ts +178 -0
  51. package/src/server-old.ts +21 -1
  52. package/src/server.ts +61 -57
  53. package/src/service/connection.ts +8 -2
  54. package/src/service/db_utils.spec.ts +1 -1
  55. package/src/service/environment.ts +85 -4
  56. package/src/service/environment_admission.spec.ts +165 -1
  57. package/src/service/environment_store.spec.ts +103 -0
  58. package/src/service/environment_store.ts +98 -26
  59. package/src/service/filter_integration.spec.ts +110 -0
  60. package/src/service/given.ts +80 -0
  61. package/src/service/givens_integration.spec.ts +192 -0
  62. package/src/service/model.spec.ts +298 -3
  63. package/src/service/model.ts +362 -23
  64. package/src/service/model_limits.spec.ts +181 -0
  65. package/src/service/model_limits.ts +110 -0
  66. package/src/service/package.spec.ts +12 -6
  67. package/src/service/package.ts +263 -146
  68. package/src/service/package_worker_path.spec.ts +196 -0
  69. package/src/service/path_injection.spec.ts +39 -0
  70. package/src/stream_helpers.spec.ts +280 -0
  71. package/src/stream_helpers.ts +162 -0
  72. package/src/test_helpers/metrics_harness.ts +126 -0
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
  75. package/dist/app/assets/index-U38AyjJL.js +0 -451
package/src/health.ts CHANGED
@@ -57,8 +57,8 @@ export function markNotReady(): void {
57
57
  * 2. Waits shutdownDrainDurationSeconds to allow in-flight requests to complete
58
58
  * 3. Sets preGracefulShutdownCompleted flag (enables drainingGuard middleware to reject new requests)
59
59
  * 4. Closes main server and MCP server (stops accepting new connections), then shuts down the OpenTelemetry SDK and the package-load worker pool (both best-effort)
60
- * 5. Closes logger
61
- * 6. Waits shutdownGracefulCloseTimeoutSeconds (if > 0) for final cleanup
60
+ * 5. Waits shutdownGracefulCloseTimeoutSeconds (if > 0) for final cleanup
61
+ * 6. Closes logger (last, so any logs emitted during cleanup are flushed)
62
62
  * 7. Exits process
63
63
  *
64
64
  * Note: drainingGuard only rejects requests after step 3 completes. During step 2,
@@ -92,51 +92,94 @@ export function registerSignalHandlers(
92
92
  }, shutdownDrainDurationSeconds * 1000),
93
93
  );
94
94
 
95
- const closeServer = (server: Server, name: string) =>
96
- new Promise<void>((resolve) => {
97
- if (server && server.listening) {
98
- server.close((err) => {
99
- if (err) {
100
- logger.error(`${name} close error:`, err);
101
- } else {
102
- logger.info(`${name} closed`);
103
- }
104
- resolve();
105
- });
106
- } else {
107
- resolve();
108
- }
109
- });
110
-
111
- await Promise.all([
112
- closeServer(server, "Main server"),
113
- closeServer(mcpServer, "MCP server"),
114
- ]);
115
-
116
- try {
117
- await shutdownSDK();
118
- logger.info("OpenTelemetry SDK shut down");
119
- } catch (_error) {
120
- /* do nothing */
121
- }
122
-
123
- try {
124
- logger.close();
125
- } catch (_error) {
126
- /* do nothing */
127
- }
128
-
129
- if (shutdownGracefulCloseTimeoutSeconds > 0) {
130
- logger.info(
131
- `Waiting ${shutdownGracefulCloseTimeoutSeconds} seconds after server close before exit...`,
132
- );
133
- await new Promise((resolve) =>
134
- setTimeout(resolve, shutdownGracefulCloseTimeoutSeconds * 1000),
135
- );
136
- }
137
- process.exit(0);
95
+ await performGracefulShutdownAfterDrain(
96
+ server,
97
+ mcpServer,
98
+ shutdownGracefulCloseTimeoutSeconds,
99
+ );
138
100
  });
139
101
  }
102
+
103
+ /**
104
+ * Performs the post-drain shutdown work: closes both HTTP servers,
105
+ * shuts down the OpenTelemetry SDK and the package-load worker pool, waits the optional graceful-close
106
+ * window so any in-flight cleanup can finish logging, closes the
107
+ * winston logger, and exits the process.
108
+ *
109
+ * Exported so unit tests can exercise the close + log + exit ordering
110
+ * without emitting SIGTERM (which would leave module-level
111
+ * operationalState stuck in "draining" and leak into sibling specs).
112
+ */
113
+ export async function performGracefulShutdownAfterDrain(
114
+ server: Server,
115
+ mcpServer: Server,
116
+ shutdownGracefulCloseTimeoutSeconds: number,
117
+ ): Promise<void> {
118
+ const closeServer = (server: Server, name: string) =>
119
+ new Promise<void>((resolve) => {
120
+ if (server && server.listening) {
121
+ server.close((err) => {
122
+ if (err) {
123
+ logger.error(`${name} close error:`, err);
124
+ } else {
125
+ logger.info(`${name} closed`);
126
+ }
127
+ resolve();
128
+ });
129
+ } else {
130
+ resolve();
131
+ }
132
+ });
133
+
134
+ await Promise.all([
135
+ closeServer(server, "Main server"),
136
+ closeServer(mcpServer, "MCP server"),
137
+ ]);
138
+
139
+ try {
140
+ await shutdownSDK();
141
+ logger.info("OpenTelemetry SDK shut down");
142
+ } catch (_error) {
143
+ /* do nothing */
144
+ }
145
+
146
+ try {
147
+ // Drain in-flight compiles and terminate worker_threads before
148
+ // we exit so a slow compile doesn't leave orphan worker
149
+ // processes. Lazy-imported to avoid pulling the pool module
150
+ // into the health.ts dep graph for tests that don't exercise
151
+ // the compile path.
152
+ const { getPackageLoadPool } = await import(
153
+ "./package_load/package_load_pool"
154
+ );
155
+ await getPackageLoadPool().shutdown();
156
+ logger.info("Package-load worker pool shut down");
157
+ } catch (_error) {
158
+ /* do nothing */
159
+ }
160
+
161
+ if (shutdownGracefulCloseTimeoutSeconds > 0) {
162
+ logger.info(
163
+ `Waiting ${shutdownGracefulCloseTimeoutSeconds} seconds after server close before exit...`,
164
+ );
165
+ await new Promise((resolve) =>
166
+ setTimeout(resolve, shutdownGracefulCloseTimeoutSeconds * 1000),
167
+ );
168
+ }
169
+
170
+ // Close the logger last so anything emitted during the wait window
171
+ // above (or by other shutdown paths still running) reaches its
172
+ // transports. Closing earlier triggers winston's
173
+ // "Attempt to write logs with no transports" warning on any
174
+ // subsequent logger call.
175
+ try {
176
+ logger.close();
177
+ } catch (_error) {
178
+ /* do nothing */
179
+ }
180
+
181
+ process.exit(0);
182
+ }
140
183
  /**
141
184
  * Middleware that returns 503 for non-health and metrics requests when service is draining.
142
185
  * Must be registered before application routes.
@@ -0,0 +1,144 @@
1
+ import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
2
+
3
+ import {
4
+ checkHeapConfiguration,
5
+ resetHeapTelemetryForTesting,
6
+ } from "./heap_check";
7
+ import {
8
+ startMetricsHarness,
9
+ type MetricsHarness,
10
+ } from "./test_helpers/metrics_harness";
11
+
12
+ function makeLogStub(): {
13
+ warn: ReturnType<typeof mock>;
14
+ info: ReturnType<typeof mock>;
15
+ } {
16
+ return {
17
+ warn: mock(() => undefined),
18
+ info: mock(() => undefined),
19
+ };
20
+ }
21
+
22
+ describe("checkHeapConfiguration", () => {
23
+ it("warns + reports warned=true when heap limit is below the 2 GiB threshold", () => {
24
+ const log = makeLogStub();
25
+ // 1.5 GiB — below the recommended floor.
26
+ const { warned } = checkHeapConfiguration({
27
+ getHeapStatistics: () => ({
28
+ heap_size_limit: 1.5 * 1024 * 1024 * 1024,
29
+ }),
30
+ log,
31
+ });
32
+ expect(warned).toBe(true);
33
+ expect(log.warn).toHaveBeenCalledTimes(1);
34
+ expect(log.info).not.toHaveBeenCalled();
35
+ const args = log.warn.mock.calls[0] as unknown as [
36
+ string,
37
+ Record<string, unknown>,
38
+ ];
39
+ // Operator must be able to grep for the offending env-var
40
+ // hint without guessing the surrounding sentence.
41
+ expect(args[0]).toContain("--max-old-space-size");
42
+ expect(args[0]).toContain("MiB");
43
+ expect(args[1].heapSizeLimitBytes).toBe(1.5 * 1024 * 1024 * 1024);
44
+ });
45
+
46
+ it("logs at info + reports warned=false when heap limit meets the recommendation", () => {
47
+ const log = makeLogStub();
48
+ const { warned, heapSizeLimitBytes } = checkHeapConfiguration({
49
+ getHeapStatistics: () => ({
50
+ heap_size_limit: 4 * 1024 * 1024 * 1024,
51
+ }),
52
+ log,
53
+ });
54
+ expect(warned).toBe(false);
55
+ expect(heapSizeLimitBytes).toBe(4 * 1024 * 1024 * 1024);
56
+ expect(log.warn).not.toHaveBeenCalled();
57
+ expect(log.info).toHaveBeenCalledTimes(1);
58
+ });
59
+
60
+ it("treats exactly-the-recommended-value as 'meets threshold' (no warn)", () => {
61
+ // Boundary: the comparison is `<`, so equality should pass.
62
+ // Lock the boundary so a future tightening to `<=` requires
63
+ // an explicit test change rather than silently regressing.
64
+ const log = makeLogStub();
65
+ const { warned } = checkHeapConfiguration({
66
+ getHeapStatistics: () => ({
67
+ heap_size_limit: 2 * 1024 * 1024 * 1024,
68
+ }),
69
+ log,
70
+ });
71
+ expect(warned).toBe(false);
72
+ expect(log.warn).not.toHaveBeenCalled();
73
+ });
74
+
75
+ it("does not throw or warn under tiny heaps; the cap helpers still work", () => {
76
+ // Pathological "this pod has 128 MiB" case: we want a noisy
77
+ // warning, not a process crash, so the publisher still boots
78
+ // and the operator sees the message in pod logs.
79
+ const log = makeLogStub();
80
+ expect(() =>
81
+ checkHeapConfiguration({
82
+ getHeapStatistics: () => ({ heap_size_limit: 128 * 1024 * 1024 }),
83
+ log,
84
+ }),
85
+ ).not.toThrow();
86
+ expect(log.warn).toHaveBeenCalledTimes(1);
87
+ });
88
+
89
+ it("uses live v8.getHeapStatistics when no override is provided (smoke check)", () => {
90
+ // No assertion on warn/info — the result depends on how Node
91
+ // was started — just that the call resolves and returns a
92
+ // sensible structure. Locks the production code path against
93
+ // accidental coupling to the injected getter.
94
+ const result = checkHeapConfiguration();
95
+ expect(result.heapSizeLimitBytes).toBeGreaterThan(0);
96
+ expect(typeof result.warned).toBe("boolean");
97
+ });
98
+
99
+ describe("telemetry", () => {
100
+ let harness: MetricsHarness;
101
+
102
+ beforeEach(async () => {
103
+ harness = await startMetricsHarness();
104
+ resetHeapTelemetryForTesting();
105
+ });
106
+
107
+ afterEach(async () => {
108
+ resetHeapTelemetryForTesting();
109
+ await harness.shutdown();
110
+ });
111
+
112
+ it("publisher_heap_size_limit_bytes reports the live V8 heap_size_limit", async () => {
113
+ const log = makeLogStub();
114
+ checkHeapConfiguration({ log });
115
+ const value = await harness.collectGauge(
116
+ "publisher_heap_size_limit_bytes",
117
+ );
118
+ // Live value — we just assert it's a sensible positive
119
+ // number so the test isn't sensitive to how this Bun
120
+ // process was launched.
121
+ expect(typeof value).toBe("number");
122
+ expect(value).toBeGreaterThan(0);
123
+ });
124
+
125
+ it("publisher_heap_used_bytes reports the live V8 used_heap_size", async () => {
126
+ const log = makeLogStub();
127
+ checkHeapConfiguration({ log });
128
+ const value = await harness.collectGauge("publisher_heap_used_bytes");
129
+ expect(typeof value).toBe("number");
130
+ expect(value).toBeGreaterThan(0);
131
+ });
132
+
133
+ it("does not register the gauges before checkHeapConfiguration is called (lazy install)", async () => {
134
+ // The gauges are wired up via `installHeapGauges`, which
135
+ // is intentionally called from `checkHeapConfiguration`
136
+ // so the OTel SDK is fully up before instruments are
137
+ // registered. Without that, instruments would bind to
138
+ // NoOp during module load and never emit data.
139
+ expect(
140
+ await harness.collectGauge("publisher_heap_size_limit_bytes"),
141
+ ).toBeUndefined();
142
+ });
143
+ });
144
+ });
@@ -0,0 +1,144 @@
1
+ import { metrics } from "@opentelemetry/api";
2
+ import * as v8 from "v8";
3
+
4
+ import { logger } from "./logger";
5
+
6
+ /**
7
+ * Subset of the winston logger surface this module needs. Defined
8
+ * structurally so tests can pass a plain object stub and so we
9
+ * don't take a direct dependency on the concrete winston type.
10
+ */
11
+ interface HeapCheckLogger {
12
+ warn: (...args: unknown[]) => unknown;
13
+ info: (...args: unknown[]) => unknown;
14
+ }
15
+
16
+ /**
17
+ * Observable gauges for heap configuration so dashboards can render
18
+ * "configured heap ceiling" alongside the RSS / back-pressure
19
+ * timeseries from `PackageMemoryGovernor`. The values are observed
20
+ * on every scrape rather than cached — `v8.getHeapStatistics()` is
21
+ * cheap (a single VM call) and serving the live value avoids stale
22
+ * reads.
23
+ *
24
+ * Lazy init for the same reason as `query_timeout.ts`: instruments
25
+ * captured before `setGlobalMeterProvider` are bound to NoOp. We
26
+ * call `installHeapGauges` from `checkHeapConfiguration` so it
27
+ * runs once at startup, after the OTel SDK is up.
28
+ *
29
+ * Mirrors the governor's `publisher_*` unlabeled style.
30
+ */
31
+ let heapGaugesInstalled = false;
32
+ function installHeapGauges(): void {
33
+ if (heapGaugesInstalled) return;
34
+ heapGaugesInstalled = true;
35
+ const meter = metrics.getMeter("publisher");
36
+ meter
37
+ .createObservableGauge("publisher_heap_size_limit_bytes", {
38
+ description:
39
+ "V8 heap_size_limit (--max-old-space-size). Compare with PUBLISHER_MAX_MEMORY_BYTES.",
40
+ unit: "By",
41
+ })
42
+ .addCallback((observation) => {
43
+ observation.observe(v8.getHeapStatistics().heap_size_limit);
44
+ });
45
+ meter
46
+ .createObservableGauge("publisher_heap_used_bytes", {
47
+ description:
48
+ "Current V8 used_heap_size in bytes. Watch this alongside publisher_process_rss_bytes; the two diverge under native-allocator pressure (DuckDB, etc.).",
49
+ unit: "By",
50
+ })
51
+ .addCallback((observation) => {
52
+ observation.observe(v8.getHeapStatistics().used_heap_size);
53
+ });
54
+ }
55
+
56
+ /**
57
+ * Visible for tests; production code never calls this. Resets the
58
+ * lazy guard so a re-installation captures into a fresh
59
+ * MeterProvider.
60
+ */
61
+ export function resetHeapTelemetryForTesting(): void {
62
+ heapGaugesInstalled = false;
63
+ }
64
+
65
+ /**
66
+ * Minimum V8 heap ceiling (`--max-old-space-size`) the publisher
67
+ * expects in production. Below this the row/byte caps
68
+ * (PUBLISHER_MAX_QUERY_ROWS / PUBLISHER_MAX_RESPONSE_BYTES) still apply, but a single buffered model query at the
69
+ * default 50 MB byte cap plus the surrounding `Result` allocation
70
+ * can plausibly chew through the remaining headroom and OOM the
71
+ * process before back-pressure trips.
72
+ *
73
+ * 2 GiB is the smallest value at which the defaults are comfortably
74
+ * survivable. Operators running explicitly tuned-down pods (e.g. a
75
+ * lightweight smoke-test deploy) can ignore the warning.
76
+ */
77
+ const MIN_RECOMMENDED_HEAP_BYTES = 2 * 1024 * 1024 * 1024;
78
+
79
+ /**
80
+ * Probe `v8.getHeapStatistics().heap_size_limit` at startup and
81
+ * emit a single structured warning when the process is configured
82
+ * with a heap ceiling below {@link MIN_RECOMMENDED_HEAP_BYTES}.
83
+ *
84
+ * Why warn (not exit):
85
+ * - Smaller pods are legitimate (CI, local dev, smoke tests).
86
+ * Hard-failing them would be hostile to those workflows.
87
+ * - The earlier steps already bound memory growth per request; this
88
+ * is a "you probably want to know" signal, not a safety
89
+ * interlock.
90
+ * - A warning at boot is grep-able in pod logs / dashboards and
91
+ * surfaces faster than waiting for the first OOMKill.
92
+ *
93
+ * Returns the observed heap limit so the caller (server.ts startup)
94
+ * can also surface it in startup metrics if desired.
95
+ */
96
+ /**
97
+ * Test seam: parameters allow injecting a fake heap-stats getter
98
+ * and logger so the bun/sinon "ES modules cannot be stubbed"
99
+ * restriction doesn't force a module-level mock. Production calls
100
+ * (server.ts startup) use the defaults; tests pass in stubs.
101
+ */
102
+ export interface CheckHeapOptions {
103
+ getHeapStatistics?: () => Pick<v8.HeapInfo, "heap_size_limit">;
104
+ log?: HeapCheckLogger;
105
+ }
106
+
107
+ export function checkHeapConfiguration(options: CheckHeapOptions = {}): {
108
+ heapSizeLimitBytes: number;
109
+ warned: boolean;
110
+ } {
111
+ // Install heap-related observable gauges on the same startup
112
+ // tick. Idempotent — re-calling in tests / warmup paths is
113
+ // safe and re-uses the same instruments.
114
+ installHeapGauges();
115
+ const getStats = options.getHeapStatistics ?? v8.getHeapStatistics;
116
+ const log = options.log ?? logger;
117
+ const stats = getStats();
118
+ const heapSizeLimitBytes = stats.heap_size_limit;
119
+ const limitMiB = Math.round(heapSizeLimitBytes / (1024 * 1024));
120
+ const recommendedMiB = Math.round(
121
+ MIN_RECOMMENDED_HEAP_BYTES / (1024 * 1024),
122
+ );
123
+
124
+ if (heapSizeLimitBytes < MIN_RECOMMENDED_HEAP_BYTES) {
125
+ log.warn(
126
+ `V8 heap_size_limit is ${limitMiB} MiB, below the recommended ${recommendedMiB} MiB. ` +
127
+ `Pass --max-old-space-size=${recommendedMiB} (or higher) on the node process to keep ` +
128
+ `the row/byte caps (PUBLISHER_MAX_QUERY_ROWS / PUBLISHER_MAX_RESPONSE_BYTES) within ` +
129
+ `safe margin. With a smaller heap, a single large query can OOM the pod before the ` +
130
+ `memory governor's back-pressure has a chance to trip.`,
131
+ {
132
+ heapSizeLimitBytes,
133
+ recommendedHeapSizeBytes: MIN_RECOMMENDED_HEAP_BYTES,
134
+ },
135
+ );
136
+ return { heapSizeLimitBytes, warned: true };
137
+ }
138
+
139
+ log.info(
140
+ `V8 heap_size_limit is ${limitMiB} MiB (>= recommended ${recommendedMiB} MiB).`,
141
+ { heapSizeLimitBytes },
142
+ );
143
+ return { heapSizeLimitBytes, warned: false };
144
+ }
@@ -1,3 +1,4 @@
1
+ import { monitorEventLoopDelay } from "node:perf_hooks";
1
2
  import { metrics } from "@opentelemetry/api";
2
3
  import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
3
4
  import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-proto";
@@ -116,6 +117,55 @@ const httpRequestCount = meter.createCounter("http_server_requests_total", {
116
117
  description: "Total number of HTTP requests",
117
118
  });
118
119
 
120
+ // Event-loop-delay metrics. A blocked event loop is the only way the
121
+ // /health/liveness probe (a pure synchronous 200 handler) can fail under K8s,
122
+ // so we surface p50/p99/max so an operator can correlate liveness restarts
123
+ // with sustained event-loop pressure (large Malloy compiles, GC, etc.).
124
+ const eventLoopHistogram = monitorEventLoopDelay({ resolution: 20 });
125
+ eventLoopHistogram.enable();
126
+
127
+ const eventLoopLagP50 = meter.createObservableGauge(
128
+ "publisher_event_loop_lag_p50_ms",
129
+ {
130
+ description:
131
+ "Event loop delay p50 since the last scrape, in milliseconds",
132
+ unit: "ms",
133
+ },
134
+ );
135
+ const eventLoopLagP99 = meter.createObservableGauge(
136
+ "publisher_event_loop_lag_p99_ms",
137
+ {
138
+ description:
139
+ "Event loop delay p99 since the last scrape, in milliseconds",
140
+ unit: "ms",
141
+ },
142
+ );
143
+ const eventLoopLagMax = meter.createObservableGauge(
144
+ "publisher_event_loop_lag_max_ms",
145
+ {
146
+ description:
147
+ "Event loop delay max since the last scrape, in milliseconds",
148
+ unit: "ms",
149
+ },
150
+ );
151
+
152
+ // Sample all three in one batch so the histogram reset can't race the reads.
153
+ meter.addBatchObservableCallback(
154
+ (observableResult) => {
155
+ observableResult.observe(
156
+ eventLoopLagP50,
157
+ eventLoopHistogram.percentile(50) / 1e6,
158
+ );
159
+ observableResult.observe(
160
+ eventLoopLagP99,
161
+ eventLoopHistogram.percentile(99) / 1e6,
162
+ );
163
+ observableResult.observe(eventLoopLagMax, eventLoopHistogram.max / 1e6);
164
+ eventLoopHistogram.reset();
165
+ },
166
+ [eventLoopLagP50, eventLoopLagP99, eventLoopLagMax],
167
+ );
168
+
119
169
  const IGNORED_PATHS = new Set([
120
170
  "/health",
121
171
  "/health/liveness",
@@ -7,6 +7,7 @@ import {
7
7
  ModelNotFoundError,
8
8
  ModelCompilationError,
9
9
  EnvironmentNotFoundError,
10
+ ServiceUnavailableError,
10
11
  } from "../errors";
11
12
  import {
12
13
  getNotFoundError,
@@ -132,6 +133,9 @@ export async function getModelForQuery(
132
133
  environmentName,
133
134
  false,
134
135
  );
136
+ // Shed load before any disk / DB work; mirrors the HTTP query
137
+ // controllers so MCP traffic obeys the same back-pressure rules.
138
+ environment.assertCanAdmitQuery();
135
139
  const pkg = await environment.getPackage(packageName, false);
136
140
  const model = pkg.getModel(modelPath);
137
141
  if (!model || model.getModelType() === "notebook") {
@@ -163,6 +167,16 @@ export async function getModelForQuery(
163
167
  `${environmentName}/${packageName}/${modelPath}`,
164
168
  error,
165
169
  );
170
+ } else if (error instanceof ServiceUnavailableError) {
171
+ // Back-pressure: don't dress this up as a 404/500. Surface the
172
+ // server's own message so the MCP caller knows to retry.
173
+ errorDetails = {
174
+ message: error.message,
175
+ suggestions: [
176
+ "Retry after the publisher's memory usage drops below the configured low-water mark.",
177
+ "If this happens repeatedly, raise PUBLISHER_MAX_MEMORY_BYTES or scale up the pod.",
178
+ ],
179
+ } satisfies ErrorDetails;
166
180
  } else {
167
181
  // Unexpected error during setup
168
182
  errorDetails = getInternalError("executeQuery (Setup)", error);
@@ -1,7 +1,14 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js";
3
3
  import { z } from "zod";
4
+ import type { GivenValue } from "@malloydata/malloy";
5
+ import { getQueryTimeoutMs } from "../../config";
4
6
  import { logger } from "../../logger";
7
+ import {
8
+ tryAcquireQuerySlot,
9
+ type QuerySlotHandle,
10
+ } from "../../query_concurrency";
11
+ import { runWithQueryTimeout } from "../../query_timeout";
5
12
  import { EnvironmentStore } from "../../service/environment_store";
6
13
  import { getMalloyErrorDetails, type ErrorDetails } from "../error_messages";
7
14
  import { buildMalloyUri, getModelForQuery } from "../handler_utils";
@@ -30,6 +37,12 @@ const executeQueryShape = {
30
37
  .describe(
31
38
  "Filter parameter values keyed by filter name. Used with sources that declare #(filter) annotations.",
32
39
  ),
40
+ givens: z
41
+ .record(z.unknown())
42
+ .optional()
43
+ .describe(
44
+ "Per-query given values that override model defaults. Keys are given names declared in the model's given: block.",
45
+ ),
33
46
  };
34
47
 
35
48
  // Type inference is handled automatically by the MCP server based on the executeQueryShape
@@ -56,6 +69,7 @@ export function registerExecuteQueryTool(
56
69
  sourceName,
57
70
  queryName,
58
71
  filterParams,
72
+ givens,
59
73
  } = params;
60
74
 
61
75
  logger.info("[MCP Tool executeQuery] Received params:", { params });
@@ -120,14 +134,30 @@ export function registerExecuteQueryTool(
120
134
  logger.info(
121
135
  `[MCP Tool executeQuery] Model found. Proceeding to execute query.`,
122
136
  );
137
+ // Per-pod concurrency slot. MCP shares the same slot pool
138
+ // as the HTTP query routes so a hot agent loop can't
139
+ // bypass PUBLISHER_MAX_CONCURRENT_QUERIES. `mcp:executeQuery`
140
+ // is a fixed label so the dashboard can separate MCP load
141
+ // from HTTP route load. Acquisition can throw
142
+ // ServiceUnavailableError; the existing catch below surfaces
143
+ // it as the standard MCP error-content payload.
144
+ let querySlot: QuerySlotHandle | null = null;
123
145
  try {
146
+ querySlot = tryAcquireQuerySlot("mcp:executeQuery");
124
147
  // If ad-hoc query is provided, use it directly in the 3rd arg
125
148
  if (query) {
126
- const { result } = await model.getQueryResults(
127
- undefined,
128
- undefined,
129
- query,
130
- filterParams,
149
+ const { result } = await runWithQueryTimeout(
150
+ (abortSignal) =>
151
+ model.getQueryResults(
152
+ undefined,
153
+ undefined,
154
+ query,
155
+ filterParams,
156
+ undefined,
157
+ givens as Record<string, GivenValue> | undefined,
158
+ abortSignal,
159
+ ),
160
+ getQueryTimeoutMs(),
131
161
  );
132
162
  const { validateRenderTags } = await import(
133
163
  "@malloydata/render-validator"
@@ -169,11 +199,18 @@ export function registerExecuteQueryTool(
169
199
 
170
200
  return { isError: false, content };
171
201
  } else if (queryName) {
172
- const { result } = await model.getQueryResults(
173
- sourceName,
174
- queryName,
175
- undefined,
176
- filterParams,
202
+ const { result } = await runWithQueryTimeout(
203
+ (abortSignal) =>
204
+ model.getQueryResults(
205
+ sourceName,
206
+ queryName,
207
+ undefined,
208
+ filterParams,
209
+ undefined,
210
+ givens as Record<string, GivenValue> | undefined,
211
+ abortSignal,
212
+ ),
213
+ getQueryTimeoutMs(),
177
214
  );
178
215
  const { validateRenderTags } = await import(
179
216
  "@malloydata/render-validator"
@@ -259,6 +296,11 @@ export function registerExecuteQueryTool(
259
296
  },
260
297
  ],
261
298
  };
299
+ } finally {
300
+ // Release on every exit path — success, error, or
301
+ // unreachable code-path throw. `release()` is idempotent
302
+ // so a double-fault during cleanup can't double-decrement.
303
+ querySlot?.release();
262
304
  }
263
305
  },
264
306
  );