@malloy-publisher/server 0.0.198 → 0.0.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +30 -1
- package/dist/app/api-doc.yaml +127 -111
- package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
- package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
- package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
- package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
- package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
- package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
- package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
- package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
- package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
- package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
- package/dist/app/assets/index-DE6d5jEy.js +452 -0
- package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/instrumentation.mjs +57 -36
- package/dist/package_load_worker.mjs +12213 -0
- package/dist/server.mjs +4198 -3648
- package/package.json +2 -3
- package/src/config.spec.ts +246 -0
- package/src/config.ts +121 -1
- package/src/constants.ts +84 -1
- package/src/controller/compile.controller.ts +3 -1
- package/src/controller/connection.controller.spec.ts +803 -0
- package/src/controller/connection.controller.ts +207 -20
- package/src/controller/model.controller.ts +19 -1
- package/src/controller/query.controller.ts +22 -6
- package/src/controller/watch-mode.controller.ts +11 -2
- package/src/errors.spec.ts +44 -0
- package/src/errors.ts +34 -0
- package/src/health.spec.ts +90 -0
- package/src/health.ts +88 -45
- package/src/heap_check.spec.ts +144 -0
- package/src/heap_check.ts +144 -0
- package/src/instrumentation.ts +50 -0
- package/src/mcp/handler_utils.ts +14 -0
- package/src/mcp/tools/execute_query_tool.ts +52 -10
- package/src/oom_guards.integration.spec.ts +261 -0
- package/src/package_load/package_load_pool.spec.ts +252 -0
- package/src/package_load/package_load_pool.ts +920 -0
- package/src/package_load/package_load_worker.ts +980 -0
- package/src/package_load/protocol.ts +336 -0
- package/src/path_safety.ts +9 -3
- package/src/query_cap_metrics.spec.ts +89 -0
- package/src/query_cap_metrics.ts +115 -0
- package/src/query_concurrency.spec.ts +247 -0
- package/src/query_concurrency.ts +236 -0
- package/src/query_param_utils.ts +18 -0
- package/src/query_timeout.spec.ts +224 -0
- package/src/query_timeout.ts +178 -0
- package/src/server-old.ts +21 -1
- package/src/server.ts +61 -57
- package/src/service/connection.ts +8 -2
- package/src/service/db_utils.spec.ts +1 -1
- package/src/service/environment.ts +85 -4
- package/src/service/environment_admission.spec.ts +165 -1
- package/src/service/environment_store.spec.ts +103 -0
- package/src/service/environment_store.ts +98 -26
- package/src/service/filter_integration.spec.ts +110 -0
- package/src/service/given.ts +80 -0
- package/src/service/givens_integration.spec.ts +192 -0
- package/src/service/model.spec.ts +298 -3
- package/src/service/model.ts +362 -23
- package/src/service/model_limits.spec.ts +181 -0
- package/src/service/model_limits.ts +110 -0
- package/src/service/package.spec.ts +12 -6
- package/src/service/package.ts +263 -146
- package/src/service/package_worker_path.spec.ts +196 -0
- package/src/service/path_injection.spec.ts +39 -0
- package/src/stream_helpers.spec.ts +280 -0
- package/src/stream_helpers.ts +162 -0
- package/src/test_helpers/metrics_harness.ts +126 -0
- package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
- package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
- package/dist/app/assets/index-U38AyjJL.js +0 -451
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import { QueryTimeoutError } from "./errors";
|
|
4
|
+
import {
|
|
5
|
+
PUBLISHER_QUERY_TIMEOUT_REASON,
|
|
6
|
+
resetQueryTimeoutTelemetryForTesting,
|
|
7
|
+
runWithQueryTimeout,
|
|
8
|
+
} from "./query_timeout";
|
|
9
|
+
import {
|
|
10
|
+
startMetricsHarness,
|
|
11
|
+
type MetricsHarness,
|
|
12
|
+
} from "./test_helpers/metrics_harness";
|
|
13
|
+
|
|
14
|
+
describe("runWithQueryTimeout", () => {
|
|
15
|
+
it("returns the inner result when fn finishes before the timeout", async () => {
|
|
16
|
+
const result = await runWithQueryTimeout(async (signal) => {
|
|
17
|
+
expect(signal).toBeInstanceOf(AbortSignal);
|
|
18
|
+
expect(signal.aborted).toBe(false);
|
|
19
|
+
return "ok";
|
|
20
|
+
}, 1000);
|
|
21
|
+
expect(result).toBe("ok");
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("hands fn a signal even when the timeout is disabled (uniform contract)", async () => {
|
|
25
|
+
// timeoutMs=0 is the "operator opted out" path. We still want
|
|
26
|
+
// callers to be able to forward `signal` unconditionally; the
|
|
27
|
+
// signal must exist, be an AbortSignal, and never fire.
|
|
28
|
+
let observed: AbortSignal | undefined;
|
|
29
|
+
const result = await runWithQueryTimeout(async (signal) => {
|
|
30
|
+
observed = signal;
|
|
31
|
+
await new Promise((r) => setTimeout(r, 10));
|
|
32
|
+
return "still-here";
|
|
33
|
+
}, 0);
|
|
34
|
+
expect(result).toBe("still-here");
|
|
35
|
+
expect(observed).toBeInstanceOf(AbortSignal);
|
|
36
|
+
expect(observed?.aborted).toBe(false);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("aborts the signal and throws QueryTimeoutError when fn exceeds the budget", async () => {
|
|
40
|
+
let observedSignal: AbortSignal | undefined;
|
|
41
|
+
await expect(
|
|
42
|
+
runWithQueryTimeout(async (signal) => {
|
|
43
|
+
observedSignal = signal;
|
|
44
|
+
// Mimic a slow driver: resolve only when the signal aborts
|
|
45
|
+
// (so the test doesn't hang). The thrown error from the
|
|
46
|
+
// driver is irrelevant — runWithQueryTimeout owns the
|
|
47
|
+
// verdict once the timer has fired.
|
|
48
|
+
await new Promise<void>((_resolve, reject) => {
|
|
49
|
+
signal.addEventListener("abort", () =>
|
|
50
|
+
reject(new Error("driver: aborted by AbortSignal")),
|
|
51
|
+
);
|
|
52
|
+
});
|
|
53
|
+
}, 25),
|
|
54
|
+
).rejects.toBeInstanceOf(QueryTimeoutError);
|
|
55
|
+
expect(observedSignal?.aborted).toBe(true);
|
|
56
|
+
// The reason sentinel lets composed helpers (e.g. streaming
|
|
57
|
+
// cap-abort) distinguish "publisher timeout" from "their own
|
|
58
|
+
// abort" without coupling to message strings.
|
|
59
|
+
expect(observedSignal?.reason).toBe(PUBLISHER_QUERY_TIMEOUT_REASON);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("includes the configured timeout in the QueryTimeoutError message (operator can grep logs)", async () => {
|
|
63
|
+
let caught: unknown;
|
|
64
|
+
try {
|
|
65
|
+
await runWithQueryTimeout(async (signal) => {
|
|
66
|
+
await new Promise<void>((_resolve, reject) => {
|
|
67
|
+
signal.addEventListener("abort", () =>
|
|
68
|
+
reject(new Error("driver aborted")),
|
|
69
|
+
);
|
|
70
|
+
});
|
|
71
|
+
}, 17);
|
|
72
|
+
} catch (err) {
|
|
73
|
+
caught = err;
|
|
74
|
+
}
|
|
75
|
+
expect(caught).toBeInstanceOf(QueryTimeoutError);
|
|
76
|
+
expect((caught as Error).message).toContain("17ms");
|
|
77
|
+
expect((caught as Error).message).toContain("PUBLISHER_QUERY_TIMEOUT_MS");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("re-throws non-timeout errors verbatim (does not mask driver failures)", async () => {
|
|
81
|
+
// Driver fails *before* the timeout; we must surface its error
|
|
82
|
+
// unchanged so the controller's normal error mapping kicks in
|
|
83
|
+
// (502 ConnectionError, 400 BadRequestError, etc.) — wrapping
|
|
84
|
+
// every failure in QueryTimeoutError would lie to clients.
|
|
85
|
+
const driverError = new Error("upstream connection refused");
|
|
86
|
+
await expect(
|
|
87
|
+
runWithQueryTimeout(async () => {
|
|
88
|
+
throw driverError;
|
|
89
|
+
}, 1000),
|
|
90
|
+
).rejects.toBe(driverError);
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
it("does not wrap an error that happens to mention 'abort' if the timer never fired", async () => {
|
|
94
|
+
// Edge case: a driver might surface its own AbortError for an
|
|
95
|
+
// unrelated reason (e.g. caller canceled, transport reset). If
|
|
96
|
+
// the publisher's timer never fired, the error is not ours and
|
|
97
|
+
// must not be re-cast as QueryTimeoutError.
|
|
98
|
+
const fakeAbort = Object.assign(new Error("aborted by something else"), {
|
|
99
|
+
name: "AbortError",
|
|
100
|
+
});
|
|
101
|
+
await expect(
|
|
102
|
+
runWithQueryTimeout(async () => {
|
|
103
|
+
throw fakeAbort;
|
|
104
|
+
}, 1000),
|
|
105
|
+
).rejects.toBe(fakeAbort);
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
describe("telemetry", () => {
|
|
109
|
+
let harness: MetricsHarness;
|
|
110
|
+
beforeEach(async () => {
|
|
111
|
+
harness = await startMetricsHarness();
|
|
112
|
+
// Drop cached instruments so they re-init against the new
|
|
113
|
+
// provider; otherwise this test's writes go to a counter
|
|
114
|
+
// bound to the *previous* provider's reader.
|
|
115
|
+
resetQueryTimeoutTelemetryForTesting();
|
|
116
|
+
});
|
|
117
|
+
afterEach(async () => {
|
|
118
|
+
delete process.env.PUBLISHER_QUERY_TIMEOUT_MS;
|
|
119
|
+
await harness.shutdown();
|
|
120
|
+
resetQueryTimeoutTelemetryForTesting();
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Install telemetry without firing a timeout. After the
|
|
125
|
+
* `ensureTimeoutTelemetry` fix, any successful call is enough
|
|
126
|
+
* — both the counter and the gauge register on every entry,
|
|
127
|
+
* not just the timeout branch.
|
|
128
|
+
*/
|
|
129
|
+
async function primeTelemetry(): Promise<void> {
|
|
130
|
+
await runWithQueryTimeout(async () => 0, 10_000);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
it("publisher_query_timeout_total ticks each time the timer fires", async () => {
|
|
134
|
+
// Establish baseline (0) before the trigger so this test
|
|
135
|
+
// isn't sensitive to whatever else has happened earlier.
|
|
136
|
+
expect(
|
|
137
|
+
await harness.collectCounter("publisher_query_timeout_total"),
|
|
138
|
+
).toBe(0);
|
|
139
|
+
await expect(
|
|
140
|
+
runWithQueryTimeout(async (signal) => {
|
|
141
|
+
await new Promise<void>((_resolve, reject) => {
|
|
142
|
+
signal.addEventListener("abort", () =>
|
|
143
|
+
reject(new Error("driver aborted")),
|
|
144
|
+
);
|
|
145
|
+
});
|
|
146
|
+
}, 15),
|
|
147
|
+
).rejects.toBeInstanceOf(QueryTimeoutError);
|
|
148
|
+
expect(
|
|
149
|
+
await harness.collectCounter("publisher_query_timeout_total"),
|
|
150
|
+
).toBe(1);
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
it("does NOT tick the counter on non-timeout errors (driver failed before deadline)", async () => {
|
|
154
|
+
await expect(
|
|
155
|
+
runWithQueryTimeout(async () => {
|
|
156
|
+
throw new Error("upstream broken");
|
|
157
|
+
}, 1000),
|
|
158
|
+
).rejects.toThrow("upstream broken");
|
|
159
|
+
// A driver failure is not a timeout — the counter must
|
|
160
|
+
// stay at zero or operators will chase phantom timeouts.
|
|
161
|
+
expect(
|
|
162
|
+
await harness.collectCounter("publisher_query_timeout_total"),
|
|
163
|
+
).toBe(0);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it("publisher_query_timeout_ms gauge is registered after the FIRST call, not just after a timeout fires", async () => {
|
|
167
|
+
// Regression test for the lazy-init bug where the gauge
|
|
168
|
+
// installed only inside the timeout branch — leaving
|
|
169
|
+
// `publisher_query_timeout_ms` absent from `/metrics`
|
|
170
|
+
// until the first 504. Operators tuning the timeout
|
|
171
|
+
// BEFORE getting paged need this visible.
|
|
172
|
+
process.env.PUBLISHER_QUERY_TIMEOUT_MS = "30000";
|
|
173
|
+
await runWithQueryTimeout(async () => "ok", 60_000);
|
|
174
|
+
expect(await harness.collectGauge("publisher_query_timeout_ms")).toBe(
|
|
175
|
+
30000,
|
|
176
|
+
);
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it("publisher_query_timeout_ms gauge reports the current config", async () => {
|
|
180
|
+
process.env.PUBLISHER_QUERY_TIMEOUT_MS = "42000";
|
|
181
|
+
await primeTelemetry();
|
|
182
|
+
expect(await harness.collectGauge("publisher_query_timeout_ms")).toBe(
|
|
183
|
+
42000,
|
|
184
|
+
);
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
it("publisher_query_timeout_ms gauge reports 0 when the timeout is opted out", async () => {
|
|
188
|
+
process.env.PUBLISHER_QUERY_TIMEOUT_MS = "0";
|
|
189
|
+
await primeTelemetry();
|
|
190
|
+
expect(await harness.collectGauge("publisher_query_timeout_ms")).toBe(
|
|
191
|
+
0,
|
|
192
|
+
);
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
it("publisher_query_timeout_ms gauge surfaces -1 on misconfig instead of crashing the scrape", async () => {
|
|
196
|
+
process.env.PUBLISHER_QUERY_TIMEOUT_MS = "garbage";
|
|
197
|
+
await primeTelemetry();
|
|
198
|
+
// Operators must be able to *see* misconfig in dashboards
|
|
199
|
+
// — silently dropping the data point would hide the
|
|
200
|
+
// problem until a query timed out.
|
|
201
|
+
expect(await harness.collectGauge("publisher_query_timeout_ms")).toBe(
|
|
202
|
+
-1,
|
|
203
|
+
);
|
|
204
|
+
});
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
it("clears the timer on success so the event loop can exit promptly", async () => {
|
|
208
|
+
// Hard to assert directly without leaking implementation
|
|
209
|
+
// internals. Proxy assertion: the call returns quickly and a
|
|
210
|
+
// subsequent timer doesn't race us. If the timer leaked we'd
|
|
211
|
+
// also see a QueryTimeoutError on the *next* call below — the
|
|
212
|
+
// signal would already be aborted.
|
|
213
|
+
const result = await runWithQueryTimeout(async () => "fast", 5000);
|
|
214
|
+
expect(result).toBe("fast");
|
|
215
|
+
// Run a second call: a leaked timer would have aborted the
|
|
216
|
+
// first signal, but the second call gets its own signal so
|
|
217
|
+
// this is really a smoke check.
|
|
218
|
+
const result2 = await runWithQueryTimeout(async (signal) => {
|
|
219
|
+
expect(signal.aborted).toBe(false);
|
|
220
|
+
return "also-fast";
|
|
221
|
+
}, 5000);
|
|
222
|
+
expect(result2).toBe("also-fast");
|
|
223
|
+
});
|
|
224
|
+
});
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import { metrics, type Counter } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import { getQueryTimeoutMs } from "./config";
|
|
4
|
+
import { QueryTimeoutError } from "./errors";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Lazy-initialized telemetry. Instruments are created on first use
|
|
8
|
+
* rather than at module load so unit tests that install a real
|
|
9
|
+
* MeterProvider AFTER the module is imported still record their
|
|
10
|
+
* data — OTel JS's `ProxyMeter` binds counters created before
|
|
11
|
+
* `setGlobalMeterProvider` to NoOp instruments (see
|
|
12
|
+
* https://github.com/open-telemetry/opentelemetry-js/issues/3505).
|
|
13
|
+
* In production the first request triggers initialization, which
|
|
14
|
+
* is well after `server.ts` boot and any OTel SDK setup, so the
|
|
15
|
+
* lazy-init has no observable latency cost.
|
|
16
|
+
*/
|
|
17
|
+
let queryTimeoutCounter: Counter | null = null;
|
|
18
|
+
let timeoutTelemetryInitialized = false;
|
|
19
|
+
/**
|
|
20
|
+
* Idempotent installer for the timeout counter + config gauge.
|
|
21
|
+
* Called at the top of {@link runWithQueryTimeout} (every path,
|
|
22
|
+
* including the opt-out branch) so the gauge is registered with
|
|
23
|
+
* the OTel SDK as soon as the publisher serves its first query —
|
|
24
|
+
* not just on the first timeout firing. Returns the counter for
|
|
25
|
+
* convenience.
|
|
26
|
+
*/
|
|
27
|
+
function ensureTimeoutTelemetry(): Counter {
|
|
28
|
+
if (queryTimeoutCounter && timeoutTelemetryInitialized) {
|
|
29
|
+
return queryTimeoutCounter;
|
|
30
|
+
}
|
|
31
|
+
const meter = metrics.getMeter("publisher");
|
|
32
|
+
if (!queryTimeoutCounter) {
|
|
33
|
+
queryTimeoutCounter = meter.createCounter(
|
|
34
|
+
"publisher_query_timeout_total",
|
|
35
|
+
{
|
|
36
|
+
description:
|
|
37
|
+
"Queries aborted because PUBLISHER_QUERY_TIMEOUT_MS elapsed before the underlying SDK call completed",
|
|
38
|
+
},
|
|
39
|
+
);
|
|
40
|
+
}
|
|
41
|
+
if (!timeoutTelemetryInitialized) {
|
|
42
|
+
// Observable gauge so dashboards can render the *configured*
|
|
43
|
+
// timeout alongside actual query durations from
|
|
44
|
+
// `malloy_model_query_duration` /
|
|
45
|
+
// `http_server_request_duration_ms`. Read live on each
|
|
46
|
+
// scrape so an env-var change between scrapes is visible
|
|
47
|
+
// without a restart.
|
|
48
|
+
meter
|
|
49
|
+
.createObservableGauge("publisher_query_timeout_ms", {
|
|
50
|
+
description:
|
|
51
|
+
"Current effective PUBLISHER_QUERY_TIMEOUT_MS (0 = disabled)",
|
|
52
|
+
unit: "ms",
|
|
53
|
+
})
|
|
54
|
+
.addCallback((observation) => {
|
|
55
|
+
try {
|
|
56
|
+
observation.observe(getQueryTimeoutMs());
|
|
57
|
+
} catch {
|
|
58
|
+
// A misconfigured env var should fail the request
|
|
59
|
+
// that observes it, not the metric scrape. Surface
|
|
60
|
+
// as -1 so dashboards reveal the misconfig rather
|
|
61
|
+
// than silently dropping the sample.
|
|
62
|
+
observation.observe(-1);
|
|
63
|
+
}
|
|
64
|
+
});
|
|
65
|
+
timeoutTelemetryInitialized = true;
|
|
66
|
+
}
|
|
67
|
+
return queryTimeoutCounter;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Visible for tests so they can re-trigger lazy init against a
|
|
72
|
+
* freshly-installed MeterProvider between cases. Do NOT call from
|
|
73
|
+
* production code.
|
|
74
|
+
*/
|
|
75
|
+
export function resetQueryTimeoutTelemetryForTesting(): void {
|
|
76
|
+
queryTimeoutCounter = null;
|
|
77
|
+
timeoutTelemetryInitialized = false;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Per-query wall-clock guard. Hands an {@link AbortSignal} to `fn`
|
|
82
|
+
* and arms a `setTimeout` for `timeoutMs`. When the timer fires the
|
|
83
|
+
* signal is aborted with reason `Symbol.for("publisher.queryTimeout")`
|
|
84
|
+
* so downstream catch blocks can distinguish a publisher-issued
|
|
85
|
+
* timeout from a caller cancel or a driver-internal abort.
|
|
86
|
+
*
|
|
87
|
+
* Contract:
|
|
88
|
+
* - `fn` MUST forward `signal` into the underlying SDK call
|
|
89
|
+
* (`runSQLOptions.abortSignal`, `runnable.run({ abortSignal })`,
|
|
90
|
+
* etc.) so the abort actually cancels the work. `fn` is awaited
|
|
91
|
+
* directly (never raced against the timer), so an `fn` that ignores
|
|
92
|
+
* `signal` is not interrupted: the call stays pending until `fn` settles.
|
|
93
|
+
* - `timeoutMs <= 0` opts out (no timer is armed); the signal is
|
|
94
|
+
* still passed for consistency. Use when an operator deliberately
|
|
95
|
+
* sets `PUBLISHER_QUERY_TIMEOUT_MS=0`.
|
|
96
|
+
* - On timeout AND a subsequent rejection from `fn`, this throws
|
|
97
|
+
* {@link QueryTimeoutError}. If `fn` happens to resolve cleanly
|
|
98
|
+
* between "timer fired" and "we entered the catch" (a race that
|
|
99
|
+
* any driver can win), the success value is returned to the
|
|
100
|
+
* caller — a query that completed is more useful than a 504 with
|
|
101
|
+
* an already-materialized result. The timeout counter ticks only
|
|
102
|
+
* when 504 is actually emitted.
|
|
103
|
+
* - On non-timeout error, the underlying error is re-thrown
|
|
104
|
+
* unmodified.
|
|
105
|
+
*/
|
|
106
|
+
export async function runWithQueryTimeout<T>(
|
|
107
|
+
fn: (signal: AbortSignal) => Promise<T>,
|
|
108
|
+
timeoutMs: number,
|
|
109
|
+
): Promise<T> {
|
|
110
|
+
// Install telemetry on every call (idempotent) so the
|
|
111
|
+
// `publisher_query_timeout_ms` gauge shows up in `/metrics` as
|
|
112
|
+
// soon as the publisher serves its first query, even if no
|
|
113
|
+
// timeout ever fires. Without this, the gauge would be absent
|
|
114
|
+
// until the first 504 — useless for "tune the timeout BEFORE
|
|
115
|
+
// you get paged" workflows.
|
|
116
|
+
ensureTimeoutTelemetry();
|
|
117
|
+
|
|
118
|
+
if (timeoutMs <= 0) {
|
|
119
|
+
// Opt-out path: no timer, no abort. We still pass a never-aborts
|
|
120
|
+
// signal so `fn`'s signature is uniform and forwarding stays
|
|
121
|
+
// mechanical — no per-call branching for "did we get a timeout?".
|
|
122
|
+
const ac = new AbortController();
|
|
123
|
+
return fn(ac.signal);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const ac = new AbortController();
|
|
127
|
+
const reason = PUBLISHER_QUERY_TIMEOUT_REASON;
|
|
128
|
+
let timedOut = false;
|
|
129
|
+
const timer = setTimeout(() => {
|
|
130
|
+
timedOut = true;
|
|
131
|
+
// `abort(reason)` propagates the reason through `signal.reason`
|
|
132
|
+
// so a downstream catch can `signal.reason === reason` to tell
|
|
133
|
+
// "publisher timeout" from "client disconnect" from "driver
|
|
134
|
+
// internal error" without string-matching error messages.
|
|
135
|
+
ac.abort(reason);
|
|
136
|
+
}, timeoutMs);
|
|
137
|
+
// Match HTTP request lifecycle: don't keep the event loop alive
|
|
138
|
+
// just for the timer. If the process is shutting down and the
|
|
139
|
+
// query has already resolved, we don't want this hanging the
|
|
140
|
+
// graceful shutdown.
|
|
141
|
+
timer.unref?.();
|
|
142
|
+
|
|
143
|
+
try {
|
|
144
|
+
return await fn(ac.signal);
|
|
145
|
+
} catch (error) {
|
|
146
|
+
if (timedOut) {
|
|
147
|
+
// Increment before throwing so the counter ticks even if
|
|
148
|
+
// the controller swallows the error (the MCP tool's catch
|
|
149
|
+
// surfaces failures as content payloads, for instance).
|
|
150
|
+
// Carry the configured timeout as a label so dashboards
|
|
151
|
+
// can pivot a flapping pod between "we tuned the env var
|
|
152
|
+
// down" and "queries got slower".
|
|
153
|
+
ensureTimeoutTelemetry().add(1, { timeout_ms: timeoutMs });
|
|
154
|
+
throw new QueryTimeoutError(
|
|
155
|
+
`Query exceeded PUBLISHER_QUERY_TIMEOUT_MS (${timeoutMs}ms) and was aborted. Refine the query (add a more selective WHERE, lower LIMIT, or simplify joins) or raise the timeout.`,
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
throw error;
|
|
159
|
+
} finally {
|
|
160
|
+
clearTimeout(timer);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Sentinel attached to `AbortSignal.reason` when a publisher-issued
|
|
166
|
+
* query timeout fires. Currently attached by `runWithQueryTimeout`
|
|
167
|
+
* itself (and read by tests verifying the wiring); exported so future
|
|
168
|
+
* call sites composing this signal with their own (e.g. a custom
|
|
169
|
+
* driver wrapper) can write `if (signal.reason === PUBLISHER_QUERY_TIMEOUT_REASON)`
|
|
170
|
+
* to detect "this was the publisher's timeout, not the cap" without
|
|
171
|
+
* coupling to error-message strings. `runWithQueryTimeout`'s own
|
|
172
|
+
* timeout-vs-other-error distinction uses the local `timedOut`
|
|
173
|
+
* flag rather than this symbol, so consumers can rely on the
|
|
174
|
+
* symbol being attached even if the implementation changes.
|
|
175
|
+
*/
|
|
176
|
+
export const PUBLISHER_QUERY_TIMEOUT_REASON = Symbol.for(
|
|
177
|
+
"publisher.queryTimeout",
|
|
178
|
+
);
|
package/src/server-old.ts
CHANGED
|
@@ -41,7 +41,8 @@ import {
|
|
|
41
41
|
NotImplementedError,
|
|
42
42
|
} from "./errors";
|
|
43
43
|
import { logger } from "./logger";
|
|
44
|
-
import {
|
|
44
|
+
import { queryConcurrency } from "./query_concurrency";
|
|
45
|
+
import { normalizeQueryArray } from "./query_param_utils";
|
|
45
46
|
import { EnvironmentStore } from "./service/environment_store";
|
|
46
47
|
|
|
47
48
|
const LEGACY_API_PREFIX = "/api/v0";
|
|
@@ -459,8 +460,18 @@ export function registerLegacyRoutes(
|
|
|
459
460
|
|
|
460
461
|
// queryData (deprecated GET) + sqlQuery (supported POST), per-project +
|
|
461
462
|
// per-package
|
|
463
|
+
// Legacy `/projects/...` query routes keep the GET `queryData`
|
|
464
|
+
// endpoints (unlike the modern `/environments/...` surface, which
|
|
465
|
+
// removed them) so existing SDK clients are not broken. The
|
|
466
|
+
// missing protection is concurrency: without `queryConcurrency()`
|
|
467
|
+
// a flood of legacy clients can saturate the pod even while the
|
|
468
|
+
// modern routes are properly gated. Apply the same per-pod cap
|
|
469
|
+
// here so the legacy surface respects PUBLISHER_MAX_CONCURRENT_QUERIES.
|
|
470
|
+
// Admission, timeout, and row/byte caps are already enforced by
|
|
471
|
+
// the shared controllers downstream.
|
|
462
472
|
app.get(
|
|
463
473
|
`${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/queryData`,
|
|
474
|
+
queryConcurrency(),
|
|
464
475
|
async (req, res) => {
|
|
465
476
|
try {
|
|
466
477
|
res.status(200).json(
|
|
@@ -481,6 +492,7 @@ export function registerLegacyRoutes(
|
|
|
481
492
|
|
|
482
493
|
app.get(
|
|
483
494
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/queryData`,
|
|
495
|
+
queryConcurrency(),
|
|
484
496
|
async (req, res) => {
|
|
485
497
|
try {
|
|
486
498
|
res.status(200).json(
|
|
@@ -502,6 +514,7 @@ export function registerLegacyRoutes(
|
|
|
502
514
|
|
|
503
515
|
app.post(
|
|
504
516
|
`${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/sqlQuery`,
|
|
517
|
+
queryConcurrency(),
|
|
505
518
|
async (req, res) => {
|
|
506
519
|
try {
|
|
507
520
|
let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
|
|
@@ -528,6 +541,7 @@ export function registerLegacyRoutes(
|
|
|
528
541
|
|
|
529
542
|
app.post(
|
|
530
543
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/sqlQuery`,
|
|
544
|
+
queryConcurrency(),
|
|
531
545
|
async (req, res) => {
|
|
532
546
|
try {
|
|
533
547
|
let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
|
|
@@ -556,6 +570,7 @@ export function registerLegacyRoutes(
|
|
|
556
570
|
// temporaryTable (deprecated GET) + sqlTemporaryTable (supported POST)
|
|
557
571
|
app.get(
|
|
558
572
|
`${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/temporaryTable`,
|
|
573
|
+
queryConcurrency(),
|
|
559
574
|
async (req, res) => {
|
|
560
575
|
try {
|
|
561
576
|
res.status(200).json(
|
|
@@ -575,6 +590,7 @@ export function registerLegacyRoutes(
|
|
|
575
590
|
|
|
576
591
|
app.get(
|
|
577
592
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/temporaryTable`,
|
|
593
|
+
queryConcurrency(),
|
|
578
594
|
async (req, res) => {
|
|
579
595
|
try {
|
|
580
596
|
res.status(200).json(
|
|
@@ -595,6 +611,7 @@ export function registerLegacyRoutes(
|
|
|
595
611
|
|
|
596
612
|
app.post(
|
|
597
613
|
`${LEGACY_API_PREFIX}/projects/:projectName/connections/:connectionName/sqlTemporaryTable`,
|
|
614
|
+
queryConcurrency(),
|
|
598
615
|
async (req, res) => {
|
|
599
616
|
try {
|
|
600
617
|
res.status(200).json(
|
|
@@ -614,6 +631,7 @@ export function registerLegacyRoutes(
|
|
|
614
631
|
|
|
615
632
|
app.post(
|
|
616
633
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/connections/:connectionName/sqlTemporaryTable`,
|
|
634
|
+
queryConcurrency(),
|
|
617
635
|
async (req, res) => {
|
|
618
636
|
try {
|
|
619
637
|
res.status(200).json(
|
|
@@ -780,6 +798,7 @@ export function registerLegacyRoutes(
|
|
|
780
798
|
|
|
781
799
|
app.post(
|
|
782
800
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/models/*?/query`,
|
|
801
|
+
queryConcurrency(),
|
|
783
802
|
async (req, res) => {
|
|
784
803
|
if (req.body.versionId) {
|
|
785
804
|
setVersionIdError(res);
|
|
@@ -856,6 +875,7 @@ export function registerLegacyRoutes(
|
|
|
856
875
|
// Cell execution route comes BEFORE the general getNotebook wildcard
|
|
857
876
|
app.get(
|
|
858
877
|
`${LEGACY_API_PREFIX}/projects/:projectName/packages/:packageName/notebooks/*/cells/:cellIndex`,
|
|
878
|
+
queryConcurrency(),
|
|
859
879
|
async (req, res) => {
|
|
860
880
|
if (req.query.versionId) {
|
|
861
881
|
setVersionIdError(res);
|