@malloy-publisher/server 0.0.198 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +127 -111
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
  4. package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
  11. package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
  12. package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
  13. package/dist/app/assets/index-DE6d5jEy.js +452 -0
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +4198 -3648
  19. package/package.json +2 -3
  20. package/src/config.spec.ts +246 -0
  21. package/src/config.ts +121 -1
  22. package/src/constants.ts +84 -1
  23. package/src/controller/compile.controller.ts +3 -1
  24. package/src/controller/connection.controller.spec.ts +803 -0
  25. package/src/controller/connection.controller.ts +207 -20
  26. package/src/controller/model.controller.ts +19 -1
  27. package/src/controller/query.controller.ts +22 -6
  28. package/src/controller/watch-mode.controller.ts +11 -2
  29. package/src/errors.spec.ts +44 -0
  30. package/src/errors.ts +34 -0
  31. package/src/health.spec.ts +90 -0
  32. package/src/health.ts +88 -45
  33. package/src/heap_check.spec.ts +144 -0
  34. package/src/heap_check.ts +144 -0
  35. package/src/instrumentation.ts +50 -0
  36. package/src/mcp/handler_utils.ts +14 -0
  37. package/src/mcp/tools/execute_query_tool.ts +52 -10
  38. package/src/oom_guards.integration.spec.ts +261 -0
  39. package/src/package_load/package_load_pool.spec.ts +252 -0
  40. package/src/package_load/package_load_pool.ts +920 -0
  41. package/src/package_load/package_load_worker.ts +980 -0
  42. package/src/package_load/protocol.ts +336 -0
  43. package/src/path_safety.ts +9 -3
  44. package/src/query_cap_metrics.spec.ts +89 -0
  45. package/src/query_cap_metrics.ts +115 -0
  46. package/src/query_concurrency.spec.ts +247 -0
  47. package/src/query_concurrency.ts +236 -0
  48. package/src/query_param_utils.ts +18 -0
  49. package/src/query_timeout.spec.ts +224 -0
  50. package/src/query_timeout.ts +178 -0
  51. package/src/server-old.ts +21 -1
  52. package/src/server.ts +61 -57
  53. package/src/service/connection.ts +8 -2
  54. package/src/service/db_utils.spec.ts +1 -1
  55. package/src/service/environment.ts +85 -4
  56. package/src/service/environment_admission.spec.ts +165 -1
  57. package/src/service/environment_store.spec.ts +103 -0
  58. package/src/service/environment_store.ts +98 -26
  59. package/src/service/filter_integration.spec.ts +110 -0
  60. package/src/service/given.ts +80 -0
  61. package/src/service/givens_integration.spec.ts +192 -0
  62. package/src/service/model.spec.ts +298 -3
  63. package/src/service/model.ts +362 -23
  64. package/src/service/model_limits.spec.ts +181 -0
  65. package/src/service/model_limits.ts +110 -0
  66. package/src/service/package.spec.ts +12 -6
  67. package/src/service/package.ts +263 -146
  68. package/src/service/package_worker_path.spec.ts +196 -0
  69. package/src/service/path_injection.spec.ts +39 -0
  70. package/src/stream_helpers.spec.ts +280 -0
  71. package/src/stream_helpers.ts +162 -0
  72. package/src/test_helpers/metrics_harness.ts +126 -0
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
  75. package/dist/app/assets/index-U38AyjJL.js +0 -451
@@ -0,0 +1,336 @@
1
+ /**
2
+ * Wire protocol between the main thread (`PackageLoadPool`) and a
3
+ * package-load worker thread.
4
+ *
5
+ * Boundary
6
+ * --------
7
+ * The worker performs the **CPU-bound bulk of `Package.create`** off
8
+ * the main event loop:
9
+ *
10
+ * 1. Read `publisher.config.json` (cheap, but already on the worker
11
+ * side of the boundary so the main thread isn't blocked).
12
+ * 2. Compile every `.malloy` / `.malloynb` (the Malloy parser,
13
+ * type-checker, and IR-builder — the dominant CPU cost).
14
+ * 3. Return a structured-clonable POJO carrying every `modelDef`,
15
+ * `sourceInfos`, dataStyles, etc. that the main thread needs to
16
+ * reconstitute a live `Package`.
17
+ *
18
+ * Embedded database probing (`.parquet` / `.csv` schema + row count)
19
+ * stays on the main thread — it reuses the package's existing DuckDB
20
+ * connection (PR #772) and the probe queries are async-IO-bound, not
21
+ * CPU-bound. Keeping all native-DB handles on the main thread also
22
+ * sidesteps Bun crash 0x20131 where duckdb-native cannot be loaded
23
+ * into more than one isolate of the same process.
24
+ *
25
+ * The main thread reconstitutes by:
26
+ * - Building a fresh `MalloyConfig` against its own connection pool
27
+ * (live native handles can't cross the worker boundary).
28
+ * - Lazy-hydrating each model's `ModelMaterializer` from `modelDef`
29
+ * via `Runtime._loadModelFromModelDef` on first query — NO
30
+ * recompile. This is what closes the loop on PR #767's original
31
+ * "first-query recompile on main thread" gap.
32
+ *
33
+ * Per-model compile failures are returned in-band on
34
+ * `SerializedModel.compilationError` so a single bad model doesn't
35
+ * abort the rest of the package load. The main thread decides
36
+ * whether/when to surface as a fatal `Package.create` error (today
37
+ * it throws on the first error; `Package.reloadAllModels` keeps the
38
+ * failed models as placeholders in the package's model map).
39
+ *
40
+ * Whole-package failures (manifest missing, FS errors, worker
41
+ * crashes) come back as `LoadPackageError`. The pool main-thread
42
+ * half (`PackageLoadPool.loadPackage`) rejects with a deserialised
43
+ * Error; `Package.loadViaWorker` then rewraps any non-compile
44
+ * failure as `ServiceUnavailableError` so the HTTP layer responds
45
+ * 503 (transient, retryable) — there is no in-process fallback.
46
+ *
47
+ * Direction summary
48
+ * -----------------
49
+ * main ──▶ worker: LoadPackageRequest (start)
50
+ * worker ──▶ main: LoadPackageResult (success)
51
+ * worker ──▶ main: LoadPackageError (whole-package failure)
52
+ *
53
+ * worker ──▶ main: ConnectionMetadataRequest (proxy non-duckdb lookups)
54
+ * worker ──▶ main: SchemaForTablesRequest (proxy schema fetch)
55
+ * worker ──▶ main: SchemaForSqlRequest (proxy SQL block schema)
56
+ * worker ──▶ main: ReadUrlRequest (proxy non-file URL reads)
57
+ * main ──▶ worker: *Response or RpcErrorResponse (correlated by requestId)
58
+ *
59
+ * main ──▶ worker: ShutdownRequest (graceful drain)
60
+ * worker ──▶ main: ReadyMessage (post-init handshake)
61
+ *
62
+ * The protocol uses plain structured-clonable POJOs so the
63
+ * `postMessage` transfer goes through V8's structured clone — much
64
+ * cheaper than `JSON.stringify` for the multi-MB modelDef payloads.
65
+ */
66
+
67
+ import type {
68
+ Annotation,
69
+ SQLSourceDef,
70
+ TableSourceDef,
71
+ } from "@malloydata/malloy";
72
+
73
+ // ──────────────────────────────────────────────────────────────────────
74
+ // Direction: main ──▶ worker (load-package job)
75
+ // ──────────────────────────────────────────────────────────────────────
76
+
77
+ /**
78
+ * Connection metadata the worker needs to construct a stub
79
+ * `InfoConnection`. Resolved lazily — the worker asks the main thread
80
+ * on the first `lookupConnection(name)` call (see
81
+ * {@link ConnectionMetadataRequest}). We don't ship the full list
82
+ * upfront because Malloy only references connections by name as it
83
+ * encounters `<connection>.table('...')` / `<connection>.sql('...')`
84
+ * inside the model.
85
+ */
86
+ export interface ConnectionMetadata { // payload carried in ConnectionMetadataResponse.metadata
87
+ name: string; // connection name; presumably the same name the worker passed to lookupConnection(name) - confirm
88
+ dialectName: string; // presumably the Malloy dialect identifier used to build the stub connection - TODO confirm
89
+ digest: string; // NOTE(review): looks like a connection-config digest - confirm against the producer on the main thread
90
+ }
91
+
92
+ export interface LoadPackageRequest { // main -> worker: starts one package-load job
93
+ type: "load-package"; // discriminant within MainToWorkerMessage
94
+ requestId: string; // also present on LoadPackageResult / LoadPackageError; presumably echoed back for correlation - confirm in the pool
95
+ /** Absolute path to the package directory on disk. */
96
+ packagePath: string;
97
+ /** Logical package name (used in metric labels + log fields). */
98
+ packageName: string;
99
+ /**
100
+ * Default connection name (passed verbatim to the worker; today
101
+ * always `"duckdb"` for embedded packages, but kept configurable
102
+ * to mirror Malloy's own surface).
103
+ */
104
+ defaultConnectionName: string | null; // null is permitted by the type; how the worker treats null is not visible in this module
105
+ /** Optional row-build manifest passed through to Malloy Runtime. */
106
+ buildManifest?: unknown; // wire-typed as unknown; the concrete manifest shape is not declared here
107
+ }
108
+
109
+ // ──────────────────────────────────────────────────────────────────────
110
+ // Direction: worker ──▶ main (load-package result)
111
+ // ──────────────────────────────────────────────────────────────────────
112
+
113
+ /**
114
+ * Wire shape for one compiled model in the package. Mirrors the
115
+ * data a main-thread `Model` constructor needs without holding a
116
+ * `ModelMaterializer` reference (that binds to live native
117
+ * connection handles and can't cross the worker boundary).
118
+ *
119
+ * `compilationError` is set when this single model failed to
120
+ * compile but the rest of the package is fine; the main thread
121
+ * decides whether to abort `Package.create`.
122
+ */
123
+ export interface SerializedModel { // element type of LoadPackageResult.models
124
+ /** Path relative to the package root, forward-slash normalized. */
125
+ modelPath: string;
126
+ modelType: "model" | "notebook"; // presumably "notebook" for .malloynb and "model" for .malloy files - confirm in the worker
127
+ /** Set when the model compiled successfully. Wire-typed as
128
+ * `unknown` so the protocol module doesn't drag in the full
129
+ * Malloy type surface; cast to `ModelDef` on receipt. */
130
+ modelDef?: unknown;
131
+ /**
132
+ * Precomputed `modelDefToModelInfo(modelDef)`. Shipped from the
133
+ * worker so the main-thread `Model` constructor doesn't pay the
134
+ * derivation cost on every package load and every subsequent
135
+ * `getModel()` / `getNotebook()` API hit can stringify a cached
136
+ * object instead of recomputing.
137
+ */
138
+ modelInfo?: unknown;
139
+ sourceInfos?: unknown[]; // wire-typed as unknown[]; element shape is not declared in this module
140
+ sources?: unknown[]; // wire-typed as unknown[]; element shape is not declared in this module
141
+ queries?: unknown[]; // wire-typed as unknown[]; element shape is not declared in this module
142
+ filterMap?: Array<[string, unknown[]]>; // list of [string, unknown[]] pairs; presumably Map entries keyed by source name - TODO confirm
143
+ givens?: unknown[]; // wire-typed as unknown[]; element shape is not declared in this module
144
+ /** Notebook (.malloynb) only — per-cell pre-extracted info. */
145
+ notebookCells?: SerializedNotebookCell[];
146
+ /** Accumulated dataStyles from sibling `.styles.json` files. */
147
+ dataStyles?: unknown;
148
+ /** Wall-clock ms spent compiling this single model in the worker. */
149
+ compileDurationMs?: number;
150
+ /** Set when the model failed to compile. */
151
+ compilationError?: SerializedError;
152
+ }
153
+
154
+ export interface SerializedNotebookCell { // element type of SerializedModel.notebookCells
155
+ type: "code" | "markdown"; // cell kind
156
+ /** Raw cell text. */
157
+ text: string;
158
+ /**
159
+ * Per-cell ModelDef captured at the cell's point in the
160
+ * `extendModel` chain. The main thread hydrates a per-cell
161
+ * `ModelMaterializer` from this via
162
+ * `Runtime._loadModelFromModelDef`, so cell-level filter
163
+ * refinement can compile new queries against the correct scope
164
+ * without ever recompiling the .malloynb itself.
165
+ */
166
+ cellModelDef?: unknown;
167
+ /**
168
+ * The final-query QueryDef for this cell, captured during the
169
+ * worker's compile. Main thread hydrates a `QueryMaterializer`
170
+ * via `ModelMaterializer._loadQueryFromQueryDef` — no recompile.
171
+ */
172
+ cellQueryDef?: unknown;
173
+ newSources?: unknown[]; // wire-typed as unknown[]; presumably sources first introduced by this cell - confirm in the worker
174
+ queryInfo?: unknown; // wire-typed as unknown; concrete shape is not declared in this module
175
+ }
176
+
177
+ export interface LoadPackageResult { // worker -> main: success reply for a load-package job
178
+ type: "load-package-result"; // discriminant within WorkerToMainMessage
179
+ requestId: string; // presumably the requestId of the originating LoadPackageRequest - confirm in the pool
180
+ packageMetadata: { name?: string; description?: string }; // both fields optional; presumably read from publisher.config.json - confirm in the worker
181
+ models: SerializedModel[]; // a model that failed to compile is still listed, with compilationError set
182
+ /** Wall-clock ms inside the worker for the full package load. */
183
+ loadDurationMs: number;
184
+ }
185
+
186
+ export interface LoadPackageError { // worker -> main: whole-package failure reply for a load-package job
187
+ type: "load-package-error"; // discriminant within WorkerToMainMessage
188
+ requestId: string; // presumably the requestId of the originating LoadPackageRequest - confirm in the pool
189
+ error: SerializedError; // structured error payload; reconstituted into an Error on the main thread
190
+ }
191
+
192
+ /**
193
+ * Error wire-shape. We cannot transfer `Error` instances directly
194
+ * across `postMessage` cleanly (Bun/Node behaviour diverges on stack
195
+ * propagation), so we ship a structured payload and reconstitute on
196
+ * the main thread.
197
+ */
198
+ export interface SerializedError { // used by LoadPackageError.error, RpcErrorResponse.error and SerializedModel.compilationError
199
+ name: string; // presumably copied from Error.name - confirm in the serializer
200
+ message: string; // presumably copied from Error.message - confirm in the serializer
201
+ stack?: string; // optional; presumably the original stack string when one was available
202
+ /** Set when the error originated as a Malloy `MalloyError`. */
203
+ malloyProblems?: unknown[];
204
+ /** Set when the error originated as `ModelCompilationError`. */
205
+ isCompilationError?: boolean;
206
+ }
207
+
208
+ // ──────────────────────────────────────────────────────────────────────
209
+ // Direction: worker ──▶ main (proxy connection metadata)
210
+ // ──────────────────────────────────────────────────────────────────────
211
+
212
+ export interface ConnectionMetadataRequest { // worker -> main RPC asking for the ConnectionMetadata of one named connection
213
+ type: "connection-metadata"; // discriminant within WorkerToMainMessage
214
+ requestId: string; // the matching ConnectionMetadataResponse / RpcErrorResponse carries a requestId too; presumably the same value
215
+ jobId: string; // presumably the load-package job this RPC belongs to, as documented on SchemaForTablesRequest.jobId - confirm
216
+ connectionName: string; // name of the connection being looked up
217
+ }
218
+
219
+ export interface ConnectionMetadataResponse { // main -> worker: successful reply to a ConnectionMetadataRequest
220
+ type: "connection-metadata-response"; // discriminant within MainToWorkerMessage
221
+ requestId: string; // presumably echoes the request's requestId - confirm in the pool
222
+ ok: true; // always true on this type; failures presumably arrive as RpcErrorResponse (ok: false) instead
223
+ metadata: ConnectionMetadata;
224
+ }
225
+
226
+ // ──────────────────────────────────────────────────────────────────────
227
+ // Direction: worker ──▶ main (proxy schema fetches for non-duckdb)
228
+ // ──────────────────────────────────────────────────────────────────────
229
+
230
+ export interface SchemaForTablesRequest { // worker -> main RPC: proxy a table-schema fetch to the main thread
231
+ type: "schema-for-tables"; // discriminant within WorkerToMainMessage
232
+ requestId: string; // presumably echoed on the matching response for correlation - confirm in the pool
233
+ /** Job this RPC belongs to (so main routes to the right config). */
234
+ jobId: string;
235
+ connectionName: string; // connection whose schema is being fetched
236
+ tables: Record<string, string>; // NOTE(review): presumably table key -> table path, mirroring the Malloy connection schema-fetch call - confirm
237
+ options: { // same shape as SchemaForSqlRequest.options
238
+ refreshTimestamp?: number; // presumably a cache-refresh hint forwarded to the connection - TODO confirm
239
+ modelAnnotation?: Annotation; // Annotation type imported from @malloydata/malloy
240
+ };
241
+ }
242
+
243
+ export interface SchemaForTablesResponse { // main -> worker: reply to a SchemaForTablesRequest
244
+ type: "schema-for-tables-response"; // discriminant within MainToWorkerMessage
245
+ requestId: string; // presumably echoes the request's requestId - confirm in the pool
246
+ ok: true; // always true on this type; transport-level failures presumably arrive as RpcErrorResponse
247
+ schemas: Record<string, TableSourceDef>; // presumably keyed like SchemaForTablesRequest.tables - confirm
248
+ errors: Record<string, string>; // presumably per-table error messages for tables whose schema could not be fetched - confirm
249
+ }
250
+
251
+ export interface SchemaForSqlRequest { // worker -> main RPC: proxy an SQL-block schema fetch to the main thread
252
+ type: "schema-for-sql"; // discriminant within WorkerToMainMessage
253
+ requestId: string; // presumably echoed on the matching response for correlation - confirm in the pool
254
+ jobId: string; // presumably the load-package job this RPC belongs to, as documented on SchemaForTablesRequest.jobId - confirm
255
+ connectionName: string; // connection the SQL block targets
256
+ sentence: unknown; // wire-typed as unknown; presumably the Malloy SQL sentence for the block - TODO confirm
257
+ options: { // same shape as SchemaForTablesRequest.options
258
+ refreshTimestamp?: number; // presumably a cache-refresh hint forwarded to the connection - TODO confirm
259
+ modelAnnotation?: Annotation; // Annotation type imported from @malloydata/malloy
260
+ };
261
+ }
262
+
263
+ export interface SchemaForSqlResponse { // main -> worker: reply to a SchemaForSqlRequest
264
+ type: "schema-for-sql-response"; // discriminant within MainToWorkerMessage
265
+ requestId: string; // presumably echoes the request's requestId - confirm in the pool
266
+ ok: true; // always true on this type, even when error is set
267
+ structDef?: SQLSourceDef; // NOTE(review): the type allows structDef and error to be both set or both absent - consumers must check
268
+ error?: string; // presumably a schema-level failure message, distinct from an RpcErrorResponse - confirm
269
+ }
270
+
271
+ export interface RpcErrorResponse { // main -> worker: failure reply; member of MainToWorkerMessage
272
+ type: "rpc-error"; // single error type; it does not name which request kind failed
273
+ requestId: string; // presumably the requestId of the failed RPC, used by the worker to find the pending call - confirm
274
+ ok: false; // the literal false distinguishes this from the ok: true response types
275
+ error: SerializedError;
276
+ }
277
+
278
+ // ──────────────────────────────────────────────────────────────────────
279
+ // Direction: worker ──▶ main (file read for non-file URLs)
280
+ // ──────────────────────────────────────────────────────────────────────
281
+
282
+ /**
283
+ * Workers read most files directly via `fs` (they share the host's
284
+ * filesystem namespace). This RPC exists for the rare case where the
285
+ * package URL reader has host-specific behaviour (e.g. virtual files,
286
+ * remote URLs) — we delegate back to the main thread's URL reader so
287
+ * compile semantics stay identical to the in-process path.
288
+ */
289
+ export interface ReadUrlRequest { // worker -> main RPC: delegate one URL read to the main thread URL reader
290
+ type: "read-url"; // discriminant within WorkerToMainMessage
291
+ requestId: string; // presumably echoed on the matching ReadUrlResponse / RpcErrorResponse - confirm in the pool
292
+ jobId: string; // presumably the load-package job this RPC belongs to, as documented on SchemaForTablesRequest.jobId - confirm
293
+ url: string; // URL to read, carried as a plain string
294
+ }
295
+
296
+ export interface ReadUrlResponse { // main -> worker: successful reply to a ReadUrlRequest
297
+ type: "read-url-response"; // discriminant within MainToWorkerMessage
298
+ requestId: string; // presumably echoes the request's requestId - confirm in the pool
299
+ ok: true; // always true on this type; failures presumably arrive as RpcErrorResponse
300
+ contents: string; // text contents returned by the main-thread URL reader
301
+ invalidationKey?: string | number | null; // NOTE(review): presumably mirrors the invalidation key of the Malloy URL reader result - confirm
302
+ }
303
+
304
+ // ──────────────────────────────────────────────────────────────────────
305
+ // Lifecycle
306
+ // ──────────────────────────────────────────────────────────────────────
307
+
308
+ export interface ShutdownRequest { // main -> worker: asks the worker to drain gracefully; carries no payload and no requestId
309
+ type: "shutdown"; // discriminant within MainToWorkerMessage
310
+ }
311
+
312
+ export interface ReadyMessage { // worker -> main: post-init handshake; carries no payload and no requestId
313
+ type: "ready"; // discriminant within WorkerToMainMessage
314
+ }
315
+
316
+ // ──────────────────────────────────────────────────────────────────────
317
+ // Union types for routing
318
+ // ──────────────────────────────────────────────────────────────────────
319
+
320
+ export type MainToWorkerMessage = // every message the main thread posts to a worker; each member has a distinct type literal
321
+ | LoadPackageRequest // type: "load-package"
322
+ | ConnectionMetadataResponse // type: "connection-metadata-response"
323
+ | SchemaForTablesResponse // type: "schema-for-tables-response"
324
+ | SchemaForSqlResponse // type: "schema-for-sql-response"
325
+ | ReadUrlResponse // type: "read-url-response"
326
+ | RpcErrorResponse // type: "rpc-error"
327
+ | ShutdownRequest; // type: "shutdown"
328
+
329
+ export type WorkerToMainMessage = // every message a worker posts to the main thread; each member has a distinct type literal
330
+ | LoadPackageResult // type: "load-package-result"
331
+ | LoadPackageError // type: "load-package-error"
332
+ | ConnectionMetadataRequest // type: "connection-metadata"
333
+ | SchemaForTablesRequest // type: "schema-for-tables"
334
+ | SchemaForSqlRequest // type: "schema-for-sql"
335
+ | ReadUrlRequest // type: "read-url"
336
+ | ReadyMessage; // type: "ready"
@@ -44,7 +44,9 @@ const MAX_ENVIRONMENT_PATH_LEN = 4096;
44
44
  * production package name we've seen fits within it, and tightening
45
45
  * here costs nothing.
46
46
  */
47
- export function assertSafePackageName(packageName: unknown): void {
47
+ export function assertSafePackageName(
48
+ packageName: unknown,
49
+ ): asserts packageName is string {
48
50
  if (typeof packageName !== "string" || !SAFE_NAME_RE.test(packageName)) {
49
51
  throw new BadRequestError(
50
52
  `Invalid package name: must be 1-255 characters of letters, digits, "-", "_", or "." and must not start with "."`,
@@ -58,7 +60,9 @@ export function assertSafePackageName(packageName: unknown): void {
58
60
  * live in subdirectories like `models/foo.malloy`); backslashes,
59
61
  * absolute paths, NUL bytes, and `..` / `.` segments are not.
60
62
  */
61
- export function assertSafeRelativeModelPath(modelPath: unknown): void {
63
+ export function assertSafeRelativeModelPath(
64
+ modelPath: unknown,
65
+ ): asserts modelPath is string {
62
66
  if (
63
67
  typeof modelPath !== "string" ||
64
68
  modelPath.length === 0 ||
@@ -90,7 +94,9 @@ export function assertSafeRelativeModelPath(modelPath: unknown): void {
90
94
  * sanitizer-barrier pattern CodeQL's `js/path-injection` query
91
95
  * recognises.
92
96
  */
93
- export function assertSafeEnvironmentPath(environmentPath: unknown): void {
97
+ export function assertSafeEnvironmentPath(
98
+ environmentPath: unknown,
99
+ ): asserts environmentPath is string {
94
100
  if (typeof environmentPath !== "string") {
95
101
  throw new BadRequestError(`Invalid environment path: must be a string`);
96
102
  }
@@ -0,0 +1,89 @@
1
+ import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
+
3
+ import {
4
+ recordQueryCapExceeded,
5
+ resetQueryCapTelemetryForTesting,
6
+ } from "./query_cap_metrics";
7
+ import {
8
+ startMetricsHarness,
9
+ type MetricsHarness,
10
+ } from "./test_helpers/metrics_harness";
11
+
12
+ describe("query_cap_metrics", () => { // exercises the cap-exceeded counter and the two config gauges through the test metrics harness
13
+ let harness: MetricsHarness; // recreated for every test in beforeEach
14
+
15
+ beforeEach(async () => {
16
+ harness = await startMetricsHarness();
17
+ // Drop cached instruments so they re-init against the new
18
+ // provider; otherwise this test's writes go to a counter
19
+ // bound to the previous provider's reader.
20
+ resetQueryCapTelemetryForTesting();
21
+ });
22
+
23
+ afterEach(async () => {
24
+ delete process.env.PUBLISHER_MAX_QUERY_ROWS; // undo env overrides so one test cannot leak config into the next
25
+ delete process.env.PUBLISHER_MAX_RESPONSE_BYTES;
26
+ resetQueryCapTelemetryForTesting();
27
+ await harness.shutdown();
28
+ });
29
+
30
+ it("publisher_query_cap_exceeded_total ticks per call, labeled by cap_type and source", async () => {
31
+ recordQueryCapExceeded("rows", "connection_sql"); // two events on the same label pair, expected to read back as 2
32
+ recordQueryCapExceeded("rows", "connection_sql");
33
+ recordQueryCapExceeded("bytes", "model_query");
34
+ recordQueryCapExceeded("rows", "notebook_cell");
35
+
36
+ expect(
37
+ await harness.collectCounter("publisher_query_cap_exceeded_total", {
38
+ cap_type: "rows",
39
+ source: "connection_sql",
40
+ }),
41
+ ).toBe(2);
42
+ expect(
43
+ await harness.collectCounter("publisher_query_cap_exceeded_total", {
44
+ cap_type: "bytes",
45
+ source: "model_query",
46
+ }),
47
+ ).toBe(1);
48
+ expect(
49
+ await harness.collectCounter("publisher_query_cap_exceeded_total", {
50
+ cap_type: "rows",
51
+ source: "notebook_cell",
52
+ }),
53
+ ).toBe(1);
54
+ });
55
+
56
+ it("publisher_max_query_rows gauge reports the live env-var value", async () => {
57
+ process.env.PUBLISHER_MAX_QUERY_ROWS = "12345";
58
+ // Prime telemetry — the gauges install on the first
59
+ // counter-emitting call (`recordQueryCapExceeded`); in
60
+ // production that's the first 413, in tests we trigger it
61
+ // explicitly so the gauge is observable without a 413.
62
+ recordQueryCapExceeded("rows", "connection_sql");
63
+ expect(await harness.collectGauge("publisher_max_query_rows")).toBe(
64
+ 12345,
65
+ );
66
+ });
67
+
68
+ it("publisher_max_response_bytes gauge reports the live env-var value", async () => {
69
+ process.env.PUBLISHER_MAX_RESPONSE_BYTES = "9876543";
70
+ recordQueryCapExceeded("bytes", "connection_sql"); // primes telemetry so the gauges are installed before collection
71
+ expect(await harness.collectGauge("publisher_max_response_bytes")).toBe(
72
+ 9876543,
73
+ );
74
+ });
75
+
76
+ it("publisher_max_query_rows gauge reports 0 when the cap is opted out", async () => {
77
+ process.env.PUBLISHER_MAX_QUERY_ROWS = "0";
78
+ recordQueryCapExceeded("rows", "connection_sql"); // primes telemetry so the gauges are installed before collection
79
+ expect(await harness.collectGauge("publisher_max_query_rows")).toBe(0);
80
+ });
81
+
82
+ it("publisher_max_query_rows gauge reports -1 on misconfig so dashboards reveal the bad value", async () => {
83
+ process.env.PUBLISHER_MAX_QUERY_ROWS = "not-a-number";
84
+ // Misconfig must not crash the scrape; -1 is the agreed
85
+ // signal mirroring `publisher_query_timeout_ms`.
86
+ recordQueryCapExceeded("rows", "connection_sql");
87
+ expect(await harness.collectGauge("publisher_max_query_rows")).toBe(-1);
88
+ });
89
+ });
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Centralized telemetry for row-cap / byte-cap rejections (HTTP 413).
3
+ *
4
+ * Without this, operators tuning {@link PUBLISHER_MAX_QUERY_ROWS} /
5
+ * {@link PUBLISHER_MAX_RESPONSE_BYTES} can only see undifferentiated
6
+ * `http_server_requests_total{status_code="413"}` — they can't tell
7
+ * which cap is firing or which query surface is hottest. The counter
8
+ * here carries `cap_type` (`rows` / `bytes`) and `source`
9
+ * (`connection_sql` / `model_query` / `notebook_cell`) so a single
10
+ * dashboard panel can answer "what should I tune and on which
11
+ * endpoint?".
12
+ *
13
+ * Observable gauges expose the current effective caps so dashboards
14
+ * can render `actual_rows_returned / max_rows` utilization without
15
+ * a separate config feed — same pattern the memory governor uses
16
+ * for high/low water bytes and the concurrency middleware uses for
17
+ * its slot cap.
18
+ *
19
+ * Lazy init for the same reason as `query_timeout.ts` /
20
+ * `query_concurrency.ts`: instruments created before
21
+ * `setGlobalMeterProvider` bind to a NoOp meter
22
+ * (https://github.com/open-telemetry/opentelemetry-js/issues/3505).
23
+ * The first throw site initializes the counter; the gauges are
24
+ * installed alongside on the same call so the production hot path
25
+ * is one boolean check after the first 413.
26
+ */
27
+
28
+ import { metrics, type Counter } from "@opentelemetry/api";
29
+
30
+ import { getMaxQueryRows, getMaxResponseBytes } from "./config";
31
+
32
+ export type QueryCapType = "rows" | "bytes"; // which cap fired; emitted as the cap_type counter attribute
33
+ export type QueryCapSource = "connection_sql" | "model_query" | "notebook_cell"; // query surface that detected the overflow; emitted as the source counter attribute
34
+
35
+ let capExceededCounter: Counter | null = null; // cached counter; null until ensureCapTelemetry creates it, and again after the testing reset
36
+ let configGaugesInstalled = false; // true once ensureCapTelemetry has registered both observable gauges
37
+
38
+ function ensureCapTelemetry(): Counter { // lazily creates the cap-exceeded counter and the two config gauges, then returns the counter
39
+ if (capExceededCounter && configGaugesInstalled) { // fast path: everything already initialised
40
+ return capExceededCounter;
41
+ }
42
+ const meter = metrics.getMeter("publisher"); // meter is looked up at first use, not at module load
43
+ if (!capExceededCounter) {
44
+ capExceededCounter = meter.createCounter(
45
+ "publisher_query_cap_exceeded_total",
46
+ {
47
+ description:
48
+ "Queries rejected with 413 because the row or byte cap was exceeded. Labels: cap_type ('rows'|'bytes'), source ('connection_sql'|'model_query'|'notebook_cell').",
49
+ },
50
+ );
51
+ }
52
+ if (!configGaugesInstalled) {
53
+ // Live config readouts so dashboards can render
54
+ // "actual / max" utilization for the row and byte caps the
55
+ // same way `publisher_memory_*_bytes` does for the governor.
56
+ // Read on every scrape so a runtime env-var change is
57
+ // visible without a restart; an env-var parse failure
58
+ // reports -1 so misconfig is visible rather than silently
59
+ // dropped (mirrors `publisher_query_timeout_ms`).
60
+ meter
61
+ .createObservableGauge("publisher_max_query_rows", {
62
+ description:
63
+ "Current effective PUBLISHER_MAX_QUERY_ROWS cap (0 = disabled, -1 = misconfigured)",
64
+ })
65
+ .addCallback((observation) => {
66
+ try {
67
+ observation.observe(getMaxQueryRows()); // config getter is called inside the callback, so each collection sees the current value
68
+ } catch { // any throw from the getter is reported as -1 instead of propagating out of the callback
69
+ observation.observe(-1);
70
+ }
71
+ });
72
+ meter
73
+ .createObservableGauge("publisher_max_response_bytes", {
74
+ description:
75
+ "Current effective PUBLISHER_MAX_RESPONSE_BYTES cap (0 = disabled, -1 = misconfigured)",
76
+ unit: "By",
77
+ })
78
+ .addCallback((observation) => {
79
+ try {
80
+ observation.observe(getMaxResponseBytes()); // config getter is called inside the callback, so each collection sees the current value
81
+ } catch { // any throw from the getter is reported as -1 instead of propagating out of the callback
82
+ observation.observe(-1);
83
+ }
84
+ });
85
+ configGaugesInstalled = true; // set only after both gauges are registered
86
+ }
87
+ return capExceededCounter;
88
+ }
89
+
90
+ /**
91
+ * Record a single 413 cap-exceeded event. Call BEFORE throwing
92
+ * `PayloadTooLargeError` so the metric ticks even if a downstream
93
+ * `catch` swallows the error (MCP tools surface failures as content
94
+ * payloads rather than letting them bubble to the HTTP error
95
+ * mapper).
96
+ *
97
+ * `cap_type` must be one of `rows` / `bytes`; `source` identifies
98
+ * the query surface that detected the overflow.
99
+ */
100
+ export function recordQueryCapExceeded( // adds 1 to the cap-exceeded counter, initialising telemetry on first use
101
+ capType: QueryCapType, // emitted as the cap_type attribute
102
+ source: QueryCapSource, // emitted as the source attribute
103
+ ): void {
104
+ ensureCapTelemetry().add(1, { cap_type: capType, source }); // the first call also installs the config gauges as a side effect
105
+ }
106
+
107
+ /**
108
+ * Visible for tests. Drops the cached instruments so a fresh
109
+ * `MeterProvider` (installed via `startMetricsHarness`) can capture
110
+ * future emissions. Do NOT call from production code.
111
+ */
112
+ export function resetQueryCapTelemetryForTesting(): void { // clears both module-level caches; takes no arguments and returns nothing
112
+ capExceededCounter = null; // next ensureCapTelemetry call creates a new counter from the then-current global meter
113
+ configGaugesInstalled = false; // next ensureCapTelemetry call registers the gauges again; nothing here unregisters the old ones
114
+ }