@malloy-publisher/server 0.0.198 → 0.0.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/build.ts +30 -1
  2. package/dist/app/api-doc.yaml +127 -111
  3. package/dist/app/assets/{EnvironmentPage-C7rtH4mC.js → EnvironmentPage-CgKNjySu.js} +1 -1
  4. package/dist/app/assets/HomePage-BPIpMBjW.js +1 -0
  5. package/dist/app/assets/{MainPage-D38LtZDV.js → MainPage-CAwb8U82.js} +2 -2
  6. package/dist/app/assets/{ModelPage-DOol8Mz7.js → ModelPage-C0Uevsw9.js} +1 -1
  7. package/dist/app/assets/{PackagePage-0tgzA_kO.js → PackagePage-Cu-u9k1g.js} +1 -1
  8. package/dist/app/assets/{RouteError-BaMsOSly.js → RouteError-DVwPh2Ql.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-Cx4SePkx.js → WorkbookPage-DW38R2Zv.js} +1 -1
  10. package/dist/app/assets/{core-CbsC6R_Y.es-Cwf6asf3.js → core-C0vCMRDQ.es-D_ytHhjS.js} +10 -10
  11. package/dist/app/assets/{index-DL6BZTuw.js → index-BGdcKsFF.js} +1 -1
  12. package/dist/app/assets/{index-DNofXMxi.js → index-CTx4v4_3.js} +1 -1
  13. package/dist/app/assets/index-DE6d5jEy.js +452 -0
  14. package/dist/app/assets/{index.umd-B68wGGkM.js → index.umd-C1Mi1uRm.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/instrumentation.mjs +57 -36
  17. package/dist/package_load_worker.mjs +12213 -0
  18. package/dist/server.mjs +4198 -3648
  19. package/package.json +2 -3
  20. package/src/config.spec.ts +246 -0
  21. package/src/config.ts +121 -1
  22. package/src/constants.ts +84 -1
  23. package/src/controller/compile.controller.ts +3 -1
  24. package/src/controller/connection.controller.spec.ts +803 -0
  25. package/src/controller/connection.controller.ts +207 -20
  26. package/src/controller/model.controller.ts +19 -1
  27. package/src/controller/query.controller.ts +22 -6
  28. package/src/controller/watch-mode.controller.ts +11 -2
  29. package/src/errors.spec.ts +44 -0
  30. package/src/errors.ts +34 -0
  31. package/src/health.spec.ts +90 -0
  32. package/src/health.ts +88 -45
  33. package/src/heap_check.spec.ts +144 -0
  34. package/src/heap_check.ts +144 -0
  35. package/src/instrumentation.ts +50 -0
  36. package/src/mcp/handler_utils.ts +14 -0
  37. package/src/mcp/tools/execute_query_tool.ts +52 -10
  38. package/src/oom_guards.integration.spec.ts +261 -0
  39. package/src/package_load/package_load_pool.spec.ts +252 -0
  40. package/src/package_load/package_load_pool.ts +920 -0
  41. package/src/package_load/package_load_worker.ts +980 -0
  42. package/src/package_load/protocol.ts +336 -0
  43. package/src/path_safety.ts +9 -3
  44. package/src/query_cap_metrics.spec.ts +89 -0
  45. package/src/query_cap_metrics.ts +115 -0
  46. package/src/query_concurrency.spec.ts +247 -0
  47. package/src/query_concurrency.ts +236 -0
  48. package/src/query_param_utils.ts +18 -0
  49. package/src/query_timeout.spec.ts +224 -0
  50. package/src/query_timeout.ts +178 -0
  51. package/src/server-old.ts +21 -1
  52. package/src/server.ts +61 -57
  53. package/src/service/connection.ts +8 -2
  54. package/src/service/db_utils.spec.ts +1 -1
  55. package/src/service/environment.ts +85 -4
  56. package/src/service/environment_admission.spec.ts +165 -1
  57. package/src/service/environment_store.spec.ts +103 -0
  58. package/src/service/environment_store.ts +98 -26
  59. package/src/service/filter_integration.spec.ts +110 -0
  60. package/src/service/given.ts +80 -0
  61. package/src/service/givens_integration.spec.ts +192 -0
  62. package/src/service/model.spec.ts +298 -3
  63. package/src/service/model.ts +362 -23
  64. package/src/service/model_limits.spec.ts +181 -0
  65. package/src/service/model_limits.ts +110 -0
  66. package/src/service/package.spec.ts +12 -6
  67. package/src/service/package.ts +263 -146
  68. package/src/service/package_worker_path.spec.ts +196 -0
  69. package/src/service/path_injection.spec.ts +39 -0
  70. package/src/stream_helpers.spec.ts +280 -0
  71. package/src/stream_helpers.ts +162 -0
  72. package/src/test_helpers/metrics_harness.ts +126 -0
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/dist/app/assets/HomePage-DwkH7OrS.js +0 -1
  75. package/dist/app/assets/index-U38AyjJL.js +0 -451
package/src/server.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  // Pre-load the instrumentation module; the instrumentation module must be loaded before the other imports.
2
+ import type { GivenValue } from "@malloydata/malloy";
2
3
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
3
4
  import bodyParser from "body-parser";
4
5
  import cors from "cors";
@@ -34,6 +35,8 @@ import {
34
35
  import { logger, loggerMiddleware } from "./logger";
35
36
 
36
37
  import { getMemoryGovernorConfig } from "./config";
38
+ import { checkHeapConfiguration } from "./heap_check";
39
+ import { queryConcurrency } from "./query_concurrency";
37
40
  import { ManifestController } from "./controller/manifest.controller";
38
41
  import { MaterializationController } from "./controller/materialization.controller";
39
42
  import { initializeMcpServer } from "./mcp/server";
@@ -41,14 +44,10 @@ import { registerLegacyRoutes } from "./server-old";
41
44
  import { EnvironmentStore } from "./service/environment_store";
42
45
  import { ManifestService } from "./service/manifest_service";
43
46
  import { MaterializationService } from "./service/materialization_service";
47
+ import { normalizeQueryArray } from "./query_param_utils";
44
48
  import { PackageMemoryGovernor } from "./service/package_memory_governor";
45
49
 
46
- /** Normalize an Express query param into a string[] or undefined. */
47
- export function normalizeQueryArray(value: unknown): string[] | undefined {
48
- if (value === undefined || value === null) return undefined;
49
- if (Array.isArray(value)) return value.map(String);
50
- return [String(value)];
51
- }
50
+ export { normalizeQueryArray } from "./query_param_utils";
52
51
 
53
52
  // Parse command line arguments
54
53
  function parseArgs() {
@@ -122,10 +121,12 @@ function parseArgs() {
122
121
  // Zero-config invocation (`npx @malloy-publisher/server`) opts in to
123
122
  // the bundled DuckDB-only sample config so the Quick Start works
124
123
  // without any flags. Any explicit --server_root or --config disables
125
- // this — the user told us where to look. Skip in NODE_ENV=test so
126
- // specs that import this module for utility helpers (e.g.
127
- // db_utils.spec.ts -> normalizeQueryArray) don't get the bundled
128
- // default leaked into their EnvironmentStore construction.
124
+ // this — the user told us where to look. Skip in NODE_ENV=test as a
125
+ // belt-and-suspenders so any spec that ends up evaluating this
126
+ // module doesn't accidentally pin the EnvironmentStore to the
127
+ // bundled malloy-samples config; query-param helpers have been
128
+ // moved to `./query_param_utils` precisely so unit specs no longer
129
+ // need to import this module at all.
129
130
  if (!sawServerRoot && !sawConfig && process.env.NODE_ENV !== "test") {
130
131
  process.env.PUBLISHER_USE_BUNDLED_DEFAULT = "true";
131
132
  }
@@ -155,6 +156,12 @@ const isDevelopment = process.env["NODE_ENV"] === "development";
155
156
  export const app = express();
156
157
  app.use(loggerMiddleware);
157
158
  app.use(httpMetricsMiddleware);
159
+ // Probe the V8 heap ceiling once at startup and warn if it's below
160
+ // the recommended floor. The row/byte caps from Steps 1–3 still
161
+ // bound per-request memory; this is a "your --max-old-space-size
162
+ // looks low for the default caps" advisory so operators don't
163
+ // chase OOMKills before checking the obvious config.
164
+ checkHeapConfiguration();
158
165
  const environmentStore = new EnvironmentStore(SERVER_ROOT);
159
166
  const manifestService = new ManifestService(environmentStore);
160
167
  const watchModeController = new WatchModeController(environmentStore);
@@ -714,55 +721,18 @@ app.post(
714
721
  },
715
722
  );
716
723
 
717
- /**
718
- * @deprecated Use /environments/:environmentName/connections/:connectionName/sqlQuery POST method instead
719
- */
720
- app.get(
721
- `${API_PREFIX}/environments/:environmentName/connections/:connectionName/queryData`,
722
- async (req, res) => {
723
- try {
724
- res.status(200).json(
725
- await connectionController.getConnectionQueryData(
726
- req.params.environmentName,
727
- req.params.connectionName,
728
- req.query.sqlStatement as string,
729
- req.query.options as string,
730
- ),
731
- );
732
- } catch (error) {
733
- logger.error(error);
734
- const { json, status } = internalErrorToHttpError(error as Error);
735
- res.status(status).json(json);
736
- }
737
- },
738
- );
739
-
740
- /**
741
- * @deprecated Use /environments/:environmentName/packages/:packageName/connections/:connectionName/sqlQuery
742
- */
743
- app.get(
744
- `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/queryData`,
745
- async (req, res) => {
746
- try {
747
- res.status(200).json(
748
- await connectionController.getConnectionQueryData(
749
- req.params.environmentName,
750
- req.params.connectionName,
751
- req.query.sqlStatement as string,
752
- req.query.options as string,
753
- req.params.packageName,
754
- ),
755
- );
756
- } catch (error) {
757
- logger.error(error);
758
- const { json, status } = internalErrorToHttpError(error as Error);
759
- res.status(status).json(json);
760
- }
761
- },
762
- );
763
-
724
+ // NOTE: The deprecated `GET …/connections/:connectionName/queryData`
725
+ // and `GET …/packages/:packageName/connections/:connectionName/queryData`
726
+ // routes were removed in the operational-guards changeset.
727
+ // They had been marked `@deprecated` for several releases; clients
728
+ // must now use the POST `…/sqlQuery` endpoints below, which take the
729
+ // SQL in the request body so the row/byte caps and query-timeout
730
+ // signals introduced in the OOM-mitigation work apply uniformly.
731
+ // The legacy `GET /projects/…/queryData` twins under `server-old.ts`
732
+ // remain in place for now.
764
733
  app.post(
765
734
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/sqlQuery`,
735
+ queryConcurrency(),
766
736
  async (req, res) => {
767
737
  try {
768
738
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -792,6 +762,7 @@ app.post(
792
762
 
793
763
  app.post(
794
764
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/sqlQuery`,
765
+ queryConcurrency(),
795
766
  async (req, res) => {
796
767
  try {
797
768
  let options: string | ParsedQs | (string | ParsedQs)[] | undefined;
@@ -822,6 +793,7 @@ app.post(
822
793
  */
823
794
  app.get(
824
795
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/temporaryTable`,
796
+ queryConcurrency(),
825
797
  async (req, res) => {
826
798
  try {
827
799
  res.status(200).json(
@@ -844,6 +816,7 @@ app.get(
844
816
  */
845
817
  app.get(
846
818
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/temporaryTable`,
819
+ queryConcurrency(),
847
820
  async (req, res) => {
848
821
  try {
849
822
  res.status(200).json(
@@ -864,6 +837,7 @@ app.get(
864
837
 
865
838
  app.post(
866
839
  `${API_PREFIX}/environments/:environmentName/connections/:connectionName/sqlTemporaryTable`,
840
+ queryConcurrency(),
867
841
  async (req, res) => {
868
842
  try {
869
843
  res.status(200).json(
@@ -883,6 +857,7 @@ app.post(
883
857
 
884
858
  app.post(
885
859
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/connections/:connectionName/sqlTemporaryTable`,
860
+ queryConcurrency(),
886
861
  async (req, res) => {
887
862
  try {
888
863
  res.status(200).json(
@@ -1077,6 +1052,7 @@ app.get(
1077
1052
  // to avoid the wildcard matching incorrectly
1078
1053
  app.get(
1079
1054
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/notebooks/*/cells/:cellIndex`,
1055
+ queryConcurrency(),
1080
1056
  async (req, res) => {
1081
1057
  if (req.query.versionId) {
1082
1058
  setVersionIdError(res);
@@ -1110,6 +1086,18 @@ app.get(
1110
1086
  const bypassFilters =
1111
1087
  req.query.bypass_filters === "true" ? true : undefined;
1112
1088
 
1089
+ let givens: Record<string, GivenValue> | undefined;
1090
+ if (typeof req.query.givens === "string") {
1091
+ try {
1092
+ givens = JSON.parse(req.query.givens);
1093
+ } catch {
1094
+ res.status(400).json({
1095
+ error: "Invalid givens: must be valid JSON",
1096
+ });
1097
+ return;
1098
+ }
1099
+ }
1100
+
1113
1101
  res.status(200).json(
1114
1102
  await modelController.executeNotebookCell(
1115
1103
  req.params.environmentName,
@@ -1118,6 +1106,7 @@ app.get(
1118
1106
  cellIndex,
1119
1107
  filterParams,
1120
1108
  bypassFilters,
1109
+ givens,
1121
1110
  ),
1122
1111
  );
1123
1112
  } catch (error) {
@@ -1156,6 +1145,7 @@ app.get(
1156
1145
 
1157
1146
  app.post(
1158
1147
  `${API_PREFIX}/environments/:environmentName/packages/:packageName/models/*?/query`,
1148
+ queryConcurrency(),
1159
1149
  async (req, res) => {
1160
1150
  if (req.body.versionId) {
1161
1151
  setVersionIdError(res);
@@ -1178,6 +1168,7 @@ app.post(
1178
1168
  | Record<string, string | string[]>
1179
1169
  | undefined,
1180
1170
  req.body.bypassFilters === true ? true : undefined,
1171
+ req.body.givens as Record<string, GivenValue> | undefined,
1181
1172
  ),
1182
1173
  );
1183
1174
  } catch (error) {
@@ -1221,6 +1212,7 @@ app.post(
1221
1212
  req.params.modelName,
1222
1213
  req.body.source,
1223
1214
  req.body.includeSql === true,
1215
+ req.body.givens as Record<string, GivenValue> | undefined,
1224
1216
  );
1225
1217
  res.status(200).json(result);
1226
1218
  } catch (error) {
@@ -1431,6 +1423,18 @@ app.use(
1431
1423
  },
1432
1424
  );
1433
1425
 
1426
+ // Eagerly construct the package-load worker pool so we fail fast at
1427
+ // boot if PACKAGE_LOAD_WORKERS is misconfigured (e.g. set to 0, the
1428
+ // removed in-process fallback). Surfacing the bad config here is much
1429
+ // friendlier than surfacing it on the first package load, which could
1430
+ // be hours after start.
1431
+ {
1432
+ const { getPackageLoadPool } = await import(
1433
+ "./package_load/package_load_pool"
1434
+ );
1435
+ getPackageLoadPool();
1436
+ }
1437
+
1434
1438
  const mainServer = http.createServer({ maxHeaderSize: 262144 }, app);
1435
1439
 
1436
1440
  mainServer.timeout = 600000;
@@ -22,10 +22,14 @@ import {
22
22
  import type { LookupConnection } from "@malloydata/malloy/connection";
23
23
  import { AxiosError } from "axios";
24
24
  import fs from "fs/promises";
25
- import path from "path";
26
25
  import { components } from "../api";
27
26
  import { logAxiosError, logger } from "../logger";
28
27
  import { redactPgSecrets } from "../pg_helpers";
28
+ import {
29
+ assertSafeEnvironmentPath,
30
+ assertSafePackageName,
31
+ safeJoinUnderRoot,
32
+ } from "../path_safety";
29
33
  import {
30
34
  assembleEnvironmentConnections,
31
35
  CoreConnectionEntry,
@@ -814,7 +818,9 @@ export async function deleteDuckLakeConnectionFile(
814
818
  connectionName: string,
815
819
  environmentPath: string,
816
820
  ): Promise<void> {
817
- const ducklakePath = path.join(
821
+ assertSafePackageName(connectionName);
822
+ assertSafeEnvironmentPath(environmentPath);
823
+ const ducklakePath = safeJoinUnderRoot(
818
824
  environmentPath,
819
825
  `${connectionName}_ducklake.duckdb`,
820
826
  );
@@ -12,7 +12,7 @@ mock.module("@google-cloud/bigquery", () => ({
12
12
  }));
13
13
 
14
14
  import { Connection } from "@malloydata/malloy";
15
- import { normalizeQueryArray } from "../server";
15
+ import { normalizeQueryArray } from "../query_param_utils";
16
16
  import {
17
17
  extractErrorDataFromError,
18
18
  getSchemasForConnection,
@@ -1,5 +1,6 @@
1
- import type { LogMessage } from "@malloydata/malloy";
1
+ import type { GivenValue, LogMessage } from "@malloydata/malloy";
2
2
  import { MalloyError, Runtime } from "@malloydata/malloy";
3
+ import { metrics } from "@opentelemetry/api";
3
4
  import { Mutex } from "async-mutex";
4
5
  import crypto from "crypto";
5
6
  import * as fs from "fs";
@@ -69,6 +70,49 @@ type RetiredConnectionGeneration = {
69
70
 
70
71
  const RETIRED_CONNECTION_DRAIN_MS = 30_000;
71
72
 
73
+ /**
74
+ * Module-scoped admission-rejection counters. Lazy-initialized so
75
+ * the OTel JS `ProxyMeter` cannot strand them on a NoOp instrument
76
+ * created before the SDK MeterProvider was registered (a real risk
77
+ * in unit tests; see comment in `query_timeout.ts`). Environment
78
+ * name is attached as a label so dashboards can identify hot
79
+ * environments without grepping logs.
80
+ */
81
+ import { type Counter } from "@opentelemetry/api";
82
+ let queryAdmissionRejectionsCounter: Counter | null = null;
83
+ let packageAdmissionRejectionsCounter: Counter | null = null;
84
+ function getQueryAdmissionRejectionsCounter(): Counter {
85
+ if (queryAdmissionRejectionsCounter) return queryAdmissionRejectionsCounter;
86
+ queryAdmissionRejectionsCounter = metrics
87
+ .getMeter("publisher")
88
+ .createCounter("publisher_query_admission_rejections_total", {
89
+ description:
90
+ "Queries rejected with 503 because Environment.assertCanAdmitQuery() observed memory back-pressure",
91
+ });
92
+ return queryAdmissionRejectionsCounter;
93
+ }
94
+ function getPackageAdmissionRejectionsCounter(): Counter {
95
+ if (packageAdmissionRejectionsCounter) {
96
+ return packageAdmissionRejectionsCounter;
97
+ }
98
+ packageAdmissionRejectionsCounter = metrics
99
+ .getMeter("publisher")
100
+ .createCounter("publisher_package_admission_rejections_total", {
101
+ description:
102
+ "Package loads rejected with 503 because Environment.assertCanAdmitNewPackage() observed memory back-pressure",
103
+ });
104
+ return packageAdmissionRejectionsCounter;
105
+ }
106
+
107
+ /**
108
+ * Visible for tests; production code never calls this. Resets the
109
+ * lazy caches so a fresh MeterProvider can capture future writes.
110
+ */
111
+ export function resetAdmissionTelemetryForTesting(): void {
112
+ queryAdmissionRejectionsCounter = null;
113
+ packageAdmissionRejectionsCounter = null;
114
+ }
115
+
72
116
  export class Environment {
73
117
  private packages: Map<string, Package> = new Map();
74
118
  // Lock ordering: connectionMutex (environment) MUST be acquired before any
@@ -176,6 +220,7 @@ export class Environment {
176
220
  environmentPath: string,
177
221
  connections: ApiConnection[],
178
222
  ): Promise<Environment> {
223
+ assertSafeEnvironmentPath(environmentPath);
179
224
  if (!(await fs.promises.stat(environmentPath))?.isDirectory()) {
180
225
  throw new EnvironmentNotFoundError(
181
226
  `Environment path ${environmentPath} not found`,
@@ -218,7 +263,7 @@ export class Environment {
218
263
  try {
219
264
  readme = (
220
265
  await fs.promises.readFile(
221
- path.join(this.environmentPath, README_NAME),
266
+ safeJoinUnderRoot(this.environmentPath, README_NAME),
222
267
  )
223
268
  ).toString();
224
269
  } catch {
@@ -238,6 +283,7 @@ export class Environment {
238
283
  modelName: string,
239
284
  source: string,
240
285
  includeSql: boolean = false,
286
+ givens?: Record<string, GivenValue>,
241
287
  ): Promise<{ problems: LogMessage[]; sql?: string }> {
242
288
  assertSafePackageName(packageName);
243
289
  assertSafeRelativeModelPath(modelName);
@@ -308,7 +354,7 @@ export class Environment {
308
354
  if (includeSql) {
309
355
  try {
310
356
  const queryMaterializer = modelMaterializer.loadFinalQuery();
311
- sql = await queryMaterializer.getSQL();
357
+ sql = await queryMaterializer.getSQL({ givens });
312
358
  } catch {
313
359
  // Source may not contain a runnable query (e.g. only source definitions),
314
360
  // in which case we simply omit the sql field.
@@ -578,8 +624,43 @@ export class Environment {
578
624
  ): void {
579
625
  if (allowAdmission) return;
580
626
  if (!this.memoryGovernor?.isBackpressured()) return;
627
+ // Increment *before* throwing so the metric ticks even on
628
+ // the not-uncommon "caught and swallowed" path. The label
629
+ // shape mirrors `assertCanAdmitQuery` so a dashboard panel
630
+ // can sum both rejection kinds by environment.
631
+ getPackageAdmissionRejectionsCounter().add(1, {
632
+ environment: this.environmentName,
633
+ reason,
634
+ });
635
+ throw new ServiceUnavailableError(
636
+ `Publisher is under memory pressure and cannot ${reason} (package "${packageName}", environment "${this.environmentName}"). Retry after the server's memory usage drops below the low-water mark (PUBLISHER_MEMORY_LOW_WATER_FRACTION of PUBLISHER_MAX_MEMORY_BYTES), or raise PUBLISHER_MAX_MEMORY_BYTES if you have headroom.`,
637
+ );
638
+ }
639
+
640
+ /**
641
+ * Reject incoming queries with HTTP 503 when the memory governor
642
+ * has tripped its high-water mark. Used by every query controller
643
+ * (connection SQL, model query, notebook cell, MCP `execute_query`)
644
+ * to shed load before the query runs — complementing
645
+ * {@link assertCanAdmitNewPackage}, which only fires on cache-miss
646
+ * package loads and so leaves already-loaded packages fully
647
+ * queryable under pressure. With this in place, "back-pressured"
648
+ * means "no new work of any kind" until the governor's low-water
649
+ * mark is crossed.
650
+ *
651
+ * Cheap O(1) boolean read; no allocation when happy.
652
+ */
653
+ public assertCanAdmitQuery(): void {
654
+ if (!this.memoryGovernor?.isBackpressured()) return;
655
+ // Tick first so the counter reflects every rejection even
656
+ // when the controller's catch block swallows the error (e.g.
657
+ // an MCP tool surfaces it as a content payload rather than
658
+ // letting it bubble to the HTTP error mapper).
659
+ getQueryAdmissionRejectionsCounter().add(1, {
660
+ environment: this.environmentName,
661
+ });
581
662
  throw new ServiceUnavailableError(
582
- `Publisher is under memory pressure and cannot ${reason} (package "${packageName}", environment "${this.environmentName}"). Retry after the server's memory usage drops below the configured low-water mark.`,
663
+ `Publisher is under memory pressure and cannot accept new queries (environment "${this.environmentName}"). Retry after the server's memory usage drops below the low-water mark (PUBLISHER_MEMORY_LOW_WATER_FRACTION of PUBLISHER_MAX_MEMORY_BYTES), or raise PUBLISHER_MAX_MEMORY_BYTES if you have headroom.`,
583
664
  );
584
665
  }
585
666
 
@@ -4,8 +4,12 @@ import * as os from "os";
4
4
  import * as path from "path";
5
5
 
6
6
  import { ServiceUnavailableError } from "../errors";
7
+ import {
8
+ startMetricsHarness,
9
+ type MetricsHarness,
10
+ } from "../test_helpers/metrics_harness";
7
11
  import { buildEnvironmentMalloyConfig } from "./connection";
8
- import { Environment } from "./environment";
12
+ import { Environment, resetAdmissionTelemetryForTesting } from "./environment";
9
13
  import type { PackageMemoryGovernor } from "./package_memory_governor";
10
14
 
11
15
  /**
@@ -178,3 +182,163 @@ describe("Environment admission gate (memory governor choke point)", () => {
178
182
  expect(caught).not.toBeInstanceOf(ServiceUnavailableError);
179
183
  });
180
184
  });
185
+
186
+ describe("Environment.assertCanAdmitQuery (query-path back-pressure)", () => {
187
+ let envDir: string;
188
+
189
+ beforeEach(() => {
190
+ envDir = fs.mkdtempSync(
191
+ path.join(os.tmpdir(), "publisher-env-query-admission-"),
192
+ );
193
+ });
194
+
195
+ afterEach(() => {
196
+ fs.rmSync(envDir, { recursive: true, force: true });
197
+ });
198
+
199
+ it("is a no-op when no governor is attached", () => {
200
+ const env = makeEnvironment(envDir);
201
+ // No governor — must not throw. Equivalent of an OSS / non-Docker
202
+ // deployment that never opted into PUBLISHER_MAX_MEMORY_BYTES.
203
+ expect(() => env.assertCanAdmitQuery()).not.toThrow();
204
+ });
205
+
206
+ it("is a no-op when the governor is happy (under high-water mark)", () => {
207
+ const env = makeEnvironment(envDir);
208
+ const governor = new StubGovernor();
209
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
210
+ governor.backpressured = false;
211
+
212
+ expect(() => env.assertCanAdmitQuery()).not.toThrow();
213
+ });
214
+
215
+ it("throws ServiceUnavailableError (→503) when back-pressured", () => {
216
+ const env = makeEnvironment(envDir);
217
+ const governor = new StubGovernor();
218
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
219
+ governor.backpressured = true;
220
+
221
+ expect(() => env.assertCanAdmitQuery()).toThrow(ServiceUnavailableError);
222
+ });
223
+
224
+ it("error message names the environment so operators can pinpoint the hot pod's load", () => {
225
+ const env = makeEnvironment(envDir);
226
+ const governor = new StubGovernor();
227
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
228
+ governor.backpressured = true;
229
+
230
+ let caught: unknown;
231
+ try {
232
+ env.assertCanAdmitQuery();
233
+ } catch (err) {
234
+ caught = err;
235
+ }
236
+ expect(caught).toBeInstanceOf(ServiceUnavailableError);
237
+ expect((caught as Error).message).toContain('environment "test-env"');
238
+ expect((caught as Error).message).toContain("memory pressure");
239
+ });
240
+
241
+ it("clearing back-pressure immediately re-admits queries (matches governor hysteresis)", () => {
242
+ const env = makeEnvironment(envDir);
243
+ const governor = new StubGovernor();
244
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
245
+
246
+ governor.backpressured = true;
247
+ expect(() => env.assertCanAdmitQuery()).toThrow(ServiceUnavailableError);
248
+
249
+ governor.backpressured = false;
250
+ expect(() => env.assertCanAdmitQuery()).not.toThrow();
251
+ });
252
+
253
+ it("detaching the governor reverts to legacy admit-everything", () => {
254
+ const env = makeEnvironment(envDir);
255
+ const governor = new StubGovernor();
256
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
257
+ governor.backpressured = true;
258
+
259
+ env.setMemoryGovernor(null);
260
+ // Even with the stub still claiming back-pressure, a detached
261
+ // governor leaves nothing to consult. Mirrors the package-admission
262
+ // detach behavior.
263
+ expect(() => env.assertCanAdmitQuery()).not.toThrow();
264
+ });
265
+ });
266
+
267
+ describe("Environment admission telemetry", () => {
268
+ let envDir: string;
269
+ let harness: MetricsHarness;
270
+
271
+ beforeEach(async () => {
272
+ envDir = fs.mkdtempSync(
273
+ path.join(os.tmpdir(), "publisher-env-admission-telemetry-"),
274
+ );
275
+ harness = await startMetricsHarness();
276
+ resetAdmissionTelemetryForTesting();
277
+ });
278
+
279
+ afterEach(async () => {
280
+ fs.rmSync(envDir, { recursive: true, force: true });
281
+ resetAdmissionTelemetryForTesting();
282
+ await harness.shutdown();
283
+ });
284
+
285
+ it("publisher_query_admission_rejections_total ticks per query rejection, labeled by environment", async () => {
286
+ const env = makeEnvironment(envDir);
287
+ const governor = new StubGovernor();
288
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
289
+ governor.backpressured = true;
290
+
291
+ // Drive three rejections so the counter is unambiguous (a
292
+ // single-tick assertion can pass against a leaked counter
293
+ // from a different test; three is harder to fake).
294
+ for (let i = 0; i < 3; i++) {
295
+ expect(() => env.assertCanAdmitQuery()).toThrow(
296
+ ServiceUnavailableError,
297
+ );
298
+ }
299
+ expect(
300
+ await harness.collectCounter(
301
+ "publisher_query_admission_rejections_total",
302
+ { environment: "test-env" },
303
+ ),
304
+ ).toBe(3);
305
+ });
306
+
307
+ it("counter stays at zero when the governor is happy (no spurious ticks)", async () => {
308
+ const env = makeEnvironment(envDir);
309
+ const governor = new StubGovernor();
310
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
311
+ // Not back-pressured.
312
+ env.assertCanAdmitQuery();
313
+ env.assertCanAdmitQuery();
314
+ // No rejection should have been recorded; verifies that the
315
+ // counter doesn't tick on the happy-path admission either.
316
+ expect(
317
+ await harness.collectCounter(
318
+ "publisher_query_admission_rejections_total",
319
+ ),
320
+ ).toBe(0);
321
+ });
322
+
323
+ it("publisher_package_admission_rejections_total ticks per package-load rejection", async () => {
324
+ // Ensure the package directory exists so the 404 doesn't
325
+ // short-circuit ahead of the back-pressure gate.
326
+ const pkgName = "real-pkg";
327
+ fs.mkdirSync(path.join(envDir, pkgName));
328
+
329
+ const env = makeEnvironment(envDir);
330
+ const governor = new StubGovernor();
331
+ env.setMemoryGovernor(governor as unknown as PackageMemoryGovernor);
332
+ governor.backpressured = true;
333
+
334
+ await expect(env.addPackage(pkgName)).rejects.toBeInstanceOf(
335
+ ServiceUnavailableError,
336
+ );
337
+ expect(
338
+ await harness.collectCounter(
339
+ "publisher_package_admission_rejections_total",
340
+ { environment: "test-env", reason: "add a new package" },
341
+ ),
342
+ ).toBe(1);
343
+ });
344
+ });