@malloy-publisher/server 0.0.198-dev → 0.0.198-dev1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +42 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/compile_worker.mjs +628 -0
  19. package/dist/default-publisher.config.json +23 -0
  20. package/dist/instrumentation.mjs +36 -38
  21. package/dist/server.mjs +2060 -913
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/compile/compile_pool.spec.ts +227 -0
  27. package/src/compile/compile_pool.ts +729 -0
  28. package/src/compile/compile_worker.ts +683 -0
  29. package/src/compile/protocol.ts +251 -0
  30. package/src/config.spec.ts +306 -0
  31. package/src/config.ts +222 -2
  32. package/src/controller/compile.controller.ts +3 -1
  33. package/src/controller/connection.controller.ts +1 -1
  34. package/src/controller/model.controller.ts +8 -1
  35. package/src/controller/package.controller.ts +70 -29
  36. package/src/controller/query.controller.ts +3 -0
  37. package/src/default-publisher.config.json +23 -0
  38. package/src/errors.spec.ts +42 -0
  39. package/src/errors.ts +21 -0
  40. package/src/health.spec.ts +90 -0
  41. package/src/health.ts +86 -45
  42. package/src/logger.ts +1 -3
  43. package/src/mcp/tools/discovery_tools.ts +6 -2
  44. package/src/mcp/tools/execute_query_tool.ts +12 -0
  45. package/src/path_safety.spec.ts +158 -0
  46. package/src/path_safety.ts +140 -0
  47. package/src/pg_helpers.spec.ts +226 -0
  48. package/src/pg_helpers.ts +129 -0
  49. package/src/server-old.ts +3 -23
  50. package/src/server.ts +49 -0
  51. package/src/service/connection.spec.ts +6 -4
  52. package/src/service/connection.ts +8 -3
  53. package/src/service/connection_config.ts +2 -2
  54. package/src/service/environment.ts +621 -176
  55. package/src/service/environment_admission.spec.ts +180 -0
  56. package/src/service/environment_store.ts +22 -0
  57. package/src/service/filter_integration.spec.ts +110 -0
  58. package/src/service/givens_integration.spec.ts +192 -0
  59. package/src/service/manifest_service.spec.ts +7 -2
  60. package/src/service/manifest_service.ts +8 -2
  61. package/src/service/materialization_service.ts +14 -3
  62. package/src/service/model.spec.ts +105 -0
  63. package/src/service/model.ts +317 -10
  64. package/src/service/model_worker_path.spec.ts +125 -0
  65. package/src/service/package.ts +4 -3
  66. package/src/service/package_memory_governor.spec.ts +173 -0
  67. package/src/service/package_memory_governor.ts +233 -0
  68. package/src/service/package_race.spec.ts +208 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  75. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  76. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  77. package/tests/unit/storage/StorageManager.test.ts +166 -0
  78. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  79. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  80. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  81. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  82. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  83. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  84. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  85. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  86. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -0,0 +1,226 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+ import { ConnectionAuthError } from "./errors";
3
+ import {
4
+ classifyPgError,
5
+ handlePgAttachError,
6
+ pgConnectTimeoutSeconds,
7
+ redactPgSecrets,
8
+ withPgConnectTimeout,
9
+ } from "./pg_helpers";
10
+
11
+ describe("pgConnectTimeoutSeconds", () => {
12
+ const ORIGINAL_TIMEOUT = process.env.PG_CONNECT_TIMEOUT_SECONDS;
13
+
14
+ afterEach(() => {
15
+ if (ORIGINAL_TIMEOUT === undefined) {
16
+ delete process.env.PG_CONNECT_TIMEOUT_SECONDS;
17
+ } else {
18
+ process.env.PG_CONNECT_TIMEOUT_SECONDS = ORIGINAL_TIMEOUT;
19
+ }
20
+ });
21
+
22
+ it("defaults to 5 when env unset", () => {
23
+ delete process.env.PG_CONNECT_TIMEOUT_SECONDS;
24
+ expect(pgConnectTimeoutSeconds()).toBe(5);
25
+ });
26
+
27
+ it("honors PG_CONNECT_TIMEOUT_SECONDS override", () => {
28
+ process.env.PG_CONNECT_TIMEOUT_SECONDS = "12";
29
+ expect(pgConnectTimeoutSeconds()).toBe(12);
30
+ });
31
+
32
+ it("falls back to 5 when env value is invalid", () => {
33
+ process.env.PG_CONNECT_TIMEOUT_SECONDS = "not-a-number";
34
+ expect(pgConnectTimeoutSeconds()).toBe(5);
35
+ });
36
+
37
+ it("falls back to 5 when env value is zero or negative", () => {
38
+ process.env.PG_CONNECT_TIMEOUT_SECONDS = "0";
39
+ expect(pgConnectTimeoutSeconds()).toBe(5);
40
+ process.env.PG_CONNECT_TIMEOUT_SECONDS = "-3";
41
+ expect(pgConnectTimeoutSeconds()).toBe(5);
42
+ });
43
+ });
44
+
45
+ describe("withPgConnectTimeout", () => {
46
+ it("appends to keyword form when missing", () => {
47
+ expect(withPgConnectTimeout("host=h dbname=d user=u password=p", 5)).toBe(
48
+ "host=h dbname=d user=u password=p connect_timeout=5",
49
+ );
50
+ });
51
+
52
+ it("appends to postgres: keyword form (DuckLake catalogUrl shape)", () => {
53
+ expect(
54
+ withPgConnectTimeout("postgres:host=h user=u password=p dbname=d", 5),
55
+ ).toBe("postgres:host=h user=u password=p dbname=d connect_timeout=5");
56
+ });
57
+
58
+ it("does not override a user-supplied connect_timeout in keyword form", () => {
59
+ expect(withPgConnectTimeout("host=h connect_timeout=30", 99)).toBe(
60
+ "host=h connect_timeout=30",
61
+ );
62
+ });
63
+
64
+ it("appends to URI form with no query", () => {
65
+ expect(withPgConnectTimeout("postgresql://u:p@h:5432/d", 5)).toBe(
66
+ "postgresql://u:p@h:5432/d?connect_timeout=5",
67
+ );
68
+ });
69
+
70
+ it("appends to URI form with existing query", () => {
71
+ expect(
72
+ withPgConnectTimeout("postgresql://u:p@h/d?sslmode=require", 5),
73
+ ).toBe("postgresql://u:p@h/d?sslmode=require&connect_timeout=5");
74
+ });
75
+
76
+ it("appends to URI with bare trailing ?", () => {
77
+ expect(withPgConnectTimeout("postgresql://h/d?", 5)).toBe(
78
+ "postgresql://h/d?connect_timeout=5",
79
+ );
80
+ });
81
+
82
+ it("does not double-append when URI already has connect_timeout (?-style)", () => {
83
+ expect(
84
+ withPgConnectTimeout("postgresql://h/d?connect_timeout=10", 5),
85
+ ).toBe("postgresql://h/d?connect_timeout=10");
86
+ });
87
+
88
+ it("does not double-append when URI already has connect_timeout (&-style)", () => {
89
+ expect(
90
+ withPgConnectTimeout(
91
+ "postgresql://h/d?sslmode=require&connect_timeout=10",
92
+ 5,
93
+ ),
94
+ ).toBe("postgresql://h/d?sslmode=require&connect_timeout=10");
95
+ });
96
+
97
+ it("recognizes postgres:// (alternative scheme) as URI form", () => {
98
+ expect(withPgConnectTimeout("postgres://u@h/d", 5)).toBe(
99
+ "postgres://u@h/d?connect_timeout=5",
100
+ );
101
+ });
102
+ });
103
+
104
+ describe("redactPgSecrets", () => {
105
+ it("redacts bare password values", () => {
106
+ expect(redactPgSecrets("host=h password=hunter2 dbname=d")).toBe(
107
+ "host=h password=*** dbname=d",
108
+ );
109
+ });
110
+
111
+ it("redacts single-quoted password values", () => {
112
+ expect(redactPgSecrets("host=h password='s3 cret' dbname=d")).toBe(
113
+ "host=h password=*** dbname=d",
114
+ );
115
+ });
116
+
117
+ it("leaves non-secret content alone", () => {
118
+ expect(redactPgSecrets("user=alice dbname=billing")).toBe(
119
+ "user=alice dbname=billing",
120
+ );
121
+ });
122
+ });
123
+
124
+ describe("classifyPgError", () => {
125
+ it.each([
126
+ 'password authentication failed for user "alice"',
127
+ "no pg_hba.conf entry for host",
128
+ 'role "alice" does not exist',
129
+ 'database "billing" does not exist',
130
+ "permission denied for relation foo",
131
+ ])("classifies '%s' as auth error", (msg) => {
132
+ const result = classifyPgError(new Error(msg), "PG attach");
133
+ expect(result).toBeInstanceOf(ConnectionAuthError);
134
+ expect(result?.message).toContain("PG attach:");
135
+ });
136
+
137
+ it("returns undefined for unrelated errors", () => {
138
+ expect(
139
+ classifyPgError(
140
+ new Error('relation "users" does not exist'),
141
+ "PG attach",
142
+ ),
143
+ ).toBeUndefined();
144
+ expect(
145
+ classifyPgError(new Error("connection reset by peer"), "PG attach"),
146
+ ).toBeUndefined();
147
+ });
148
+
149
+ it("returns undefined for non-Error values", () => {
150
+ expect(
151
+ classifyPgError("password authentication failed", "ctx"),
152
+ ).toBeUndefined();
153
+ expect(classifyPgError(undefined, "ctx")).toBeUndefined();
154
+ });
155
+
156
+ it("redacts embedded passwords in the wrapped message", () => {
157
+ const result = classifyPgError(
158
+ new Error(
159
+ "password authentication failed: tried host=h password=hunter2",
160
+ ),
161
+ "DuckLake attach",
162
+ );
163
+ expect(result?.message).toContain("password=***");
164
+ expect(result?.message).not.toContain("hunter2");
165
+ });
166
+ });
167
+
168
+ describe("handlePgAttachError", () => {
169
+ it("swallows 'already exists' errors", () => {
170
+ const outcome = handlePgAttachError(
171
+ new Error('database "db_x" already exists'),
172
+ "ctx",
173
+ );
174
+ expect(outcome.action).toBe("swallow");
175
+ });
176
+
177
+ it("swallows 'already attached' errors", () => {
178
+ const outcome = handlePgAttachError(
179
+ new Error("DuckLake catalog db_x is already attached"),
180
+ "ctx",
181
+ );
182
+ expect(outcome.action).toBe("swallow");
183
+ });
184
+
185
+ it("classifies libpq auth failures as ConnectionAuthError", () => {
186
+ const outcome = handlePgAttachError(
187
+ new Error('password authentication failed for user "alice"'),
188
+ "PG attach db_x",
189
+ );
190
+ expect(outcome.action).toBe("throw");
191
+ if (outcome.action === "throw") {
192
+ expect(outcome.error).toBeInstanceOf(ConnectionAuthError);
193
+ expect(outcome.error.message).toContain("PG attach db_x:");
194
+ }
195
+ });
196
+
197
+ it("passes through unrelated Error instances unchanged", () => {
198
+ const original = new Error("network unreachable");
199
+ const outcome = handlePgAttachError(original, "ctx");
200
+ expect(outcome.action).toBe("throw");
201
+ if (outcome.action === "throw") {
202
+ expect(outcome.error).toBe(original);
203
+ expect(outcome.error).not.toBeInstanceOf(ConnectionAuthError);
204
+ }
205
+ });
206
+
207
+ it("wraps non-Error throwables so callers always get an Error", () => {
208
+ const outcome = handlePgAttachError("a string was thrown", "ctx");
209
+ expect(outcome.action).toBe("throw");
210
+ if (outcome.action === "throw") {
211
+ expect(outcome.error).toBeInstanceOf(Error);
212
+ expect(outcome.error.message).toBe("a string was thrown");
213
+ }
214
+ });
215
+
216
+ it("prefers 'already attached' over auth classification when both keywords appear", () => {
217
+ // Defensive: if a future DuckDB version emits a combined message,
218
+ // 'already attached' wins so we don't bubble up a false auth failure
219
+ // on what is actually a benign idempotent re-attach.
220
+ const outcome = handlePgAttachError(
221
+ new Error("already attached; permission denied tail"),
222
+ "ctx",
223
+ );
224
+ expect(outcome.action).toBe("swallow");
225
+ });
226
+ });
@@ -0,0 +1,129 @@
1
+ // Postgres / libpq helpers shared between `service/` (user-facing
2
+ // connections) and `storage/` (materialization-storage catalog). Lives at
3
+ // `src/` root so neither layer takes a dependency on the other — see
4
+ // CLAUDE.md's "Two parallel DuckLake/PG attach paths" note for why this
5
+ // matters.
6
+ import { ConnectionAuthError } from "./errors";
7
+
8
+ // Default Postgres connect_timeout (seconds), used by the materialization
9
+ // storage catalog ATTACH so a slow or wedged libpq handshake fails the
10
+ // caller in seconds instead of stalling the worker until the K8s liveness
11
+ // probe trips.
12
+ //
13
+ // libpq enforces a documented minimum of 2 seconds — values below 2
14
+ // effectively round up to ~2s wall clock.
15
+ export function pgConnectTimeoutSeconds(): number {
16
+ const raw = process.env.PG_CONNECT_TIMEOUT_SECONDS;
17
+ if (!raw) return 5;
18
+ const parsed = Number.parseInt(raw, 10);
19
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : 5;
20
+ }
21
+
22
+ // libpq accepts both keyword=value form ("host=h dbname=d") and URI form
23
+ // ("postgresql://u:p@h/d?param=v"). The materialization-storage catalogUrl
24
+ // can also arrive as `postgres:<keyword=value>` (no `//`). We detect URI
25
+ // form (with `//`) so we know whether to append a new parameter using
26
+ // `?`/`&` or a leading space.
27
+ const URI_FORM_RE = /^[a-z][a-z0-9+.-]*:\/\//i;
28
+
29
+ // Match an existing connect_timeout key in either form. URI form uses
30
+ // `?key=` or `&key=`; keyword form uses whitespace separation or start-of-
31
+ // string. Without the `[?&]` alternatives a URI-form user-supplied timeout
32
+ // would be missed and we'd double-append, producing an invalid URL.
33
+ const HAS_CONNECT_TIMEOUT_RE = /[?&\s]connect_timeout=|^connect_timeout=/;
34
+
35
+ // Append `connect_timeout=N` to a libpq-compatible connection string if
36
+ // the caller hasn't already set one. Handles keyword form ("host=h ..."),
37
+ // URI form ("postgresql://..."), and the `postgres:host=h ...` keyword
38
+ // form with a scheme prefix used by DuckLake catalogUrls.
39
+ export function withPgConnectTimeout(
40
+ connectionString: string,
41
+ timeout: number,
42
+ ): string {
43
+ if (HAS_CONNECT_TIMEOUT_RE.test(connectionString)) {
44
+ return connectionString;
45
+ }
46
+ if (URI_FORM_RE.test(connectionString)) {
47
+ // URI form: append as query parameter. `?` if no query string yet,
48
+ // `&` otherwise. A bare trailing `?` (empty query) gets no extra
49
+ // separator. We don't try to handle URL fragments — libpq URIs don't
50
+ // use them.
51
+ if (!connectionString.includes("?")) {
52
+ return `${connectionString}?connect_timeout=${timeout}`;
53
+ }
54
+ if (connectionString.endsWith("?")) {
55
+ return `${connectionString}connect_timeout=${timeout}`;
56
+ }
57
+ return `${connectionString}&connect_timeout=${timeout}`;
58
+ }
59
+ // Keyword=value form (with or without `postgres:` scheme prefix).
60
+ return `${connectionString} connect_timeout=${timeout}`;
61
+ }
62
+
63
+ // Redact libpq `password=...` values from a string before it goes into a
64
+ // log line or HTTP response body. Handles bare and quoted values.
65
+ //
66
+ // Scope: keyword-form `password=` only. Does not touch URL-style
67
+ // `user:pw@host` credentials, AWS keys, GCS secrets, etc.
68
+ export function redactPgSecrets(s: string): string {
69
+ return s.replace(/password=('[^']*'|"[^"]*"|\S+)/gi, "password=***");
70
+ }
71
+
72
+ // Substring-match libpq error patterns that indicate a non-retryable
73
+ // auth/permission failure. Returns a ConnectionAuthError when matched so
74
+ // callers can fast-fail with HTTP 422 (semantically "the supplied creds
75
+ // are bad; don't retry") instead of letting the raw error fall through to
76
+ // a generic 500 that retry loops treat as transient.
77
+ export function classifyPgError(
78
+ error: unknown,
79
+ context: string,
80
+ ): ConnectionAuthError | undefined {
81
+ if (!(error instanceof Error)) return undefined;
82
+ const msg = error.message;
83
+ const patterns = [
84
+ /password authentication failed/i,
85
+ /pg_hba\.conf/i,
86
+ /role ".*" does not exist/i,
87
+ /database ".*" does not exist/i,
88
+ /permission denied/i,
89
+ ];
90
+ if (!patterns.some((p) => p.test(msg))) return undefined;
91
+ return new ConnectionAuthError(`${context}: ${redactPgSecrets(msg)}`);
92
+ }
93
+
94
+ // Outcome of inspecting an error thrown by an `ATTACH` call:
95
+ // - `{ action: "swallow" }`: the error message says the db "already exists"
96
+ // or is "already attached" (idempotent re-attach); caller should log and continue.
97
+ // - `{ action: "throw", error: ConnectionAuthError }`: classified as a
98
+ // non-retryable auth failure; caller should warn-log and throw it.
99
+ // - `{ action: "throw", error: <original> }`: unrecognized; caller
100
+ // should rethrow as-is to preserve the original cause for diagnosis.
101
+ //
102
+ // Extracted so the decision tree gets a direct unit test without needing
103
+ // to stub DuckDB or run a real ATTACH.
104
+ export type PgAttachErrorOutcome =
105
+ | { action: "swallow" }
106
+ | { action: "throw"; error: Error };
107
+
108
+ export function handlePgAttachError(
109
+ error: unknown,
110
+ context: string,
111
+ ): PgAttachErrorOutcome {
112
+ if (
113
+ error instanceof Error &&
114
+ (error.message.includes("already exists") ||
115
+ error.message.includes("already attached"))
116
+ ) {
117
+ return { action: "swallow" };
118
+ }
119
+ const authErr = classifyPgError(error, context);
120
+ if (authErr) {
121
+ return { action: "throw", error: authErr };
122
+ }
123
+ if (error instanceof Error) {
124
+ return { action: "throw", error };
125
+ }
126
+ // Non-Error thrown values get wrapped so the contract holds: a "throw"
127
+ // outcome always carries an Error instance.
128
+ return { action: "throw", error: new Error(String(error)) };
129
+ }
package/src/server-old.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  /* eslint-disable @typescript-eslint/no-explicit-any */
2
+ // TODO: Remove this during projects cleanup
2
3
  /**
3
4
  * Legacy `/projects/...` route registration.
4
5
  *
@@ -15,11 +16,12 @@
15
16
  * format between old (`Project`) and new (`Environment`) specs, so they
16
17
  * pass through unchanged.
17
18
  * - The handful of payloads that DO have field-level renames are remapped:
18
- * * GET /status — `environments` -> `projects`
19
19
  * * Materialization responses — `environmentId` -> `projectId`
20
20
  *
21
21
  * - Watch-mode is intentionally not exposed under the legacy prefix; clients
22
22
  * that need it should use the new `/environments/...` paths directly.
23
+ * - `/status` is shared with the new server.ts handler — both old and new
24
+ * clients receive the new `environments`-keyed payload.
23
25
  */
24
26
 
25
27
  import bodyParser from "body-parser";
@@ -59,16 +61,6 @@ export interface LegacyControllerSet {
59
61
 
60
62
  // ─── response/body field mappers ───────────────────────────────────────────
61
63
 
62
- function remapStatusResponse(status: any): any {
63
- if (!status || typeof status !== "object") return status;
64
- const out: Record<string, any> = { ...status };
65
- if ("environments" in out) {
66
- out.projects = out.environments;
67
- delete out.environments;
68
- }
69
- return out;
70
- }
71
-
72
64
  function remapMaterializationResponse(mat: any): any {
73
65
  if (!mat || typeof mat !== "object") return mat;
74
66
  if (Array.isArray(mat)) {
@@ -112,18 +104,6 @@ export function registerLegacyRoutes(
112
104
  // same `${API_PREFIX}` prefix so they inherit it automatically.
113
105
  void bodyParser; // keep the import; helper file reference for clarity
114
106
 
115
- // ── status ──────────────────────────────────────────────────────────────
116
- app.get(`${LEGACY_API_PREFIX}/status`, async (_req, res) => {
117
- try {
118
- const status = await environmentStore.getStatus();
119
- res.status(200).json(remapStatusResponse(status));
120
- } catch (error) {
121
- logger.error("Error getting status", { error });
122
- const { json, status } = internalErrorToHttpError(error as Error);
123
- res.status(status).json(json);
124
- }
125
- });
126
-
127
107
  // ── projects (== environments) ──────────────────────────────────────────
128
108
  app.get(`${LEGACY_API_PREFIX}/projects`, async (_req, res) => {
129
109
  try {
package/src/server.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  // Pre-load the instrumentation module; the instrumentation module must be loaded before the other imports.
2
+ import type { GivenValue } from "@malloydata/malloy";
2
3
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
3
4
  import bodyParser from "body-parser";
4
5
  import cors from "cors";
@@ -33,6 +34,7 @@ import {
33
34
  } from "./instrumentation";
34
35
  import { logger, loggerMiddleware } from "./logger";
35
36
 
37
+ import { getMemoryGovernorConfig } from "./config";
36
38
  import { ManifestController } from "./controller/manifest.controller";
37
39
  import { MaterializationController } from "./controller/materialization.controller";
38
40
  import { initializeMcpServer } from "./mcp/server";
@@ -40,6 +42,7 @@ import { registerLegacyRoutes } from "./server-old";
40
42
  import { EnvironmentStore } from "./service/environment_store";
41
43
  import { ManifestService } from "./service/manifest_service";
42
44
  import { MaterializationService } from "./service/materialization_service";
45
+ import { PackageMemoryGovernor } from "./service/package_memory_governor";
43
46
 
44
47
  /** Normalize an Express query param into a string[] or undefined. */
45
48
  export function normalizeQueryArray(value: unknown): string[] | undefined {
@@ -51,6 +54,8 @@ export function normalizeQueryArray(value: unknown): string[] | undefined {
51
54
  // Parse command line arguments
52
55
  function parseArgs() {
53
56
  const args = process.argv.slice(2);
57
+ let sawServerRoot = false;
58
+ let sawConfig = false;
54
59
  for (let i = 0; i < args.length; i++) {
55
60
  const arg = args[i];
56
61
  if (arg === "--port" && args[i + 1]) {
@@ -60,8 +65,13 @@ function parseArgs() {
60
65
  process.env.PUBLISHER_HOST = args[i + 1];
61
66
  i++;
62
67
  } else if (arg === "--server_root" && args[i + 1]) {
68
+ sawServerRoot = true;
63
69
  process.env.SERVER_ROOT = args[i + 1];
64
70
  i++;
71
+ } else if (arg === "--config" && args[i + 1]) {
72
+ sawConfig = true;
73
+ process.env.PUBLISHER_CONFIG_PATH = args[i + 1];
74
+ i++;
65
75
  } else if (arg === "--mcp_port" && args[i + 1]) {
66
76
  process.env.MCP_PORT = args[i + 1];
67
77
  i++;
@@ -91,6 +101,9 @@ function parseArgs() {
91
101
  console.log(
92
102
  " --server_root <path> Root directory to serve files from (default: .)",
93
103
  );
104
+ console.log(
105
+ " --config <path> Path to publisher.config.json (default: <server_root>/publisher.config.json; falls back to bundled DuckDB-only sample config if missing)",
106
+ );
94
107
  console.log(
95
108
  " --mcp_port <number> Port for MCP server (default: 4040)",
96
109
  );
@@ -107,6 +120,16 @@ function parseArgs() {
107
120
  process.exit(0);
108
121
  }
109
122
  }
123
+ // Zero-config invocation (`npx @malloy-publisher/server`) opts in to
124
+ // the bundled DuckDB-only sample config so the Quick Start works
125
+ // without any flags. Any explicit --server_root or --config disables
126
+ // this — the user told us where to look. Skip in NODE_ENV=test so
127
+ // specs that import this module for utility helpers (e.g.
128
+ // db_utils.spec.ts -> normalizeQueryArray) don't get the bundled
129
+ // default leaked into their EnvironmentStore construction.
130
+ if (!sawServerRoot && !sawConfig && process.env.NODE_ENV !== "test") {
131
+ process.env.PUBLISHER_USE_BUNDLED_DEFAULT = "true";
132
+ }
110
133
  }
111
134
 
112
135
  // Parse CLI arguments before setting up constants
@@ -138,6 +161,17 @@ const manifestService = new ManifestService(environmentStore);
138
161
  const watchModeController = new WatchModeController(environmentStore);
139
162
  const connectionController = new ConnectionController(environmentStore);
140
163
  const modelController = new ModelController(environmentStore);
164
+ // PackageMemoryGovernor is opt-in via PUBLISHER_MAX_MEMORY_BYTES.
165
+ // When set, it polls process RSS and flips an `isBackpressured` flag
166
+ // that Environment.getPackage / addPackage consult before allocating
167
+ // any new package — the server responds with HTTP 503 instead of
168
+ // OOM-killing the pod.
169
+ const memoryGovernorConfig = getMemoryGovernorConfig();
170
+ const memoryGovernor = memoryGovernorConfig
171
+ ? new PackageMemoryGovernor(memoryGovernorConfig)
172
+ : null;
173
+ memoryGovernor?.start();
174
+ environmentStore.setMemoryGovernor(memoryGovernor);
141
175
  const packageController = new PackageController(
142
176
  environmentStore,
143
177
  manifestService,
@@ -1077,6 +1111,18 @@ app.get(
1077
1111
  const bypassFilters =
1078
1112
  req.query.bypass_filters === "true" ? true : undefined;
1079
1113
 
1114
+ let givens: Record<string, GivenValue> | undefined;
1115
+ if (typeof req.query.givens === "string") {
1116
+ try {
1117
+ givens = JSON.parse(req.query.givens);
1118
+ } catch {
1119
+ res.status(400).json({
1120
+ error: "Invalid givens: must be valid JSON",
1121
+ });
1122
+ return;
1123
+ }
1124
+ }
1125
+
1080
1126
  res.status(200).json(
1081
1127
  await modelController.executeNotebookCell(
1082
1128
  req.params.environmentName,
@@ -1085,6 +1131,7 @@ app.get(
1085
1131
  cellIndex,
1086
1132
  filterParams,
1087
1133
  bypassFilters,
1134
+ givens,
1088
1135
  ),
1089
1136
  );
1090
1137
  } catch (error) {
@@ -1145,6 +1192,7 @@ app.post(
1145
1192
  | Record<string, string | string[]>
1146
1193
  | undefined,
1147
1194
  req.body.bypassFilters === true ? true : undefined,
1195
+ req.body.givens as Record<string, GivenValue> | undefined,
1148
1196
  ),
1149
1197
  );
1150
1198
  } catch (error) {
@@ -1188,6 +1236,7 @@ app.post(
1188
1236
  req.params.modelName,
1189
1237
  req.body.source,
1190
1238
  req.body.includeSql === true,
1239
+ req.body.givens as Record<string, GivenValue> | undefined,
1191
1240
  );
1192
1241
  res.status(200).json(result);
1193
1242
  } catch (error) {
@@ -1129,10 +1129,14 @@ describe("connection integration tests", () => {
1129
1129
  ],
1130
1130
  testEnvironmentPath,
1131
1131
  ),
1132
- ).rejects.toThrow(/cannot be 'duckdb'/);
1132
+ ).rejects.toThrow(/'duckdb' is reserved/);
1133
1133
  });
1134
1134
 
1135
1135
  it("should reject DuckDB connections with no attachments", async () => {
1136
+ // Env-level DuckDB connections must declare at least one
1137
+ // attached foreign database; the empty-array case is operator
1138
+ // confusion (the per-package "duckdb" sandbox already covers
1139
+ // the plain-in-memory use case).
1136
1140
  await expect(
1137
1141
  createEnvironmentConnections(
1138
1142
  [
@@ -1144,9 +1148,7 @@ describe("connection integration tests", () => {
1144
1148
  ],
1145
1149
  testEnvironmentPath,
1146
1150
  ),
1147
- ).rejects.toThrow(
1148
- "DuckDB connection must have at least one attached database",
1149
- );
1151
+ ).rejects.toThrow(/has no attached databases/);
1150
1152
  });
1151
1153
 
1152
1154
  it("should reject unsupported DuckDB connector fields", async () => {
@@ -25,6 +25,7 @@ import fs from "fs/promises";
25
25
  import path from "path";
26
26
  import { components } from "../api";
27
27
  import { logAxiosError, logger } from "../logger";
28
+ import { redactPgSecrets } from "../pg_helpers";
28
29
  import {
29
30
  assembleEnvironmentConnections,
30
31
  CoreConnectionEntry,
@@ -365,13 +366,17 @@ async function attachDuckLake(
365
366
  const pgConnString: string = buildPgConnectionString(pg);
366
367
  // Attach DuckLake with Postgres catalog and cloud storage data path in READ_ONLY mode
367
368
  // The client manages metadata - we only read from the catalogs
368
- logger.info(`pgConnString: ${pgConnString}`);
369
+ logger.info(`pgConnString: ${redactPgSecrets(pgConnString)}`);
369
370
  const escapedPgConnString = escapeSQL(pgConnString);
370
- logger.info(`Final escaped connection string: ${escapedPgConnString}`);
371
+ logger.info(
372
+ `Final escaped connection string: ${redactPgSecrets(escapedPgConnString)}`,
373
+ );
371
374
  const escapedBucketUrl = escapeSQL(ducklakeConfig.storage.bucketUrl);
372
375
  logger.info(`escapedBucketUrl: ${escapedBucketUrl}`);
373
376
  const attachCommand = `ATTACH OR REPLACE 'ducklake:postgres:${escapedPgConnString}' AS ${dbName} (DATA_PATH '${escapedBucketUrl}', OVERRIDE_DATA_PATH true, READ_ONLY true);`;
374
- logger.info(`Attaching DuckLake database using command: ${attachCommand}`);
377
+ logger.info(
378
+ `Attaching DuckLake database using command: ${redactPgSecrets(attachCommand)}`,
379
+ );
375
380
  try {
376
381
  await connection.runSQL(attachCommand);
377
382
  logger.info(
@@ -272,7 +272,7 @@ function validateConnectionShape(connection: ApiConnection): void {
272
272
  connection.duckdbConnection.attachedDatabases ?? [];
273
273
  if (attached.length === 0) {
274
274
  throw new Error(
275
- "DuckDB connection must have at least one attached database",
275
+ `DuckDB connection "${connection.name}" has no attached databases. Add at least one foreign database (BigQuery, Snowflake, Postgres, GCS, S3, Azure) to attachedDatabases, or remove this connection entirely — each package already gets a per-package DuckDB sandbox named "duckdb" automatically.`,
276
276
  );
277
277
  }
278
278
  }
@@ -359,7 +359,7 @@ export function assembleEnvironmentConnections(
359
359
 
360
360
  if (connection.name === "duckdb") {
361
361
  throw new Error(
362
- "DuckDB connection name cannot be 'duckdb'; it is reserved for Publisher package sandboxes.",
362
+ "Connection name 'duckdb' is reserved for per-package sandboxes. Choose a different name for environment-level DuckDB connections (e.g. 'shared_duckdb').",
363
363
  );
364
364
  }
365
365