@malloy-publisher/server 0.0.198-dev → 0.0.198-dev1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +42 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/compile_worker.mjs +628 -0
  19. package/dist/default-publisher.config.json +23 -0
  20. package/dist/instrumentation.mjs +36 -38
  21. package/dist/server.mjs +2060 -913
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/compile/compile_pool.spec.ts +227 -0
  27. package/src/compile/compile_pool.ts +729 -0
  28. package/src/compile/compile_worker.ts +683 -0
  29. package/src/compile/protocol.ts +251 -0
  30. package/src/config.spec.ts +306 -0
  31. package/src/config.ts +222 -2
  32. package/src/controller/compile.controller.ts +3 -1
  33. package/src/controller/connection.controller.ts +1 -1
  34. package/src/controller/model.controller.ts +8 -1
  35. package/src/controller/package.controller.ts +70 -29
  36. package/src/controller/query.controller.ts +3 -0
  37. package/src/default-publisher.config.json +23 -0
  38. package/src/errors.spec.ts +42 -0
  39. package/src/errors.ts +21 -0
  40. package/src/health.spec.ts +90 -0
  41. package/src/health.ts +86 -45
  42. package/src/logger.ts +1 -3
  43. package/src/mcp/tools/discovery_tools.ts +6 -2
  44. package/src/mcp/tools/execute_query_tool.ts +12 -0
  45. package/src/path_safety.spec.ts +158 -0
  46. package/src/path_safety.ts +140 -0
  47. package/src/pg_helpers.spec.ts +226 -0
  48. package/src/pg_helpers.ts +129 -0
  49. package/src/server-old.ts +3 -23
  50. package/src/server.ts +49 -0
  51. package/src/service/connection.spec.ts +6 -4
  52. package/src/service/connection.ts +8 -3
  53. package/src/service/connection_config.ts +2 -2
  54. package/src/service/environment.ts +621 -176
  55. package/src/service/environment_admission.spec.ts +180 -0
  56. package/src/service/environment_store.ts +22 -0
  57. package/src/service/filter_integration.spec.ts +110 -0
  58. package/src/service/givens_integration.spec.ts +192 -0
  59. package/src/service/manifest_service.spec.ts +7 -2
  60. package/src/service/manifest_service.ts +8 -2
  61. package/src/service/materialization_service.ts +14 -3
  62. package/src/service/model.spec.ts +105 -0
  63. package/src/service/model.ts +317 -10
  64. package/src/service/model_worker_path.spec.ts +125 -0
  65. package/src/service/package.ts +4 -3
  66. package/src/service/package_memory_governor.spec.ts +173 -0
  67. package/src/service/package_memory_governor.ts +233 -0
  68. package/src/service/package_race.spec.ts +208 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  75. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  76. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  77. package/tests/unit/storage/StorageManager.test.ts +166 -0
  78. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  79. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  80. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  81. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  82. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  83. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  84. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  85. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  86. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -0,0 +1,208 @@
1
+ import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
+ import * as fs from "fs/promises";
3
+ import * as os from "os";
4
+ import * as path from "path";
5
+ import { Environment } from "./environment";
6
+
7
/**
 * Race-condition regression tests for the package-directory pipeline.
 *
 * Three tests. None of them relies on sleeping "long enough" for a race to
 * show up; (B) and (C) each use a single bounded wait purely to observe
 * whether a promise is still pending:
 *
 * 1. **Behavioral race repro** — concurrently install (rewrite the
 *    package directory) and read (`getModelFileText`); assert no
 *    `ENOENT` is observed. On the pre-fix code, the read would fail
 *    mid-rewrite. With the per-package mutex now covering both paths,
 *    all reads succeed.
 *
 * 2. **Mutex coverage** — manually hold `withPackageLock` and assert
 *    that a concurrent reader is pending until released. Pins the
 *    invariant that readers actually take the lock.
 *
 * 3. **Download does not block compile** — start an `installPackage`
 *    whose downloader never resolves on its own, then assert that
 *    `getModelFileText` resolves promptly. This pins the Phase 1 /
 *    Phase 2 split — if a future regression accidentally moves the
 *    download inside the lock, this test fails with an assertion error
 *    instead of hanging until the runner's timeout.
 */
describe("package directory race", () => {
  let rootDir: string;
  let envPath: string;
  let fixtureDir: string;

  const PUBLISHER_JSON = JSON.stringify({
    name: "pkg",
    description: "race-test fixture",
  });
  const MODEL_MALLOY = `source: ones is duckdb.sql("SELECT 1 as x")\n`;

  // Marker returned by `settleOrTimeout` when the deadline wins the race.
  const TIMEOUT_SENTINEL = Symbol("timeout");

  /** Writes the minimal package fixture (manifest + one model) into `targetDir`. */
  async function writeFixture(targetDir: string): Promise<void> {
    await fs.mkdir(targetDir, { recursive: true });
    await fs.writeFile(
      path.join(targetDir, "publisher.json"),
      PUBLISHER_JSON,
    );
    await fs.writeFile(path.join(targetDir, "model.malloy"), MODEL_MALLOY);
  }

  /** Recursively copies `src` into `dst`, creating `dst` first. */
  async function copyDir(src: string, dst: string): Promise<void> {
    await fs.mkdir(dst, { recursive: true });
    await fs.cp(src, dst, { recursive: true });
  }

  /**
   * Resolves with the value of `pending` if it settles within `ms`
   * milliseconds, otherwise with `TIMEOUT_SENTINEL`. A rejection of
   * `pending` propagates. The timer is always cleared so it cannot
   * outlive the test that armed it.
   */
  async function settleOrTimeout<T>(
    pending: Promise<T>,
    ms: number,
  ): Promise<T | typeof TIMEOUT_SENTINEL> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
      timer = setTimeout(() => resolve(TIMEOUT_SENTINEL), ms);
    });
    try {
      return await Promise.race([pending, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }

  beforeEach(async () => {
    rootDir = await fs.mkdtemp(path.join(os.tmpdir(), "publisher-race-"));
    envPath = path.join(rootDir, "env");
    fixtureDir = path.join(rootDir, "fixture");
    await fs.mkdir(envPath, { recursive: true });
    await writeFixture(fixtureDir);
  });

  afterEach(async () => {
    // Best-effort cleanup: a failed removal must not fail the test itself.
    await fs.rm(rootDir, { recursive: true, force: true }).catch(() => {});
  });

  it("(A) concurrent installs and reads never observe a half-rewritten tree", async () => {
    const env = await Environment.create("testEnv", envPath, []);

    // Initial install to populate the canonical path.
    await env.installPackage("pkg", (stagingPath) =>
      copyDir(fixtureDir, stagingPath),
    );

    const ITERATIONS = 30;
    const errors: unknown[] = [];
    let mutatorDone = false;

    // Mutator loop: re-install the package over and over. Each iteration
    // exercises the full Phase 1 (no-lock) + Phase 2 (locked) swap.
    const mutator = (async () => {
      try {
        for (let i = 0; i < ITERATIONS; i++) {
          try {
            await env.installPackage("pkg", (stagingPath) =>
              copyDir(fixtureDir, stagingPath),
            );
          } catch (err) {
            errors.push({ kind: "install", err });
          }
        }
      } finally {
        // Set even on an unexpected throw so the reader loop terminates.
        mutatorDone = true;
      }
    })();

    // Reader loop: hammer `getModelFileText` while installs run. On the
    // pre-fix code (no lock on reads), the read would sometimes hit ENOENT
    // because the canonical dir was momentarily missing during the rename
    // window. With the per-package mutex covering reads as well, this
    // window is never observable.
    const reader = (async () => {
      while (!mutatorDone) {
        try {
          const text = await env.getModelFileText("pkg", "model.malloy");
          expect(text).toBe(MODEL_MALLOY);
        } catch (err) {
          errors.push({ kind: "read", err });
        }
      }
    })();

    await mutator;
    await reader;

    // Any error here means the lock wasn't actually covering one of the
    // sides — that's the regression we're guarding against.
    if (errors.length > 0) {
      throw new Error(
        `Observed ${errors.length} race-window error(s): ${JSON.stringify(
          errors.slice(0, 3),
          // Error instances stringify to `{}` by default; flatten them.
          (_k, v) => (v instanceof Error ? `${v.name}: ${v.message}` : v),
        )}`,
      );
    }
  }, 60_000);

  it("(B) compile-time disk reads queue behind withPackageLock", async () => {
    const env = await Environment.create("testEnv", envPath, []);
    await env.installPackage("pkg", (stagingPath) =>
      copyDir(fixtureDir, stagingPath),
    );

    const lockEntered = defer<void>();
    const releaseLock = defer<void>();

    // Hold the per-package mutex from "outside" — simulates a mutator
    // (install / delete / writePackageManifest) being in flight.
    const lockHolder = env.withPackageLock("pkg", async () => {
      lockEntered.resolve();
      await releaseLock.promise;
    });

    await lockEntered.promise;

    const readPromise = env.getModelFileText("pkg", "model.malloy");
    try {
      // While the lock is held, the reader must NOT make progress.
      const raced = await settleOrTimeout(readPromise, 50);
      expect(raced).toBe(TIMEOUT_SENTINEL);
    } finally {
      // Always release, even when the assertion above fails, so the held
      // lock and the queued read cannot outlive this test. `allSettled`
      // keeps a secondary failure from masking the assertion error.
      releaseLock.resolve();
      await Promise.allSettled([lockHolder, readPromise]);
    }

    // With the lock released the reader must have completed successfully.
    await lockHolder;
    expect(await readPromise).toBe(MODEL_MALLOY);
  }, 15_000);

  it("(C) a slow download does not block concurrent reads", async () => {
    const env = await Environment.create("testEnv", envPath, []);
    // Initial install to make the package present.
    await env.installPackage("pkg", (stagingPath) =>
      copyDir(fixtureDir, stagingPath),
    );

    const downloadGate = defer<void>();

    // Kick off an install whose Phase 1 downloader stalls until we open
    // the gate. Phase 2 (the brief locked swap) cannot run until then.
    const slowInstall = env.installPackage("pkg", async (stagingPath) => {
      await downloadGate.promise;
      await copyDir(fixtureDir, stagingPath);
    });

    try {
      // The reader must resolve well before we open the gate, proving the
      // per-package mutex is NOT held during Phase 1. The read is raced
      // against a deadline because, if the lock WERE held, it could never
      // settle while the gate is closed. 1s is generous; in practice this
      // resolves in single-digit ms.
      const outcome = await settleOrTimeout(
        env.getModelFileText("pkg", "model.malloy"),
        1_000,
      );
      expect(outcome).toBe(MODEL_MALLOY);
    } finally {
      // Open the gate unconditionally so a failed assertion cannot leave
      // the install stalled in the background during teardown.
      downloadGate.resolve();
      await Promise.allSettled([slowInstall]);
    }

    // Surface an install failure on the success path.
    await slowInstall;
  }, 15_000);
});
193
+
194
/**
 * A promise bundled with the functions that settle it, so that settlement
 * can be triggered from outside the promise's executor.
 */
interface Deferred<T> {
  /** The promise controlled by `resolve` / `reject`. */
  promise: Promise<T>;
  /** Fulfils `promise`; like the native resolver, also accepts a thenable to adopt. */
  resolve: (value: T | PromiseLike<T>) => void;
  /** Rejects `promise` with `reason`. */
  reject: (reason?: unknown) => void;
}

/**
 * Creates a pending promise together with its settle functions.
 *
 * @returns The promise and the `resolve` / `reject` callbacks captured from
 *   its executor.
 */
function defer<T>(): Deferred<T> {
  // Definite-assignment assertions: the Promise executor runs synchronously,
  // so both callbacks are assigned before `defer` returns.
  let resolve!: (value: T | PromiseLike<T>) => void;
  let reject!: (reason?: unknown) => void;
  const promise = new Promise<T>((res, rej) => {
    resolve = res;
    reject = rej;
  });
  return { promise, resolve, reject };
}
@@ -1,5 +1,13 @@
1
+ import { Mutex } from "async-mutex";
1
2
  import * as crypto from "crypto";
3
+ import { ConnectionAuthError } from "../errors";
2
4
  import { logger } from "../logger";
5
+ import {
6
+ handlePgAttachError,
7
+ pgConnectTimeoutSeconds,
8
+ redactPgSecrets,
9
+ withPgConnectTimeout,
10
+ } from "../pg_helpers";
3
11
  import {
4
12
  DatabaseConnection,
5
13
  ManifestStore,
@@ -78,6 +86,13 @@ export class StorageManager {
78
86
  */
79
87
  private attachedCatalogs = new Map<string, string>();
80
88
 
89
+ // Serializes DuckLake catalog attaches. Concurrent POST /environments calls
90
+ // hitting the same DuckDB connection would otherwise race on extension
91
+ // autoload (httpfs/azure/etc.), where multiple connections download the
92
+ // extension to `.tmp-<uuid>` files in parallel; only one wins the rename
93
+ // and the rest crash with "Could not remove file ... No such file or directory".
94
+ private duckLakeAttachMutex: Mutex = new Mutex();
95
+
81
96
  private config: StorageConfig;
82
97
 
83
98
  constructor(config: StorageConfig) {
@@ -141,14 +156,18 @@ export class StorageManager {
141
156
  }
142
157
 
143
158
  const key = configKey(config);
144
- let catalogName = this.attachedCatalogs.get(key);
145
- if (!catalogName) {
146
- // Catalog name derived from the config so multiple configs can coexist as
147
- // separate ATTACHments without colliding on the name.
148
- catalogName = catalogNameForConfig(config);
149
- await this.attachDuckLakeCatalog(config, catalogName);
150
- this.attachedCatalogs.set(key, catalogName);
151
- }
159
+ const catalogName = await this.duckLakeAttachMutex.runExclusive(
160
+ async () => {
161
+ const existing = this.attachedCatalogs.get(key);
162
+ if (existing) return existing;
163
+ // Catalog name derived from the config so multiple configs can coexist as
164
+ // separate ATTACHments without colliding on the name.
165
+ const name = catalogNameForConfig(config);
166
+ await this.attachDuckLakeCatalog(config, name);
167
+ this.attachedCatalogs.set(key, name);
168
+ return name;
169
+ },
170
+ );
152
171
 
153
172
  const store = new DuckLakeManifestStore(
154
173
  this.duckDbConnection,
@@ -178,12 +197,31 @@ export class StorageManager {
178
197
  await connection.run("INSTALL postgres; LOAD postgres;");
179
198
  }
180
199
 
181
- const escapedCatalogUrl = escapeSQL(config.catalogUrl);
200
+ // For PG-backed catalogs, inject connect_timeout so a wedged libpq
201
+ // handshake fails the caller in seconds rather than hanging the
202
+ // worker until the K8s liveness probe trips (the 2026-05 incident).
203
+ // Non-PG catalogs (e.g. SQLite, MySQL) pass through unchanged.
204
+ const catalogUrl = isPostgres
205
+ ? withPgConnectTimeout(config.catalogUrl, pgConnectTimeoutSeconds())
206
+ : config.catalogUrl;
207
+
208
+ const escapedCatalogUrl = escapeSQL(catalogUrl);
182
209
  const escapedDataPath = escapeSQL(config.dataPath);
183
210
  const isCloudStorage =
184
211
  config.dataPath.startsWith("gs://") ||
185
212
  config.dataPath.startsWith("s3://");
186
213
 
214
+ // Pre-install httpfs explicitly so the ATTACH below doesn't trigger
215
+ // DuckDB's autoloader. The autoloader downloads extensions to
216
+ // `<ext>.tmp-<uuid>` and races when multiple connections within the
217
+ // same process hit it concurrently — losers fail with
218
+ // "Could not remove file ... No such file or directory" on cleanup
219
+ // of their .tmp file. INSTALL/LOAD here is idempotent and serialized
220
+ // by the caller's mutex.
221
+ if (isCloudStorage) {
222
+ await connection.run("INSTALL httpfs; LOAD httpfs;");
223
+ }
224
+
187
225
  let attachCmd = `ATTACH 'ducklake:${escapedCatalogUrl}' AS ${catalogName}`;
188
226
  const attachOpts: string[] = [
189
227
  `DATA_PATH '${escapedDataPath}'`,
@@ -193,13 +231,35 @@ export class StorageManager {
193
231
  // sidestepping object-storage auth issues entirely for this path.
194
232
  "DATA_INLINING_ROW_LIMIT 100000",
195
233
  ];
234
+
196
235
  if (isCloudStorage) {
197
236
  attachOpts.push("OVERRIDE_DATA_PATH true");
198
237
  }
199
238
  attachCmd += ` (${attachOpts.join(", ")});`;
200
239
 
201
- logger.info(`Attaching DuckLake manifest catalog: ${attachCmd}`);
202
- await connection.run(attachCmd);
240
+ logger.info(
241
+ `Attaching DuckLake manifest catalog: ${redactPgSecrets(attachCmd)}`,
242
+ );
243
+ try {
244
+ await connection.run(attachCmd);
245
+ } catch (error) {
246
+ const outcome = handlePgAttachError(
247
+ error,
248
+ `DuckLake catalog credentials rejected for ${catalogName}`,
249
+ );
250
+ if (outcome.action === "swallow") {
251
+ logger.info(
252
+ `DuckLake catalog ${catalogName} is already attached, skipping`,
253
+ );
254
+ return;
255
+ }
256
+ if (outcome.error instanceof ConnectionAuthError) {
257
+ logger.warn("DuckLake catalog credentials rejected", {
258
+ catalogName,
259
+ });
260
+ }
261
+ throw outcome.error;
262
+ }
203
263
  }
204
264
 
205
265
  getRepository(): ResourceRepository {
@@ -17,6 +17,15 @@ export async function initializeSchema(
17
17
  );
18
18
  await dropAllTables(db);
19
19
  } else {
20
+ // TODO: Remove this during projects cleanup
21
+ // If a pre-rename `projects` schema is on disk, the new
22
+ // CREATE TABLE IF NOT EXISTS pass below would silently leave child
23
+ // tables on the old `project_id` column and the first query against
24
+ // `environment_id` would crash. Drop the legacy tables (with a loud
25
+ // warning) so the fresh schema can be created cleanly. This is
26
+ // destructive — operators upgrading should re-create their environments
27
+ // and packages via the API after the upgrade.
28
+ await dropLegacyProjectSchema(db);
20
29
  logger.info("Creating database schema for the first time...");
21
30
  }
22
31
 
@@ -125,6 +134,38 @@ export async function initializeSchema(
125
134
  );
126
135
  }
127
136
 
137
// TODO: Remove this during projects cleanup
// Pre-rename schema tables. Children come first so that every DROP runs
// before the table it depends on, finishing with the legacy `projects` table.
const LEGACY_TABLES_DROP_ORDER = [
  "build_manifests",
  "materializations",
  "packages",
  "connections",
  "projects",
] as const;

/**
 * Drops the pre-rename schema if a `projects` table is present.
 *
 * Does nothing when no such table is found. Otherwise logs a warning that
 * existing data will be lost and drops each legacy table in
 * `LEGACY_TABLES_DROP_ORDER`. A failed drop is logged and does not stop the
 * remaining drops.
 *
 * @param db Connection the lookup and DROP statements are run on.
 */
async function dropLegacyProjectSchema(db: DuckDBConnection): Promise<void> {
  const legacy = await db.all<{ name: string }>(
    "SELECT name FROM sqlite_master WHERE type='table' AND name='projects'",
  );
  const hasLegacySchema = Boolean(legacy) && legacy.length > 0;
  if (!hasLegacySchema) {
    return;
  }

  logger.warn(
    "Detected legacy 'projects' schema. Dropping legacy tables; existing environments/packages/connections/materializations data will be lost. Re-create them via the API after upgrade.",
  );

  // Best-effort drop of a single table: log the failure and carry on.
  const dropBestEffort = async (table: string): Promise<void> => {
    try {
      await db.run(`DROP TABLE IF EXISTS ${table}`);
    } catch (err) {
      logger.warn(`Failed to drop legacy table ${table}:`, err);
    }
  };

  // Sequential on purpose: the drop order is significant.
  for (const table of LEGACY_TABLES_DROP_ORDER) {
    await dropBestEffort(table);
  }
}
168
+
128
169
  async function dropAllTables(db: DuckDBConnection): Promise<void> {
129
170
  const tables = [
130
171
  "build_manifests",
package/src/utils.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { URLReader } from "@malloydata/malloy";
2
2
  import * as fs from "fs";
3
+ import * as path from "path";
3
4
  import { fileURLToPath } from "url";
4
5
 
5
6
  export const URL_READER: URLReader = {
@@ -11,3 +12,13 @@ export const URL_READER: URLReader = {
11
12
  return fs.promises.readFile(path, "utf8");
12
13
  },
13
14
  };
15
+
16
/**
 * Skip dotfiles/dotdirs (.vscode, .git, .DS_Store, etc.) when walking a
 * package tree. These come from editors/VCS, never contain Malloy models
 * or databases, and have been a source of spurious ENOENTs when their
 * contents disappear mid-scan.
 *
 * Only the final path component is inspected. The navigation entries `.`
 * and `..` are not dotfiles and are never skipped, so a path that ends in
 * one of them is not mistaken for a hidden entry.
 *
 * @param file Path of the entry being considered by the walker.
 * @returns `true` when the entry should be skipped.
 */
export function ignoreDotfiles(file: string): boolean {
  const base = path.basename(file);
  if (base === "." || base === "..") {
    return false;
  }
  return base.startsWith(".");
}
@@ -12,8 +12,8 @@ export interface RestE2EEnv {
12
12
  * reuses the cached Express app and binds on an OS-assigned port
13
13
  * to avoid collisions.
14
14
  *
15
- * Callers are responsible for creating any test-specific projects
16
- * via the REST API (POST /api/v0/projects) and cleaning them up.
15
+ * Callers are responsible for creating any test-specific environments
16
+ * via the REST API (POST /api/v0/environments) and cleaning them up.
17
17
  */
18
18
  export async function startRestE2E(): Promise<
19
19
  RestE2EEnv & { stop(): Promise<void> }