@malloy-publisher/server 0.0.198-dev → 0.0.198-dev1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.docker.md +135 -20
  2. package/README.md +15 -0
  3. package/build.ts +42 -1
  4. package/dist/app/api-doc.yaml +51 -0
  5. package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
  6. package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
  7. package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
  8. package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
  9. package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
  10. package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
  11. package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
  12. package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
  13. package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
  14. package/dist/app/assets/index-D1pdwrUW.js +1803 -0
  15. package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
  16. package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
  17. package/dist/app/index.html +2 -3
  18. package/dist/compile_worker.mjs +628 -0
  19. package/dist/default-publisher.config.json +23 -0
  20. package/dist/instrumentation.mjs +36 -38
  21. package/dist/server.mjs +2060 -913
  22. package/package.json +11 -12
  23. package/publisher.config.example.bigquery.json +33 -0
  24. package/publisher.config.example.duckdb.json +23 -0
  25. package/publisher.config.json +1 -11
  26. package/src/compile/compile_pool.spec.ts +227 -0
  27. package/src/compile/compile_pool.ts +729 -0
  28. package/src/compile/compile_worker.ts +683 -0
  29. package/src/compile/protocol.ts +251 -0
  30. package/src/config.spec.ts +306 -0
  31. package/src/config.ts +222 -2
  32. package/src/controller/compile.controller.ts +3 -1
  33. package/src/controller/connection.controller.ts +1 -1
  34. package/src/controller/model.controller.ts +8 -1
  35. package/src/controller/package.controller.ts +70 -29
  36. package/src/controller/query.controller.ts +3 -0
  37. package/src/default-publisher.config.json +23 -0
  38. package/src/errors.spec.ts +42 -0
  39. package/src/errors.ts +21 -0
  40. package/src/health.spec.ts +90 -0
  41. package/src/health.ts +86 -45
  42. package/src/logger.ts +1 -3
  43. package/src/mcp/tools/discovery_tools.ts +6 -2
  44. package/src/mcp/tools/execute_query_tool.ts +12 -0
  45. package/src/path_safety.spec.ts +158 -0
  46. package/src/path_safety.ts +140 -0
  47. package/src/pg_helpers.spec.ts +226 -0
  48. package/src/pg_helpers.ts +129 -0
  49. package/src/server-old.ts +3 -23
  50. package/src/server.ts +49 -0
  51. package/src/service/connection.spec.ts +6 -4
  52. package/src/service/connection.ts +8 -3
  53. package/src/service/connection_config.ts +2 -2
  54. package/src/service/environment.ts +621 -176
  55. package/src/service/environment_admission.spec.ts +180 -0
  56. package/src/service/environment_store.ts +22 -0
  57. package/src/service/filter_integration.spec.ts +110 -0
  58. package/src/service/givens_integration.spec.ts +192 -0
  59. package/src/service/manifest_service.spec.ts +7 -2
  60. package/src/service/manifest_service.ts +8 -2
  61. package/src/service/materialization_service.ts +14 -3
  62. package/src/service/model.spec.ts +105 -0
  63. package/src/service/model.ts +317 -10
  64. package/src/service/model_worker_path.spec.ts +125 -0
  65. package/src/service/package.ts +4 -3
  66. package/src/service/package_memory_governor.spec.ts +173 -0
  67. package/src/service/package_memory_governor.ts +233 -0
  68. package/src/service/package_race.spec.ts +208 -0
  69. package/src/storage/StorageManager.ts +71 -11
  70. package/src/storage/duckdb/schema.ts +41 -0
  71. package/src/utils.ts +11 -0
  72. package/tests/harness/rest_e2e.ts +2 -2
  73. package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
  74. package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
  75. package/tests/unit/duckdb/attached_databases.test.ts +5 -5
  76. package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
  77. package/tests/unit/storage/StorageManager.test.ts +166 -0
  78. package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
  79. package/dist/app/assets/HomePage-DMop21VG.js +0 -1
  80. package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
  81. package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
  82. package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
  83. package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
  84. package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
  85. package/dist/app/assets/index-5K9YjIxF.js +0 -456
  86. package/dist/app/assets/index-DIgzgp69.js +0 -1742
@@ -1,6 +1,7 @@
1
- import type { LogMessage } from "@malloydata/malloy";
1
+ import type { GivenValue, LogMessage } from "@malloydata/malloy";
2
2
  import { MalloyError, Runtime } from "@malloydata/malloy";
3
3
  import { Mutex } from "async-mutex";
4
+ import crypto from "crypto";
4
5
  import * as fs from "fs";
5
6
  import * as path from "path";
6
7
  import { components } from "../api";
@@ -9,8 +10,16 @@ import {
9
10
  ConnectionNotFoundError,
10
11
  EnvironmentNotFoundError,
11
12
  PackageNotFoundError,
13
+ ServiceUnavailableError,
12
14
  } from "../errors";
13
15
  import { logger } from "../logger";
16
+ import {
17
+ assertSafeEnvironmentPath,
18
+ assertSafePackageName,
19
+ assertSafeRelativeModelPath,
20
+ safeJoinUnderRoot,
21
+ } from "../path_safety";
22
+ import { BuildManifest } from "../storage/DatabaseInterface";
14
23
  import { URL_READER } from "../utils";
15
24
  import {
16
25
  buildEnvironmentMalloyConfig,
@@ -20,6 +29,23 @@ import {
20
29
  } from "./connection";
21
30
  import { ApiConnection } from "./model";
22
31
  import { Package } from "./package";
32
+ import type { PackageMemoryGovernor } from "./package_memory_governor";
33
+
34
+ /**
35
+ * Sibling dirs under `environmentPath` used by the install/delete pipeline so
36
+ * that long downloads do not hold the per-package mutex.
37
+ *
38
+ * - `.staging/<pkg>-<uuid>/` — a download in progress. Renamed to the
39
+ * canonical path under the lock once complete.
40
+ * - `.retired/<pkg>-<uuid>/` — the previous canonical tree, atomically
41
+ * renamed out of the way during a swap or delete. `fs.rm`'d asynchronously
42
+ * after the lock is released.
43
+ *
44
+ * Both names start with a `.` so the package walkers (which use
45
+ * {@link ignoreDotfiles}) skip them.
46
+ */
47
+ const STAGING_DIR_NAME = ".staging";
48
+ const RETIRED_DIR_NAME = ".retired";
23
49
 
24
50
  export enum PackageStatus {
25
51
  LOADING = "loading",
@@ -60,6 +86,12 @@ export class Environment {
60
86
  private environmentPath: string;
61
87
  private environmentName: string;
62
88
  public metadata: ApiEnvironment;
89
+ // The shared memory governor that consults process RSS. Optional —
90
+ // when null the gate is a no-op and the environment behaves exactly
91
+ // like it did before the governor was introduced. Set by
92
+ // EnvironmentStore.setMemoryGovernor at server start so we keep the
93
+ // governor as the single owner of the back-pressure boolean.
94
+ private memoryGovernor: PackageMemoryGovernor | null = null;
63
95
 
64
96
  constructor(
65
97
  environmentName: string,
@@ -67,6 +99,10 @@ export class Environment {
67
99
  malloyConfig: EnvironmentMalloyConfig,
68
100
  apiConnections: InternalConnection[],
69
101
  ) {
102
+ // Sanitizer barrier: every downstream `path.join(this.environmentPath,
103
+ // …)` site (including the static `sweepStaleInstallDirs` sweep) gets a
104
+ // value that has cleared an allowlist check at the gate.
105
+ assertSafeEnvironmentPath(environmentPath);
70
106
  this.environmentName = environmentName;
71
107
  this.environmentPath = environmentPath;
72
108
  this.malloyConfig = malloyConfig;
@@ -166,6 +202,14 @@ export class Environment {
166
202
  malloyConfig.apiConnections,
167
203
  );
168
204
 
205
+ // Best-effort: a previous run may have crashed mid-install or
206
+ // mid-delete and left orphan dirs under .staging/ or .retired/.
207
+ // Run against the validated constructor argument so the sink path
208
+ // here does NOT route through `this` (which CodeQL conservatively
209
+ // treats as tainted because other methods on this class touch
210
+ // request-derived `packageName` values).
211
+ await Environment.sweepStaleInstallDirs(environmentPath);
212
+
169
213
  return environment;
170
214
  }
171
215
 
@@ -194,74 +238,95 @@ export class Environment {
194
238
  modelName: string,
195
239
  source: string,
196
240
  includeSql: boolean = false,
241
+ givens?: Record<string, GivenValue>,
197
242
  ): Promise<{ problems: LogMessage[]; sql?: string }> {
198
- // Place the virtual file in the model's directory so relative imports resolve correctly.
199
- const modelDir = path.dirname(
200
- path.join(this.environmentPath, packageName, modelName),
201
- );
202
- const virtualUri = `file://${path.join(modelDir, "__compile_check.malloy")}`;
203
- const virtualUrl = new URL(virtualUri);
243
+ assertSafePackageName(packageName);
244
+ assertSafeRelativeModelPath(modelName);
245
+ // Hold the per-package mutex for the duration of every disk read —
246
+ // both the explicit `fs.readFile(modelPath)` below and the implicit
247
+ // import resolution that `runtime.loadModel` does through the URL
248
+ // reader. This is mutually exclusive with `installPackage`'s Phase 2
249
+ // rename swap and with `deletePackage`'s rename-to-retired, so a
250
+ // compile can never observe a half-rewritten tree. The slow Phase 1
251
+ // download happens outside this lock, so a multi-second clone does
252
+ // not block compiles.
253
+ return this.withPackageLock(packageName, async () => {
254
+ // Sanitized join: input segments are allowlisted above; the
255
+ // resolve-and-contain check here is the secondary guard CodeQL's
256
+ // path-injection sanitizer recognises.
257
+ const modelPath = safeJoinUnderRoot(
258
+ this.environmentPath,
259
+ packageName,
260
+ modelName,
261
+ );
262
+ // Place the virtual file in the model's directory so relative imports resolve correctly.
263
+ const modelDir = path.dirname(modelPath);
264
+ const virtualUri = `file://${path.join(modelDir, "__compile_check.malloy")}`;
265
+ const virtualUrl = new URL(virtualUri);
266
+
267
+ // Read the full model file so the submitted source inherits the model's
268
+ // complete namespace — imports, source definitions, queries, etc.
269
+ let modelContent = "";
270
+ try {
271
+ modelContent = await fs.promises.readFile(modelPath, "utf8");
272
+ } catch {
273
+ // If the model file can't be read, proceed with empty content
274
+ // and let compilation surface any errors naturally.
275
+ }
276
+ const fullSource = modelContent
277
+ ? `${modelContent}\n${source}`
278
+ : source;
279
+
280
+ // Create a URL Reader that serves the source string for the virtual file,
281
+ // but falls back to the disk for everything else (imports).
282
+ const interceptingReader = {
283
+ readURL: async (url: URL) => {
284
+ if (url.toString() === virtualUri) {
285
+ return fullSource;
286
+ }
287
+ return URL_READER.readURL(url);
288
+ },
289
+ };
204
290
 
205
- // Read the full model file so the submitted source inherits the model's
206
- // complete namespace — imports, source definitions, queries, etc.
207
- const modelPath = path.join(this.environmentPath, packageName, modelName);
208
- let modelContent = "";
209
- try {
210
- modelContent = await fs.promises.readFile(modelPath, "utf8");
211
- } catch {
212
- // If the model file can't be read, proceed with empty content
213
- // and let compilation surface any errors naturally.
214
- }
215
- const fullSource = modelContent ? `${modelContent}\n${source}` : source;
216
-
217
- // Create a URL Reader that serves the source string for the virtual file,
218
- // but falls back to the disk for everything else (imports).
219
- const interceptingReader = {
220
- readURL: async (url: URL) => {
221
- if (url.toString() === virtualUri) {
222
- return fullSource;
223
- }
224
- return URL_READER.readURL(url);
225
- },
226
- };
291
+ // Use the locked variant — we already hold the per-package mutex.
292
+ const pkg = await this._loadOrGetPackageLocked(packageName);
227
293
 
228
- const pkg = await this.getPackage(packageName);
294
+ // Initialize Runtime with the package's active MalloyConfig so compile
295
+ // checks see the same package-scoped duckdb as execution. This runtime
296
+ // borrows the package config; the package/environment lifecycle owns release.
297
+ const runtime = new Runtime({
298
+ urlReader: interceptingReader,
299
+ config: pkg.getMalloyConfig(),
300
+ });
229
301
 
230
- // Initialize Runtime with the package's active MalloyConfig so compile
231
- // checks see the same package-scoped duckdb as execution. This runtime
232
- // borrows the package config; the package/environment lifecycle owns release.
233
- const runtime = new Runtime({
234
- urlReader: interceptingReader,
235
- config: pkg.getMalloyConfig(),
236
- });
302
+ // Attempt to compile
303
+ try {
304
+ const modelMaterializer = runtime.loadModel(virtualUrl);
305
+ const model = await modelMaterializer.getModel();
237
306
 
238
- // Attempt to compile
239
- try {
240
- const modelMaterializer = runtime.loadModel(virtualUrl);
241
- const model = await modelMaterializer.getModel();
242
-
243
- // If includeSql is requested and compilation succeeded, attempt to extract SQL
244
- let sql: string | undefined;
245
- if (includeSql) {
246
- try {
247
- const queryMaterializer = modelMaterializer.loadFinalQuery();
248
- sql = await queryMaterializer.getSQL();
249
- } catch {
250
- // Source may not contain a runnable query (e.g. only source definitions),
251
- // in which case we simply omit the sql field.
307
+ // If includeSql is requested and compilation succeeded, attempt to extract SQL
308
+ let sql: string | undefined;
309
+ if (includeSql) {
310
+ try {
311
+ const queryMaterializer = modelMaterializer.loadFinalQuery();
312
+ sql = await queryMaterializer.getSQL({ givens });
313
+ } catch {
314
+ // Source may not contain a runnable query (e.g. only source definitions),
315
+ // in which case we simply omit the sql field.
316
+ }
252
317
  }
253
- }
254
318
 
255
- // If successful, return any non-fatal warnings
256
- return { problems: model.problems, sql };
257
- } catch (error) {
258
- // If parsing/compilation fails, return the errors
259
- if (error instanceof MalloyError) {
260
- return { problems: error.problems };
319
+ // If successful, return any non-fatal warnings
320
+ return { problems: model.problems, sql };
321
+ } catch (error) {
322
+ // If parsing/compilation fails, return the errors
323
+ if (error instanceof MalloyError) {
324
+ return { problems: error.problems };
325
+ }
326
+ // If it's a system error (e.g. file not found), throw it up
327
+ throw error;
261
328
  }
262
- // If it's a system error (e.g. file not found), throw it up
263
- throw error;
264
- }
329
+ });
265
330
  }
266
331
 
267
332
  public listApiConnections(): ApiConnection[] {
@@ -389,80 +454,227 @@ export class Environment {
389
454
  }
390
455
  }
391
456
 
457
+ /**
458
+ * One mutex per package name; never replace after create — replacing
459
+ * would allow two loads of the same package to run in parallel and
460
+ * race on the canonical directory.
461
+ *
462
+ * `deletePackage` intentionally leaves the entry behind: a
463
+ * subsequent re-install must serialize against any straggling
464
+ * readers from the deleted generation that are still inside
465
+ * `withPackageLock`. The map therefore grows by the count of
466
+ * *distinct* package names the environment has ever served, not by
467
+ * install churn, so for the publisher's expected workload
468
+ * (config-declared packages, occasional ad-hoc additions) this is
469
+ * bounded in practice. Long-lived deployments that create and
470
+ * delete unique package names indefinitely would need an explicit
471
+ * sweep; we'll add one if/when that pattern appears.
472
+ */
473
+ private getOrCreatePackageMutex(packageName: string): Mutex {
474
+ let packageMutex = this.packageMutexes.get(packageName);
475
+ if (packageMutex === undefined) {
476
+ packageMutex = new Mutex();
477
+ this.packageMutexes.set(packageName, packageMutex);
478
+ }
479
+ return packageMutex;
480
+ }
481
+
482
+ /**
483
+ * Run `fn` while holding the per-package mutex. This is the single
484
+ * synchronization primitive that protects a package directory: every
485
+ * code path that mutates `{environmentPath}/{packageName}/` or reads
486
+ * from disk under it must serialize through this lock. See the lock
487
+ * ordering note above the `packageMutexes` field for the wider
488
+ * invariant.
489
+ *
490
+ * `async-mutex` is **not reentrant** — `fn` must not call any other
491
+ * method that calls `withPackageLock` on the same package, or it will
492
+ * deadlock. Use the `_xxxLocked` variants below in that case.
493
+ */
494
+ public async withPackageLock<T>(
495
+ packageName: string,
496
+ fn: () => Promise<T>,
497
+ ): Promise<T> {
498
+ assertSafePackageName(packageName);
499
+ return this.getOrCreatePackageMutex(packageName).runExclusive(fn);
500
+ }
501
+
502
+ private allocateStagingPath(packageName: string): string {
503
+ return safeJoinUnderRoot(
504
+ this.environmentPath,
505
+ STAGING_DIR_NAME,
506
+ `${packageName}-${crypto.randomUUID()}`,
507
+ );
508
+ }
509
+
510
+ private allocateRetiredPath(packageName: string): string {
511
+ return safeJoinUnderRoot(
512
+ this.environmentPath,
513
+ RETIRED_DIR_NAME,
514
+ `${packageName}-${crypto.randomUUID()}`,
515
+ );
516
+ }
517
+
518
+ /**
519
+ * Best-effort sweep of `.staging/` and `.retired/` left over from a
520
+ * previous run (crash, OOM, etc). Safe because both dirs are managed
521
+ * exclusively by `installPackage` / `deletePackage`; no in-flight
522
+ * operation in this process can be using them yet.
523
+ *
524
+ * Static + path-as-parameter on purpose: the sink path here must
525
+ * derive from the validated factory argument, not from `this`,
526
+ * because CodeQL's path-injection query conservatively treats every
527
+ * field on this class as tainted (other methods on the same class
528
+ * receive request-derived `packageName` values).
529
+ */
530
+ public static async sweepStaleInstallDirs(
531
+ environmentPath: string,
532
+ ): Promise<void> {
533
+ assertSafeEnvironmentPath(environmentPath);
534
+ for (const dirName of [STAGING_DIR_NAME, RETIRED_DIR_NAME]) {
535
+ const dir = safeJoinUnderRoot(environmentPath, dirName);
536
+ // Inline sanitizer barriers in the precise shape CodeQL's
537
+ // `js/path-injection` query recognises (regex-test +
538
+ // `indexOf("..") !== -1` guard) so the sink right below is
539
+ // covered even when the call chain feeding `environmentPath`
540
+ // is taint-tracked from an HTTP request handler.
541
+ if (dir.indexOf("..") !== -1) continue;
542
+ if (path.basename(dir) !== dirName) continue;
543
+ try {
544
+ await fs.promises.rm(dir, { recursive: true, force: true });
545
+ } catch (err) {
546
+ logger.warn(`Failed to sweep stale ${dirName} dir at ${dir}`, {
547
+ error: err,
548
+ });
549
+ }
550
+ }
551
+ }
552
+
553
+ /**
554
+ * Attach (or detach with `null`) the memory governor that gates new
555
+ * package allocations. The single instance is owned by the
556
+ * EnvironmentStore and propagated to every Environment so the
557
+ * back-pressure decision is process-wide.
558
+ */
559
+ public setMemoryGovernor(governor: PackageMemoryGovernor | null): void {
560
+ this.memoryGovernor = governor;
561
+ }
562
+
563
+ /**
564
+ * Choke-point check called from every code path that would allocate
565
+ * a *new* package into the in-memory map (lazy load on cache miss,
566
+ * explicit reload, `addPackage`). Throws HTTP 503 when the governor
567
+ * is back-pressured; cheap no-op when the governor is unset or
568
+ * happy.
569
+ *
570
+ * `allowAdmission` is the documented opt-out for read paths that
571
+ * genuinely cannot tolerate 503s. None of the current callers set
572
+ * it; the parameter exists so a future caller (e.g. a
573
+ * health/warmup probe) can self-document its bypass intent.
574
+ */
575
+ private assertCanAdmitNewPackage(
576
+ packageName: string,
577
+ reason: string,
578
+ allowAdmission: boolean,
579
+ ): void {
580
+ if (allowAdmission) return;
581
+ if (!this.memoryGovernor?.isBackpressured()) return;
582
+ throw new ServiceUnavailableError(
583
+ `Publisher is under memory pressure and cannot ${reason} (package "${packageName}", environment "${this.environmentName}"). Retry after the server's memory usage drops below the configured low-water mark.`,
584
+ );
585
+ }
586
+
392
587
  public async getPackage(
393
588
  packageName: string,
394
589
  reload: boolean = false,
590
+ options: { allowAdmission?: boolean } = {},
395
591
  ): Promise<Package> {
396
- // Check if package is already loaded first
592
+ assertSafePackageName(packageName);
593
+ // Fast-path: serve from cache without acquiring the lock. Safe because
594
+ // `Package` references are immutable; the disk-reading methods that
595
+ // actually need protection (compileSource, getModelFileText,
596
+ // reloadAllModelsForPackage, ...) acquire the lock themselves.
597
+ //
598
+ // INVARIANT: callers that consume the returned Package on the fast
599
+ // path (notably MCP resource handlers and Model.getModel()) must
600
+ // remain in-memory only. If any code reachable from a `Package`
601
+ // method ever grows new disk I/O against the canonical tree, that
602
+ // path needs to be bracketed by `withPackageLock`; otherwise a
603
+ // concurrent install/delete will race against an unlocked reader.
397
604
  const _package = this.packages.get(packageName);
398
605
  if (_package !== undefined && !reload) {
399
606
  return _package;
400
607
  }
401
608
 
402
- // We need to acquire the mutex to prevent a thundering herd of requests from creating the
403
- // package multiple times.
404
- let packageMutex = this.packageMutexes.get(packageName);
405
- if (packageMutex?.isLocked()) {
406
- logger.debug(
407
- `Package ${packageName} is being loaded, waiting for unlock...`,
408
- );
409
- await packageMutex.waitForUnlock();
410
- logger.debug(`Package ${packageName} unlocked`);
411
- const existingPackage = this.packages.get(packageName);
412
- if (existingPackage) {
413
- logger.debug(`Package ${packageName} loaded by another request`);
414
- return existingPackage;
415
- }
416
- // If package still doesn't exist after unlock, it might have failed to load
417
- // Continue to try loading it ourselves
418
- }
419
- packageMutex = new Mutex();
420
- this.packageMutexes.set(packageName, packageMutex);
421
-
422
- return packageMutex.runExclusive(async () => {
423
- // Double-check after acquiring mutex
424
- const existingPackage = this.packages.get(packageName);
425
- if (existingPackage !== undefined && !reload) {
426
- return existingPackage;
427
- }
609
+ // We are either reloading or about to lazy-load on a cache miss
610
+ // — both allocate a new package. This is the single choke point
611
+ // for admission control; controllers no longer need their own
612
+ // back-pressure check.
613
+ this.assertCanAdmitNewPackage(
614
+ packageName,
615
+ reload ? "reload a package" : "load a package",
616
+ options.allowAdmission === true,
617
+ );
428
618
 
429
- // Set package status to loading
430
- this.setPackageStatus(packageName, PackageStatus.LOADING);
619
+ return this.withPackageLock(packageName, () =>
620
+ this._loadOrGetPackageLocked(packageName, reload),
621
+ );
622
+ }
431
623
 
432
- try {
433
- logger.debug(`Loading package ${packageName}...`);
434
- const packagePath = path.join(this.environmentPath, packageName);
435
- const _package = await Package.create(
436
- this.environmentName,
437
- packageName,
438
- packagePath,
439
- () => this.malloyConfig.malloyConfig,
440
- );
441
- if (existingPackage !== undefined && reload) {
442
- this.retireConnectionGeneration(`package ${packageName}`, () =>
443
- existingPackage.getMalloyConfig().releaseConnections(),
444
- );
445
- }
446
- this.packages.set(packageName, _package);
624
+ /**
625
+ * Load (or reload) a package from its canonical disk location. Assumes
626
+ * the caller holds the per-package mutex (via {@link withPackageLock}).
627
+ *
628
+ * Used by {@link getPackage} and by {@link compileSource} so the
629
+ * cache-miss path doesn't re-enter the mutex.
630
+ */
631
+ private async _loadOrGetPackageLocked(
632
+ packageName: string,
633
+ reload: boolean = false,
634
+ ): Promise<Package> {
635
+ const existingPackage = this.packages.get(packageName);
636
+ if (existingPackage !== undefined && !reload) {
637
+ return existingPackage;
638
+ }
447
639
 
448
- // Set package status to serving
449
- this.setPackageStatus(packageName, PackageStatus.SERVING);
450
- logger.debug(`Successfully loaded package ${packageName}`);
640
+ this.setPackageStatus(packageName, PackageStatus.LOADING);
451
641
 
452
- return _package;
453
- } catch (error) {
454
- logger.error(`Failed to load package ${packageName}`, { error });
455
- // Clean up on error - mutex will be automatically released by runExclusive
456
- this.packages.delete(packageName);
457
- this.packageStatuses.delete(packageName);
458
- throw error;
642
+ try {
643
+ logger.debug(`Loading package ${packageName}...`);
644
+ const packagePath = safeJoinUnderRoot(
645
+ this.environmentPath,
646
+ packageName,
647
+ );
648
+ const _package = await Package.create(
649
+ this.environmentName,
650
+ packageName,
651
+ packagePath,
652
+ () => this.malloyConfig.malloyConfig,
653
+ );
654
+ if (existingPackage !== undefined && reload) {
655
+ this.retireConnectionGeneration(`package ${packageName}`, () =>
656
+ existingPackage.getMalloyConfig().releaseConnections(),
657
+ );
459
658
  }
460
- // Mutex is automatically released here by runExclusive
461
- });
659
+ this.packages.set(packageName, _package);
660
+ this.setPackageStatus(packageName, PackageStatus.SERVING);
661
+ logger.debug(`Successfully loaded package ${packageName}`);
662
+
663
+ return _package;
664
+ } catch (error) {
665
+ logger.error(`Failed to load package ${packageName}`, { error });
666
+ this.packages.delete(packageName);
667
+ this.packageStatuses.delete(packageName);
668
+ throw error;
669
+ }
462
670
  }
463
671
 
464
- public async addPackage(packageName: string) {
465
- const packagePath = path.join(this.environmentPath, packageName);
672
+ public async addPackage(
673
+ packageName: string,
674
+ options: { allowAdmission?: boolean } = {},
675
+ ) {
676
+ assertSafePackageName(packageName);
677
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
466
678
  if (
467
679
  !(await fs.promises
468
680
  .access(packagePath)
@@ -472,6 +684,14 @@ export class Environment {
472
684
  ) {
473
685
  throw new PackageNotFoundError(`Package ${packageName} not found`);
474
686
  }
687
+ // 404 takes precedence over 503 so a permanent "you forgot to
688
+ // upload the package" failure isn't masked as a transient
689
+ // "retry later" — the gate runs after the existence check.
690
+ this.assertCanAdmitNewPackage(
691
+ packageName,
692
+ "add a new package",
693
+ options.allowAdmission === true,
694
+ );
475
695
  logger.info(
476
696
  `Adding package ${packageName} to environment ${this.environmentName}`,
477
697
  {
@@ -479,6 +699,21 @@ export class Environment {
479
699
  malloyConfig: this.malloyConfig.malloyConfig,
480
700
  },
481
701
  );
702
+
703
+ return this.withPackageLock(packageName, () =>
704
+ this._addPackageLocked(packageName),
705
+ );
706
+ }
707
+
708
+ private async _addPackageLocked(
709
+ packageName: string,
710
+ ): Promise<Package | undefined> {
711
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
712
+ const existingPackage = this.packages.get(packageName);
713
+ if (existingPackage !== undefined) {
714
+ return existingPackage;
715
+ }
716
+
482
717
  this.setPackageStatus(packageName, PackageStatus.LOADING);
483
718
  try {
484
719
  this.packages.set(
@@ -499,12 +734,184 @@ export class Environment {
499
734
  return this.packages.get(packageName);
500
735
  }
501
736
 
737
/**
 * Replace a package on disk via stage-and-swap, then load it.
 *
 * - Phase 1 (no lock): run `downloader(stagingPath)`, writing the new
 *   content into a fresh sibling dir at `.staging/<pkg>-<uuid>/`. This
 *   is where multi-second downloads (git clone, GCS pull, ...) happen.
 * - Phase 2 (lock held): rename any existing canonical tree out to
 *   `.retired/<pkg>-<uuid>/`, rename staging into the canonical path,
 *   and run `Package.create` against the canonical path.
 * - Phase 3 (deferred): hand the old package's connections to
 *   `retireConnectionGeneration` and `fs.rm` the retired tree on a later
 *   tick via `setImmediate`.
 *
 * Concurrent compiles / `getModelFileText` / `reloadAllModels` calls
 * take the same mutex and so are mutually exclusive with the Phase 2
 * swap, but they never queue behind a long Phase 1 download.
 *
 * Failure handling:
 * - Any failure (download, retiring the old tree, the swap, or
 *   `Package.create`) removes the staging dir before the error is
 *   rethrown, so a failed install never leaves staging content behind.
 * - If the old tree was already renamed aside, it is renamed back so the
 *   canonical path is restored. When that restore succeeds and the old
 *   package is still registered, its previous status is reinstated
 *   instead of being dropped.
 *
 * @param packageName Name of the package to install; validated with
 *   `assertSafePackageName`.
 * @param downloader Callback that populates `stagingPath` with the new
 *   package content.
 * @returns The newly created package, registered and marked SERVING.
 * @throws Rethrows the original error from the downloader, the
 *   filesystem swap, or `Package.create`.
 */
public async installPackage(
   packageName: string,
   downloader: (stagingPath: string) => Promise<void>,
): Promise<Package> {
   assertSafePackageName(packageName);
   const stagingPath = this.allocateStagingPath(packageName);
   await fs.promises.mkdir(path.dirname(stagingPath), { recursive: true });

   try {
      await downloader(stagingPath);

      return await this.withPackageLock(packageName, async () => {
         const canonicalPath = safeJoinUnderRoot(
            this.environmentPath,
            packageName,
         );
         let retiredPath: string | undefined;

         const oldPackage = this.packages.get(packageName);
         // Remember the status the old package had before we touch
         // anything, so a rollback can put it back.
         const previousStatus = this.packageStatuses.get(packageName)?.status;
         const oldExistsOnDisk = await fs.promises
            .access(canonicalPath)
            .then(() => true)
            .catch(() => false);

         // Deliberately outside the rollback `try` below: if retiring the
         // old tree fails, canonical still holds the OLD content and must
         // not be wiped. The outer catch still discards staging.
         if (oldExistsOnDisk) {
            const retireTarget = this.allocateRetiredPath(packageName);
            await fs.promises.mkdir(path.dirname(retireTarget), {
               recursive: true,
            });
            await fs.promises.rename(canonicalPath, retireTarget);
            retiredPath = retireTarget;
         }

         let newPackage: Package;
         try {
            await fs.promises.rename(stagingPath, canonicalPath);

            this.setPackageStatus(packageName, PackageStatus.LOADING);
            newPackage = await Package.create(
               this.environmentName,
               packageName,
               canonicalPath,
               () => this.malloyConfig.malloyConfig,
            );
         } catch (err) {
            // Rollback: clobber whatever (partial) content sits at
            // canonical. `force: true` plus the `.catch(() => {})` make
            // this a best-effort wipe whose only job is to leave the
            // rename-back below a clean destination.
            await fs.promises
               .rm(canonicalPath, { recursive: true, force: true })
               .catch(() => {});

            let restored = false;
            if (retiredPath) {
               try {
                  await fs.promises.rename(retiredPath, canonicalPath);
                  restored = true;
               } catch (restoreErr) {
                  logger.error(
                     "Failed to restore retired package after install rollback",
                     {
                        error: restoreErr,
                        retiredPath,
                        canonicalPath,
                     },
                  );
               }
            }

            // The old package is still registered in `this.packages`; if
            // its tree is back in place, keep reporting the status it had
            // rather than leaving it without one.
            if (oldPackage && restored && previousStatus !== undefined) {
               this.setPackageStatus(packageName, previousStatus);
            } else {
               this.deletePackageStatus(packageName);
            }
            throw err;
         }

         this.packages.set(packageName, newPackage);
         this.setPackageStatus(packageName, PackageStatus.SERVING);

         if (oldPackage) {
            this.retireConnectionGeneration(`package ${packageName}`, () =>
               oldPackage.getMalloyConfig().releaseConnections(),
            );
         }

         if (retiredPath) {
            const pathToClean = retiredPath;
            // Deferred so the recursive remove does not extend the time
            // the package lock is held.
            setImmediate(() => {
               void fs.promises
                  .rm(pathToClean, { recursive: true, force: true })
                  .catch((err) => {
                     logger.warn(
                        `Failed to clean up retired package directory ${pathToClean}`,
                        { error: err },
                     );
                  });
            });
         }

         return newPackage;
      });
   } catch (err) {
      // Single cleanup point for every failure path. On success the
      // staging dir has already been renamed into place, so this only
      // runs when something went wrong; `force: true` makes it a no-op
      // if staging no longer exists.
      await fs.promises
         .rm(stagingPath, { recursive: true, force: true })
         .catch(() => {});
      throw err;
   }
}
864
+
865
/**
 * Reload all models of a loaded package against the given build
 * manifest entries while holding that package's lock.
 *
 * Callers outside `Environment` should use this instead of calling
 * `Package.reloadAllModels` directly.
 *
 * @param packageName Name of the package; validated with
 *   `assertSafePackageName`.
 * @param manifest Build manifest entries passed through to
 *   `Package.reloadAllModels`.
 * @throws PackageNotFoundError when the package is not currently loaded.
 */
public async reloadAllModelsForPackage(
   packageName: string,
   manifest: BuildManifest["entries"],
): Promise<void> {
   assertSafePackageName(packageName);

   const reloadUnderLock = async (): Promise<void> => {
      const loadedPackage = this.packages.get(packageName);
      if (loadedPackage) {
         await loadedPackage.reloadAllModels(manifest);
         return;
      }
      throw new PackageNotFoundError(`Package ${packageName} is not loaded`);
   };

   return this.withPackageLock(packageName, reloadUnderLock);
}
886
+
887
/**
 * Return the source text of one model file of a loaded package.
 *
 * The read runs under the per-package lock, so it is serialized against
 * {@link installPackage}, {@link deletePackage} and {@link updatePackage}.
 *
 * @param packageName Name of the package; validated with
 *   `assertSafePackageName`.
 * @param modelPath Model path inside the package; validated with
 *   `assertSafeRelativeModelPath`.
 * @returns The model's source text as returned by
 *   `Package.getModelFileText`.
 * @throws PackageNotFoundError when the package is not currently loaded.
 */
public async getModelFileText(
   packageName: string,
   modelPath: string,
): Promise<string> {
   assertSafePackageName(packageName);
   assertSafeRelativeModelPath(modelPath);

   const readUnderLock = async () => {
      const loadedPackage = this.packages.get(packageName);
      if (loadedPackage) {
         return loadedPackage.getModelFileText(modelPath);
      }
      throw new PackageNotFoundError(`Package ${packageName} is not loaded`);
   };

   return this.withPackageLock(packageName, readUnderLock);
}
908
+
502
909
  private async writePackageManifest(
503
910
  packageName: string,
504
911
  metadata: { name: string; description?: string },
505
912
  ): Promise<void> {
506
- const packagePath = path.join(this.environmentPath, packageName);
507
- const manifestPath = path.join(packagePath, "publisher.json");
913
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
914
+ const manifestPath = safeJoinUnderRoot(packagePath, "publisher.json");
508
915
 
509
916
  try {
510
917
  // Read existing manifest
@@ -538,26 +945,29 @@ export class Environment {
538
945
  }
539
946
 
540
947
/**
 * Update a loaded package's in-memory metadata and persist the
 * description to its manifest, all under the per-package lock.
 *
 * The manifest is always written with `name` set to `packageName`,
 * regardless of `body.name`.
 *
 * @param packageName Name of the package; validated with
 *   `assertSafePackageName`.
 * @param body Requested metadata (`name`, `description`, `resource`,
 *   `location`).
 * @returns The package metadata after the update.
 * @throws PackageNotFoundError when the package is not currently loaded.
 */
public async updatePackage(packageName: string, body: ApiPackage) {
   assertSafePackageName(packageName);

   const applyUpdate = async () => {
      const target = this.packages.get(packageName);
      if (!target) {
         throw new PackageNotFoundError(`Package ${packageName} not found`);
      }

      const { name, description, resource, location } = body;
      if (name) {
         target.setName(name);
      }
      target.setPackageMetadata({ name, description, resource, location });

      await this.writePackageManifest(packageName, {
         name: packageName,
         description,
      });

      return target.getPackageMetadata();
   };

   return this.withPackageLock(packageName, applyUpdate);
}
562
972
 
563
973
  public getPackageStatus(packageName: string): PackageInfo | undefined {
@@ -578,48 +988,83 @@ export class Environment {
578
988
  }
579
989
 
580
990
/**
 * Unload a package and remove its directory, under the per-package lock.
 *
 * Steps, in order:
 * 1. Return silently if the package is not loaded.
 * 2. Refuse to unload while the package status is LOADING; mark a SERVING
 *    package as UNLOADING.
 * 3. Hand the package's connections to `retireConnectionGeneration`.
 * 4. Rename the package directory to a retired path. A failure here is
 *    logged and does not abort the unload.
 * 5. Drop the package from the in-memory maps.
 * 6. If the rename succeeded, remove the retired directory on a later
 *    tick via `setImmediate`.
 *
 * @param packageName Name of the package; validated with
 *   `assertSafePackageName`.
 * @throws Error when the package status is LOADING.
 */
public async deletePackage(packageName: string): Promise<void> {
   assertSafePackageName(packageName);

   const unloadUnderLock = async (): Promise<void> => {
      const loadedPackage = this.packages.get(packageName);
      if (!loadedPackage) {
         return;
      }

      // The lock already serializes load/install/compile against delete;
      // the LOADING guard is kept for backwards-compatible error
      // messaging in case anything bypasses the lock.
      const currentStatus = this.packageStatuses.get(packageName)?.status;
      if (currentStatus === PackageStatus.LOADING) {
         logger.error("Package loading. Can't unload.", {
            environmentName: this.environmentName,
            packageName,
         });
         throw new Error(
            `Package loading. Can't unload. ${this.environmentName} ${packageName}`,
         );
      }
      if (currentStatus === PackageStatus.SERVING) {
         this.setPackageStatus(packageName, PackageStatus.UNLOADING);
      }

      // Retire the connections through the shared drain path so queries
      // that already hold a connection can finish before it is released.
      const releaseConnections = () =>
         loadedPackage.getMalloyConfig().releaseConnections();
      this.retireConnectionGeneration(
         `package ${packageName}`,
         releaseConnections,
      );

      // Move the canonical tree aside with a single rename; the recursive
      // remove is deferred below so the lock is not held for its duration.
      const canonicalPath = safeJoinUnderRoot(
         this.environmentPath,
         packageName,
      );
      const retiredPath = this.allocateRetiredPath(packageName);
      let movedAside = false;
      try {
         const retiredParent = path.dirname(retiredPath);
         await fs.promises.mkdir(retiredParent, { recursive: true });
         await fs.promises.rename(canonicalPath, retiredPath);
         movedAside = true;
      } catch (err) {
         const details = {
            error: err,
            environmentName: this.environmentName,
            packageName,
         };
         logger.error(
            "Error renaming package directory to retired during unload",
            details,
         );
      }

      this.packages.delete(packageName);
      this.packageStatuses.delete(packageName);

      if (!movedAside) {
         return;
      }
      setImmediate(() => {
         void fs.promises
            .rm(retiredPath, { recursive: true, force: true })
            .catch((err) => {
               logger.warn(
                  `Failed to clean up retired package directory ${retiredPath}`,
                  { error: err },
               );
            });
      });
   };

   return this.withPackageLock(packageName, unloadUnderLock);
}
624
1069
 
625
1070
  public updateConnections(