@malloy-publisher/server 0.0.197 → 0.0.198

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.docker.md +47 -0
  2. package/dist/app/api-doc.yaml +3 -20
  3. package/dist/app/assets/{EnvironmentPage-BVkQH_xQ.js → EnvironmentPage-C7rtH4mC.js} +1 -1
  4. package/dist/app/assets/{HomePage-BgH9UkjK.js → HomePage-DwkH7OrS.js} +1 -1
  5. package/dist/app/assets/{MainPage-DiBxABem.js → MainPage-D38LtZDV.js} +1 -1
  6. package/dist/app/assets/{ModelPage-oS70fj83.js → ModelPage-DOol8Mz7.js} +1 -1
  7. package/dist/app/assets/{PackagePage-F_qLDAdv.js → PackagePage-0tgzA_kO.js} +1 -1
  8. package/dist/app/assets/{RouteError-WqpffppN.js → RouteError-BaMsOSly.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-_YmC-ebR.js → WorkbookPage-Cx4SePkx.js} +1 -1
  10. package/dist/app/assets/{core-B8L9xCYT.es-BcRLJTnC.js → core-CbsC6R_Y.es-Cwf6asf3.js} +1 -1
  11. package/dist/app/assets/{index-rg8Ok8nl.js → index-DL6BZTuw.js} +1 -1
  12. package/dist/app/assets/{index-C3XPaTaS.js → index-DNofXMxi.js} +1 -1
  13. package/dist/app/assets/{index-BMViiwtJ.js → index-U38AyjJL.js} +3 -3
  14. package/dist/app/assets/{index.umd-CCAfKkxY.js → index.umd-B68wGGkM.js} +1 -1
  15. package/dist/app/index.html +1 -1
  16. package/dist/server.mjs +812 -450
  17. package/package.json +1 -1
  18. package/src/config.spec.ts +81 -0
  19. package/src/config.ts +126 -0
  20. package/src/controller/package.controller.ts +70 -29
  21. package/src/errors.ts +13 -0
  22. package/src/health.ts +0 -26
  23. package/src/mcp/tools/discovery_tools.ts +6 -2
  24. package/src/path_safety.spec.ts +158 -0
  25. package/src/path_safety.ts +140 -0
  26. package/src/server.ts +13 -0
  27. package/src/service/environment.ts +614 -198
  28. package/src/service/environment_admission.spec.ts +180 -0
  29. package/src/service/environment_store.spec.ts +0 -19
  30. package/src/service/environment_store.ts +24 -21
  31. package/src/service/manifest_service.spec.ts +7 -2
  32. package/src/service/manifest_service.ts +8 -2
  33. package/src/service/materialization_service.ts +14 -3
  34. package/src/service/package_memory_governor.spec.ts +173 -0
  35. package/src/service/package_memory_governor.ts +233 -0
  36. package/src/service/package_race.spec.ts +208 -0
@@ -1,6 +1,7 @@
1
1
  import type { LogMessage } from "@malloydata/malloy";
2
2
  import { MalloyError, Runtime } from "@malloydata/malloy";
3
3
  import { Mutex } from "async-mutex";
4
+ import crypto from "crypto";
4
5
  import * as fs from "fs";
5
6
  import * as path from "path";
6
7
  import { components } from "../api";
@@ -9,8 +10,16 @@ import {
9
10
  ConnectionNotFoundError,
10
11
  EnvironmentNotFoundError,
11
12
  PackageNotFoundError,
13
+ ServiceUnavailableError,
12
14
  } from "../errors";
13
15
  import { logger } from "../logger";
16
+ import {
17
+ assertSafeEnvironmentPath,
18
+ assertSafePackageName,
19
+ assertSafeRelativeModelPath,
20
+ safeJoinUnderRoot,
21
+ } from "../path_safety";
22
+ import { BuildManifest } from "../storage/DatabaseInterface";
14
23
  import { URL_READER } from "../utils";
15
24
  import {
16
25
  buildEnvironmentMalloyConfig,
@@ -20,6 +29,23 @@ import {
20
29
  } from "./connection";
21
30
  import { ApiConnection } from "./model";
22
31
  import { Package } from "./package";
32
+ import type { PackageMemoryGovernor } from "./package_memory_governor";
33
+
34
+ /**
35
+ * Sibling dirs under `environmentPath` used by the install/delete pipeline so
36
+ * that long downloads do not hold the per-package mutex.
37
+ *
38
+ * - `.staging/<pkg>-<uuid>/` — a download in progress. Renamed to the
39
+ * canonical path under the lock once complete.
40
+ * - `.retired/<pkg>-<uuid>/` — the previous canonical tree, atomically
41
+ * renamed out of the way during a swap or delete. `fs.rm`'d asynchronously
42
+ * after the lock is released.
43
+ *
44
+ * Both names start with a `.` so the package walkers (which use
45
+ * {@link ignoreDotfiles}) skip them.
46
+ */
47
+ const STAGING_DIR_NAME = ".staging";
48
+ const RETIRED_DIR_NAME = ".retired";
23
49
 
24
50
  export enum PackageStatus {
25
51
  LOADING = "loading",
@@ -60,6 +86,12 @@ export class Environment {
60
86
  private environmentPath: string;
61
87
  private environmentName: string;
62
88
  public metadata: ApiEnvironment;
89
+ // The shared memory governor that consults process RSS. Optional —
90
+ // when null the gate is a no-op and the environment behaves exactly
91
+ // like it did before the governor was introduced. Set by
92
+ // EnvironmentStore.setMemoryGovernor at server start so we keep the
93
+ // governor as the single owner of the back-pressure boolean.
94
+ private memoryGovernor: PackageMemoryGovernor | null = null;
63
95
 
64
96
  constructor(
65
97
  environmentName: string,
@@ -67,6 +99,10 @@ export class Environment {
67
99
  malloyConfig: EnvironmentMalloyConfig,
68
100
  apiConnections: InternalConnection[],
69
101
  ) {
102
+ // Sanitizer barrier: every downstream `path.join(this.environmentPath,
103
+ // …)` site (including the static `sweepStaleInstallDirs` sweep) gets a
104
+ // value that has cleared an allowlist check at the gate.
105
+ assertSafeEnvironmentPath(environmentPath);
70
106
  this.environmentName = environmentName;
71
107
  this.environmentPath = environmentPath;
72
108
  this.malloyConfig = malloyConfig;
@@ -166,6 +202,14 @@ export class Environment {
166
202
  malloyConfig.apiConnections,
167
203
  );
168
204
 
205
+ // Best-effort: a previous run may have crashed mid-install or
206
+ // mid-delete and left orphan dirs under .staging/ or .retired/.
207
+ // Run against the validated constructor argument so the sink path
208
+ // here does NOT route through `this` (which CodeQL conservatively
209
+ // treats as tainted because other methods on this class touch
210
+ // request-derived `packageName` values).
211
+ await Environment.sweepStaleInstallDirs(environmentPath);
212
+
169
213
  return environment;
170
214
  }
171
215
 
@@ -195,73 +239,93 @@ export class Environment {
195
239
  source: string,
196
240
  includeSql: boolean = false,
197
241
  ): Promise<{ problems: LogMessage[]; sql?: string }> {
198
- // Place the virtual file in the model's directory so relative imports resolve correctly.
199
- const modelDir = path.dirname(
200
- path.join(this.environmentPath, packageName, modelName),
201
- );
202
- const virtualUri = `file://${path.join(modelDir, "__compile_check.malloy")}`;
203
- const virtualUrl = new URL(virtualUri);
242
+ assertSafePackageName(packageName);
243
+ assertSafeRelativeModelPath(modelName);
244
+ // Hold the per-package mutex for the duration of every disk read —
245
+ // both the explicit `fs.readFile(modelPath)` below and the implicit
246
+ // import resolution that `runtime.loadModel` does through the URL
247
+ // reader. This is mutually exclusive with `installPackage`'s Phase 2
248
+ // rename swap and with `deletePackage`'s rename-to-retired, so a
249
+ // compile can never observe a half-rewritten tree. The slow Phase 1
250
+ // download happens outside this lock, so a multi-second clone does
251
+ // not block compiles.
252
+ return this.withPackageLock(packageName, async () => {
253
+ // Sanitized join: input segments are allowlisted above; the
254
+ // resolve-and-contain check here is the secondary guard CodeQL's
255
+ // path-injection sanitizer recognises.
256
+ const modelPath = safeJoinUnderRoot(
257
+ this.environmentPath,
258
+ packageName,
259
+ modelName,
260
+ );
261
+ // Place the virtual file in the model's directory so relative imports resolve correctly.
262
+ const modelDir = path.dirname(modelPath);
263
+ const virtualUri = `file://${path.join(modelDir, "__compile_check.malloy")}`;
264
+ const virtualUrl = new URL(virtualUri);
265
+
266
+ // Read the full model file so the submitted source inherits the model's
267
+ // complete namespace — imports, source definitions, queries, etc.
268
+ let modelContent = "";
269
+ try {
270
+ modelContent = await fs.promises.readFile(modelPath, "utf8");
271
+ } catch {
272
+ // If the model file can't be read, proceed with empty content
273
+ // and let compilation surface any errors naturally.
274
+ }
275
+ const fullSource = modelContent
276
+ ? `${modelContent}\n${source}`
277
+ : source;
278
+
279
+ // Create a URL Reader that serves the source string for the virtual file,
280
+ // but falls back to the disk for everything else (imports).
281
+ const interceptingReader = {
282
+ readURL: async (url: URL) => {
283
+ if (url.toString() === virtualUri) {
284
+ return fullSource;
285
+ }
286
+ return URL_READER.readURL(url);
287
+ },
288
+ };
204
289
 
205
- // Read the full model file so the submitted source inherits the model's
206
- // complete namespace imports, source definitions, queries, etc.
207
- const modelPath = path.join(this.environmentPath, packageName, modelName);
208
- let modelContent = "";
209
- try {
210
- modelContent = await fs.promises.readFile(modelPath, "utf8");
211
- } catch {
212
- // If the model file can't be read, proceed with empty content
213
- // and let compilation surface any errors naturally.
214
- }
215
- const fullSource = modelContent ? `${modelContent}\n${source}` : source;
216
-
217
- // Create a URL Reader that serves the source string for the virtual file,
218
- // but falls back to the disk for everything else (imports).
219
- const interceptingReader = {
220
- readURL: async (url: URL) => {
221
- if (url.toString() === virtualUri) {
222
- return fullSource;
223
- }
224
- return URL_READER.readURL(url);
225
- },
226
- };
290
+ // Use the locked variant — we already hold the per-package mutex.
291
+ const pkg = await this._loadOrGetPackageLocked(packageName);
227
292
 
228
- const pkg = await this.getPackage(packageName);
293
+ // Initialize Runtime with the package's active MalloyConfig so compile
294
+ // checks see the same package-scoped duckdb as execution. This runtime
295
+ // borrows the package config; the package/environment lifecycle owns release.
296
+ const runtime = new Runtime({
297
+ urlReader: interceptingReader,
298
+ config: pkg.getMalloyConfig(),
299
+ });
229
300
 
230
- // Initialize Runtime with the package's active MalloyConfig so compile
231
- // checks see the same package-scoped duckdb as execution. This runtime
232
- // borrows the package config; the package/environment lifecycle owns release.
233
- const runtime = new Runtime({
234
- urlReader: interceptingReader,
235
- config: pkg.getMalloyConfig(),
236
- });
301
+ // Attempt to compile
302
+ try {
303
+ const modelMaterializer = runtime.loadModel(virtualUrl);
304
+ const model = await modelMaterializer.getModel();
237
305
 
238
- // Attempt to compile
239
- try {
240
- const modelMaterializer = runtime.loadModel(virtualUrl);
241
- const model = await modelMaterializer.getModel();
242
-
243
- // If includeSql is requested and compilation succeeded, attempt to extract SQL
244
- let sql: string | undefined;
245
- if (includeSql) {
246
- try {
247
- const queryMaterializer = modelMaterializer.loadFinalQuery();
248
- sql = await queryMaterializer.getSQL();
249
- } catch {
250
- // Source may not contain a runnable query (e.g. only source definitions),
251
- // in which case we simply omit the sql field.
306
+ // If includeSql is requested and compilation succeeded, attempt to extract SQL
307
+ let sql: string | undefined;
308
+ if (includeSql) {
309
+ try {
310
+ const queryMaterializer = modelMaterializer.loadFinalQuery();
311
+ sql = await queryMaterializer.getSQL();
312
+ } catch {
313
+ // Source may not contain a runnable query (e.g. only source definitions),
314
+ // in which case we simply omit the sql field.
315
+ }
252
316
  }
253
- }
254
317
 
255
- // If successful, return any non-fatal warnings
256
- return { problems: model.problems, sql };
257
- } catch (error) {
258
- // If parsing/compilation fails, return the errors
259
- if (error instanceof MalloyError) {
260
- return { problems: error.problems };
318
+ // If successful, return any non-fatal warnings
319
+ return { problems: model.problems, sql };
320
+ } catch (error) {
321
+ // If parsing/compilation fails, return the errors
322
+ if (error instanceof MalloyError) {
323
+ return { problems: error.problems };
324
+ }
325
+ // If it's a system error (e.g. file not found), throw it up
326
+ throw error;
261
327
  }
262
- // If it's a system error (e.g. file not found), throw it up
263
- throw error;
264
- }
328
+ });
265
329
  }
266
330
 
267
331
  public listApiConnections(): ApiConnection[] {
@@ -389,7 +453,22 @@ export class Environment {
389
453
  }
390
454
  }
391
455
 
392
- /** One mutex per package name; never replace after create (avoids parallel loads). */
456
+ /**
457
+ * One mutex per package name; never replace after create — replacing
458
+ * would allow two loads of the same package to run in parallel and
459
+ * race on the canonical directory.
460
+ *
461
+ * `deletePackage` intentionally leaves the entry behind: a
462
+ * subsequent re-install must serialize against any straggling
463
+ * readers from the deleted generation that are still inside
464
+ * `withPackageLock`. The map therefore grows by the count of
465
+ * *distinct* package names the environment has ever served, not by
466
+ * install churn, so for the publisher's expected workload
467
+ * (config-declared packages, occasional ad-hoc additions) this is
468
+ * bounded in practice. Long-lived deployments that create and
469
+ * delete unique package names indefinitely would need an explicit
470
+ * sweep; we'll add one if/when that pattern appears.
471
+ */
393
472
  private getOrCreatePackageMutex(packageName: string): Mutex {
394
473
  let packageMutex = this.packageMutexes.get(packageName);
395
474
  if (packageMutex === undefined) {
@@ -399,78 +478,202 @@ export class Environment {
399
478
  return packageMutex;
400
479
  }
401
480
 
481
+ /**
482
+ * Run `fn` while holding the per-package mutex. This is the single
483
+ * synchronization primitive that protects a package directory: every
484
+ * code path that mutates `{environmentPath}/{packageName}/` or reads
485
+ * from disk under it must serialize through this lock. See the lock
486
+ * ordering note above the `packageMutexes` field for the wider
487
+ * invariant.
488
+ *
489
+ * `async-mutex` is **not reentrant** — `fn` must not call any other
490
+ * method that calls `withPackageLock` on the same package, or it will
491
+ * deadlock. Use the `_xxxLocked` variants below in that case.
492
+ */
493
+ public async withPackageLock<T>(
494
+ packageName: string,
495
+ fn: () => Promise<T>,
496
+ ): Promise<T> {
497
+ assertSafePackageName(packageName);
498
+ return this.getOrCreatePackageMutex(packageName).runExclusive(fn);
499
+ }
500
+
501
+ private allocateStagingPath(packageName: string): string {
502
+ return safeJoinUnderRoot(
503
+ this.environmentPath,
504
+ STAGING_DIR_NAME,
505
+ `${packageName}-${crypto.randomUUID()}`,
506
+ );
507
+ }
508
+
509
+ private allocateRetiredPath(packageName: string): string {
510
+ return safeJoinUnderRoot(
511
+ this.environmentPath,
512
+ RETIRED_DIR_NAME,
513
+ `${packageName}-${crypto.randomUUID()}`,
514
+ );
515
+ }
516
+
517
+ /**
518
+ * Best-effort sweep of `.staging/` and `.retired/` left over from a
519
+ * previous run (crash, OOM, etc). Safe because both dirs are managed
520
+ * exclusively by `installPackage` / `deletePackage`; no in-flight
521
+ * operation in this process can be using them yet.
522
+ *
523
+ * Static + path-as-parameter on purpose: the sink path here must
524
+ * derive from the validated factory argument, not from `this`,
525
+ * because CodeQL's path-injection query conservatively treats every
526
+ * field on this class as tainted (other methods on the same class
527
+ * receive request-derived `packageName` values).
528
+ */
529
+ public static async sweepStaleInstallDirs(
530
+ environmentPath: string,
531
+ ): Promise<void> {
532
+ assertSafeEnvironmentPath(environmentPath);
533
+ for (const dirName of [STAGING_DIR_NAME, RETIRED_DIR_NAME]) {
534
+ const dir = safeJoinUnderRoot(environmentPath, dirName);
535
+ // Inline sanitizer barriers in the precise shape CodeQL's
536
+ // `js/path-injection` query recognises (regex-test +
537
+ // `indexOf("..") !== -1` guard) so the sink right below is
538
+ // covered even when the call chain feeding `environmentPath`
539
+ // is taint-tracked from an HTTP request handler.
540
+ if (dir.indexOf("..") !== -1) continue;
541
+ if (path.basename(dir) !== dirName) continue;
542
+ try {
543
+ await fs.promises.rm(dir, { recursive: true, force: true });
544
+ } catch (err) {
545
+ logger.warn(`Failed to sweep stale ${dirName} dir at ${dir}`, {
546
+ error: err,
547
+ });
548
+ }
549
+ }
550
+ }
551
+
552
+ /**
553
+ * Attach (or detach with `null`) the memory governor that gates new
554
+ * package allocations. The single instance is owned by the
555
+ * EnvironmentStore and propagated to every Environment so the
556
+ * back-pressure decision is process-wide.
557
+ */
558
+ public setMemoryGovernor(governor: PackageMemoryGovernor | null): void {
559
+ this.memoryGovernor = governor;
560
+ }
561
+
562
+ /**
563
+ * Choke-point check called from every code path that would allocate
564
+ * a *new* package into the in-memory map (lazy load on cache miss,
565
+ * explicit reload, `addPackage`). Throws HTTP 503 when the governor
566
+ * is back-pressured; cheap no-op when the governor is unset or
567
+ * happy.
568
+ *
569
+ * `allowAdmission` is the documented opt-out for read paths that
570
+ * genuinely cannot tolerate 503s. None of the current callers set
571
+ * it; the parameter exists so a future caller (e.g. a
572
+ * health/warmup probe) can self-document its bypass intent.
573
+ */
574
+ private assertCanAdmitNewPackage(
575
+ packageName: string,
576
+ reason: string,
577
+ allowAdmission: boolean,
578
+ ): void {
579
+ if (allowAdmission) return;
580
+ if (!this.memoryGovernor?.isBackpressured()) return;
581
+ throw new ServiceUnavailableError(
582
+ `Publisher is under memory pressure and cannot ${reason} (package "${packageName}", environment "${this.environmentName}"). Retry after the server's memory usage drops below the configured low-water mark.`,
583
+ );
584
+ }
585
+
402
586
  public async getPackage(
403
587
  packageName: string,
404
588
  reload: boolean = false,
589
+ options: { allowAdmission?: boolean } = {},
405
590
  ): Promise<Package> {
406
- // Check if package is already loaded first
591
+ assertSafePackageName(packageName);
592
+ // Fast-path: serve from cache without acquiring the lock. Safe because
593
+ // `Package` references are immutable; the disk-reading methods that
594
+ // actually need protection (compileSource, getModelFileText,
595
+ // reloadAllModelsForPackage, ...) acquire the lock themselves.
596
+ //
597
+ // INVARIANT: callers that consume the returned Package on the fast
598
+ // path (notably MCP resource handlers and Model.getModel()) must
599
+ // remain in-memory only. If any code reachable from a `Package`
600
+ // method ever grows new disk I/O against the canonical tree, that
601
+ // path needs to be bracketed by `withPackageLock`; otherwise a
602
+ // concurrent install/delete will race against an unlocked reader.
407
603
  const _package = this.packages.get(packageName);
408
604
  if (_package !== undefined && !reload) {
409
605
  return _package;
410
606
  }
411
607
 
412
- // Serialize load per package name so concurrent callers share one Mutex and
413
- // failed loads cannot rm the tree while another load is still scanning it.
414
- const packageMutex = this.getOrCreatePackageMutex(packageName);
608
+ // We are either reloading or about to lazy-load on a cache miss —
609
+ // both allocate a new package. This is the single choke point
610
+ // for admission control; controllers no longer need their own
611
+ // back-pressure check.
612
+ this.assertCanAdmitNewPackage(
613
+ packageName,
614
+ reload ? "reload a package" : "load a package",
615
+ options.allowAdmission === true,
616
+ );
415
617
 
416
- if (packageMutex.isLocked()) {
417
- logger.debug(
418
- `Package ${packageName} is being loaded, waiting for unlock...`,
419
- );
420
- await packageMutex.waitForUnlock();
421
- logger.debug(`Package ${packageName} unlocked`);
422
- const existingPackage = this.packages.get(packageName);
423
- if (existingPackage !== undefined && !reload) {
424
- logger.debug(`Package ${packageName} loaded by another request`);
425
- return existingPackage;
426
- }
427
- // Reload, or prior load failed — continue under the same mutex.
428
- }
618
+ return this.withPackageLock(packageName, () =>
619
+ this._loadOrGetPackageLocked(packageName, reload),
620
+ );
621
+ }
429
622
 
430
- return packageMutex.runExclusive(async () => {
431
- // Double-check after acquiring mutex
432
- const existingPackage = this.packages.get(packageName);
433
- if (existingPackage !== undefined && !reload) {
434
- return existingPackage;
435
- }
623
+ /**
624
+ * Load (or reload) a package from its canonical disk location. Assumes
625
+ * the caller holds the per-package mutex (via {@link withPackageLock}).
626
+ *
627
+ * Used by {@link getPackage} and by {@link compileSource} so the
628
+ * cache-miss path doesn't re-enter the mutex.
629
+ */
630
+ private async _loadOrGetPackageLocked(
631
+ packageName: string,
632
+ reload: boolean = false,
633
+ ): Promise<Package> {
634
+ const existingPackage = this.packages.get(packageName);
635
+ if (existingPackage !== undefined && !reload) {
636
+ return existingPackage;
637
+ }
436
638
 
437
- // Set package status to loading
438
- this.setPackageStatus(packageName, PackageStatus.LOADING);
639
+ this.setPackageStatus(packageName, PackageStatus.LOADING);
439
640
 
440
- try {
441
- logger.debug(`Loading package ${packageName}...`);
442
- const packagePath = path.join(this.environmentPath, packageName);
443
- const _package = await Package.create(
444
- this.environmentName,
445
- packageName,
446
- packagePath,
447
- () => this.malloyConfig.malloyConfig,
641
+ try {
642
+ logger.debug(`Loading package ${packageName}...`);
643
+ const packagePath = safeJoinUnderRoot(
644
+ this.environmentPath,
645
+ packageName,
646
+ );
647
+ const _package = await Package.create(
648
+ this.environmentName,
649
+ packageName,
650
+ packagePath,
651
+ () => this.malloyConfig.malloyConfig,
652
+ );
653
+ if (existingPackage !== undefined && reload) {
654
+ this.retireConnectionGeneration(`package ${packageName}`, () =>
655
+ existingPackage.getMalloyConfig().releaseConnections(),
448
656
  );
449
- if (existingPackage !== undefined && reload) {
450
- this.retireConnectionGeneration(`package ${packageName}`, () =>
451
- existingPackage.getMalloyConfig().releaseConnections(),
452
- );
453
- }
454
- this.packages.set(packageName, _package);
455
-
456
- // Set package status to serving
457
- this.setPackageStatus(packageName, PackageStatus.SERVING);
458
- logger.debug(`Successfully loaded package ${packageName}`);
459
-
460
- return _package;
461
- } catch (error) {
462
- logger.error(`Failed to load package ${packageName}`, { error });
463
- // Clean up on error - mutex will be automatically released by runExclusive
464
- this.packages.delete(packageName);
465
- this.packageStatuses.delete(packageName);
466
- throw error;
467
657
  }
468
- // Mutex is automatically released here by runExclusive
469
- });
658
+ this.packages.set(packageName, _package);
659
+ this.setPackageStatus(packageName, PackageStatus.SERVING);
660
+ logger.debug(`Successfully loaded package ${packageName}`);
661
+
662
+ return _package;
663
+ } catch (error) {
664
+ logger.error(`Failed to load package ${packageName}`, { error });
665
+ this.packages.delete(packageName);
666
+ this.packageStatuses.delete(packageName);
667
+ throw error;
668
+ }
470
669
  }
471
670
 
472
- public async addPackage(packageName: string) {
473
- const packagePath = path.join(this.environmentPath, packageName);
671
+ public async addPackage(
672
+ packageName: string,
673
+ options: { allowAdmission?: boolean } = {},
674
+ ) {
675
+ assertSafePackageName(packageName);
676
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
474
677
  if (
475
678
  !(await fs.promises
476
679
  .access(packagePath)
@@ -480,6 +683,14 @@ export class Environment {
480
683
  ) {
481
684
  throw new PackageNotFoundError(`Package ${packageName} not found`);
482
685
  }
686
+ // 404 takes precedence over 503 so a permanent "you forgot to
687
+ // upload the package" failure isn't masked as a transient
688
+ // "retry later" — the gate runs after the existence check.
689
+ this.assertCanAdmitNewPackage(
690
+ packageName,
691
+ "add a new package",
692
+ options.allowAdmission === true,
693
+ );
483
694
  logger.info(
484
695
  `Adding package ${packageName} to environment ${this.environmentName}`,
485
696
  {
@@ -488,42 +699,209 @@ export class Environment {
488
699
  },
489
700
  );
490
701
 
491
- const packageMutex = this.getOrCreatePackageMutex(packageName);
492
- if (packageMutex.isLocked()) {
493
- logger.debug(
494
- `Package ${packageName} is being loaded, waiting before addPackage...`,
702
+ return this.withPackageLock(packageName, () =>
703
+ this._addPackageLocked(packageName),
704
+ );
705
+ }
706
+
707
+ private async _addPackageLocked(
708
+ packageName: string,
709
+ ): Promise<Package | undefined> {
710
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
711
+ const existingPackage = this.packages.get(packageName);
712
+ if (existingPackage !== undefined) {
713
+ return existingPackage;
714
+ }
715
+
716
+ this.setPackageStatus(packageName, PackageStatus.LOADING);
717
+ try {
718
+ this.packages.set(
719
+ packageName,
720
+ await Package.create(
721
+ this.environmentName,
722
+ packageName,
723
+ packagePath,
724
+ () => this.malloyConfig.malloyConfig,
725
+ ),
495
726
  );
496
- await packageMutex.waitForUnlock();
497
- const alreadyLoaded = this.packages.get(packageName);
498
- if (alreadyLoaded !== undefined) {
499
- return alreadyLoaded;
500
- }
727
+ } catch (error) {
728
+ logger.error("Error adding package", { error });
729
+ this.deletePackageStatus(packageName);
730
+ throw error;
501
731
  }
732
+ this.setPackageStatus(packageName, PackageStatus.SERVING);
733
+ return this.packages.get(packageName);
734
+ }
502
735
 
503
- return packageMutex.runExclusive(async () => {
504
- const existingPackage = this.packages.get(packageName);
505
- if (existingPackage !== undefined) {
506
- return existingPackage;
736
+ /**
737
+ * Replace a package on disk via stage-and-swap, then load it.
738
+ *
739
+ * - Phase 1 (no lock): run `downloader(stagingPath)`, writing the new
740
+ * content into a fresh sibling dir at `.staging/<pkg>-<uuid>/`. This
741
+ * is where multi-second downloads (git clone, GCS pull, ...) happen.
742
+ * - Phase 2 (lock held): atomically rename any existing canonical tree
743
+ * out to `.retired/<pkg>-<uuid>/`, rename staging into the canonical
744
+ * path, and run `Package.create` against the canonical path.
745
+ * - Phase 3 (after lock release): retire the old package's connections
746
+ * via the existing 30s drain and `fs.rm` the retired tree.
747
+ *
748
+ * Concurrent compiles / `getModelFileText` / `reloadAllModels` calls
749
+ * take the same mutex and so are mutually exclusive with the Phase 2
750
+ * swap, but they never queue behind a long Phase 1 download.
751
+ *
752
+ * On failure (Phase 1 download or Phase 2 `Package.create`), the staging
753
+ * dir is removed and — if we already renamed the old tree aside — the
754
+ * old tree is renamed back so the canonical path is restored.
755
+ */
756
+ public async installPackage(
757
+ packageName: string,
758
+ downloader: (stagingPath: string) => Promise<void>,
759
+ ): Promise<Package> {
760
+ assertSafePackageName(packageName);
761
+ const stagingPath = this.allocateStagingPath(packageName);
762
+ await fs.promises.mkdir(path.dirname(stagingPath), { recursive: true });
763
+
764
+ try {
765
+ await downloader(stagingPath);
766
+ } catch (err) {
767
+ await fs.promises
768
+ .rm(stagingPath, { recursive: true, force: true })
769
+ .catch(() => {});
770
+ throw err;
771
+ }
772
+
773
+ return this.withPackageLock(packageName, async () => {
774
+ const canonicalPath = safeJoinUnderRoot(
775
+ this.environmentPath,
776
+ packageName,
777
+ );
778
+ let retiredPath: string | undefined;
779
+
780
+ const oldPackage = this.packages.get(packageName);
781
+ const oldExistsOnDisk = await fs.promises
782
+ .access(canonicalPath)
783
+ .then(() => true)
784
+ .catch(() => false);
785
+
786
+ if (oldExistsOnDisk) {
787
+ retiredPath = this.allocateRetiredPath(packageName);
788
+ await fs.promises.mkdir(path.dirname(retiredPath), {
789
+ recursive: true,
790
+ });
791
+ await fs.promises.rename(canonicalPath, retiredPath);
507
792
  }
508
793
 
509
- this.setPackageStatus(packageName, PackageStatus.LOADING);
794
+ let newPackage: Package;
510
795
  try {
511
- this.packages.set(
796
+ await fs.promises.rename(stagingPath, canonicalPath);
797
+
798
+ this.setPackageStatus(packageName, PackageStatus.LOADING);
799
+ newPackage = await Package.create(
800
+ this.environmentName,
512
801
  packageName,
513
- await Package.create(
514
- this.environmentName,
515
- packageName,
516
- packagePath,
517
- () => this.malloyConfig.malloyConfig,
518
- ),
802
+ canonicalPath,
803
+ () => this.malloyConfig.malloyConfig,
519
804
  );
520
- } catch (error) {
521
- logger.error("Error adding package", { error });
805
+ } catch (err) {
806
+ // Rollback: clobber whatever (partial) content sits at canonical
807
+ // — Package.create's own failure-cleanup may have already rm'd
808
+ // the directory, so the most common outcome here is ENOENT.
809
+ // `force: true` plus the `.catch(() => {})` make this a
810
+ // best-effort wipe whose only job is to leave the rename-back
811
+ // below a clean destination. Then put the old tree back if we
812
+ // moved one aside.
813
+ await fs.promises
814
+ .rm(canonicalPath, { recursive: true, force: true })
815
+ .catch(() => {});
816
+ if (retiredPath) {
817
+ try {
818
+ await fs.promises.rename(retiredPath, canonicalPath);
819
+ } catch (restoreErr) {
820
+ logger.error(
821
+ "Failed to restore retired package after install rollback",
822
+ {
823
+ error: restoreErr,
824
+ retiredPath,
825
+ canonicalPath,
826
+ },
827
+ );
828
+ }
829
+ }
830
+ await fs.promises
831
+ .rm(stagingPath, { recursive: true, force: true })
832
+ .catch(() => {});
522
833
  this.deletePackageStatus(packageName);
523
- throw error;
834
+ throw err;
524
835
  }
836
+
837
+ this.packages.set(packageName, newPackage);
525
838
  this.setPackageStatus(packageName, PackageStatus.SERVING);
526
- return this.packages.get(packageName);
839
+
840
+ if (oldPackage) {
841
+ this.retireConnectionGeneration(`package ${packageName}`, () =>
842
+ oldPackage.getMalloyConfig().releaseConnections(),
843
+ );
844
+ }
845
+
846
+ if (retiredPath) {
847
+ const pathToClean = retiredPath;
848
+ setImmediate(() => {
849
+ void fs.promises
850
+ .rm(pathToClean, { recursive: true, force: true })
851
+ .catch((err) => {
852
+ logger.warn(
853
+ `Failed to clean up retired package directory ${pathToClean}`,
854
+ { error: err },
855
+ );
856
+ });
857
+ });
858
+ }
859
+
860
+ return newPackage;
861
+ });
862
+ }
863
+
864
+ /**
865
+ * Reload every model in a package against the supplied build manifest,
866
+ * holding the per-package mutex for the duration of the disk reads.
867
+ * Replaces direct `Package.reloadAllModels` calls from outside
868
+ * `Environment`.
869
+ */
870
+ public async reloadAllModelsForPackage(
871
+ packageName: string,
872
+ manifest: BuildManifest["entries"],
873
+ ): Promise<void> {
874
+ assertSafePackageName(packageName);
875
+ return this.withPackageLock(packageName, async () => {
876
+ const pkg = this.packages.get(packageName);
877
+ if (!pkg) {
878
+ throw new PackageNotFoundError(
879
+ `Package ${packageName} is not loaded`,
880
+ );
881
+ }
882
+ await pkg.reloadAllModels(manifest);
883
+ });
884
+ }
885
+
886
+ /**
887
+ * Read a model's source text from disk, holding the per-package mutex
888
+ * so the read is serialized against {@link installPackage} /
889
+ * {@link deletePackage} / {@link updatePackage}.
890
+ */
891
+ public async getModelFileText(
892
+ packageName: string,
893
+ modelPath: string,
894
+ ): Promise<string> {
895
+ assertSafePackageName(packageName);
896
+ assertSafeRelativeModelPath(modelPath);
897
+ return this.withPackageLock(packageName, async () => {
898
+ const pkg = this.packages.get(packageName);
899
+ if (!pkg) {
900
+ throw new PackageNotFoundError(
901
+ `Package ${packageName} is not loaded`,
902
+ );
903
+ }
904
+ return pkg.getModelFileText(modelPath);
527
905
  });
528
906
  }
529
907
 
@@ -531,8 +909,8 @@ export class Environment {
531
909
  packageName: string,
532
910
  metadata: { name: string; description?: string },
533
911
  ): Promise<void> {
534
- const packagePath = path.join(this.environmentPath, packageName);
535
- const manifestPath = path.join(packagePath, "publisher.json");
912
+ const packagePath = safeJoinUnderRoot(this.environmentPath, packageName);
913
+ const manifestPath = safeJoinUnderRoot(packagePath, "publisher.json");
536
914
 
537
915
  try {
538
916
  // Read existing manifest
@@ -566,26 +944,29 @@ export class Environment {
566
944
  }
567
945
 
568
946
  public async updatePackage(packageName: string, body: ApiPackage) {
569
- const _package = this.packages.get(packageName);
570
- if (!_package) {
571
- throw new PackageNotFoundError(`Package ${packageName} not found`);
572
- }
573
- if (body.name) {
574
- _package.setName(body.name);
575
- }
576
- _package.setPackageMetadata({
577
- name: body.name,
578
- description: body.description,
579
- resource: body.resource,
580
- location: body.location,
581
- });
947
+ assertSafePackageName(packageName);
948
+ return this.withPackageLock(packageName, async () => {
949
+ const _package = this.packages.get(packageName);
950
+ if (!_package) {
951
+ throw new PackageNotFoundError(`Package ${packageName} not found`);
952
+ }
953
+ if (body.name) {
954
+ _package.setName(body.name);
955
+ }
956
+ _package.setPackageMetadata({
957
+ name: body.name,
958
+ description: body.description,
959
+ resource: body.resource,
960
+ location: body.location,
961
+ });
582
962
 
583
- await this.writePackageManifest(packageName, {
584
- name: packageName,
585
- description: body.description,
586
- });
963
+ await this.writePackageManifest(packageName, {
964
+ name: packageName,
965
+ description: body.description,
966
+ });
587
967
 
588
- return _package.getPackageMetadata();
968
+ return _package.getPackageMetadata();
969
+ });
589
970
  }
590
971
 
591
972
  public getPackageStatus(packageName: string): PackageInfo | undefined {
@@ -606,48 +987,83 @@ export class Environment {
606
987
  }
607
988
 
608
989
  public async deletePackage(packageName: string): Promise<void> {
609
- const _package = this.packages.get(packageName);
610
- if (!_package) {
611
- return;
612
- }
613
- const packageStatus = this.packageStatuses.get(packageName);
614
-
615
- if (packageStatus?.status === PackageStatus.LOADING) {
616
- logger.error("Package loading. Can't unload.", {
617
- environmentName: this.environmentName,
618
- packageName,
619
- });
620
- throw new Error(
621
- "Package loading. Can't unload. " +
622
- this.environmentName +
623
- " " +
990
+ assertSafePackageName(packageName);
991
+ return this.withPackageLock(packageName, async () => {
992
+ const _package = this.packages.get(packageName);
993
+ if (!_package) {
994
+ return;
995
+ }
996
+ const packageStatus = this.packageStatuses.get(packageName);
997
+
998
+ // The mutex now serializes load/install/compile against delete, so
999
+ // the LOADING-state guard is mostly vestigial — left in place for
1000
+ // backwards-compatible error messaging in case anything bypasses
1001
+ // the lock.
1002
+ if (packageStatus?.status === PackageStatus.LOADING) {
1003
+ logger.error("Package loading. Can't unload.", {
1004
+ environmentName: this.environmentName,
624
1005
  packageName,
625
- );
626
- } else if (packageStatus?.status === PackageStatus.SERVING) {
627
- this.setPackageStatus(packageName, PackageStatus.UNLOADING);
628
- }
1006
+ });
1007
+ throw new Error(
1008
+ "Package loading. Can't unload. " +
1009
+ this.environmentName +
1010
+ " " +
1011
+ packageName,
1012
+ );
1013
+ } else if (packageStatus?.status === PackageStatus.SERVING) {
1014
+ this.setPackageStatus(packageName, PackageStatus.UNLOADING);
1015
+ }
629
1016
 
630
- await _package.getMalloyConfig().releaseConnections();
1017
+ // Retire the package's connections via the existing 30s drain so
1018
+ // any in-flight queries that already acquired a connection finish
1019
+ // before the underlying duckdb handle is released.
1020
+ this.retireConnectionGeneration(`package ${packageName}`, () =>
1021
+ _package.getMalloyConfig().releaseConnections(),
1022
+ );
631
1023
 
632
- try {
633
- await fs.promises.rm(path.join(this.environmentPath, packageName), {
634
- recursive: true,
635
- force: true,
636
- });
637
- } catch (err) {
638
- logger.error(
639
- "Error removing package directory while unloading package",
640
- {
641
- error: err,
642
- environmentName: this.environmentName,
643
- packageName,
644
- },
1024
+ // Atomically rename the canonical tree out of the way so no reader
1025
+ // can stat into it after the lock is released. The actual fs.rm is
1026
+ // deferred to setImmediate to keep the lock-hold time at one
1027
+ // rename rather than a (potentially slow) recursive remove.
1028
+ const canonicalPath = safeJoinUnderRoot(
1029
+ this.environmentPath,
1030
+ packageName,
645
1031
  );
646
- }
1032
+ const retiredPath = this.allocateRetiredPath(packageName);
1033
+ let renamed = false;
1034
+ try {
1035
+ await fs.promises.mkdir(path.dirname(retiredPath), {
1036
+ recursive: true,
1037
+ });
1038
+ await fs.promises.rename(canonicalPath, retiredPath);
1039
+ renamed = true;
1040
+ } catch (err) {
1041
+ logger.error(
1042
+ "Error renaming package directory to retired during unload",
1043
+ {
1044
+ error: err,
1045
+ environmentName: this.environmentName,
1046
+ packageName,
1047
+ },
1048
+ );
1049
+ }
647
1050
 
648
- // Remove from internal tracking
649
- this.packages.delete(packageName);
650
- this.packageStatuses.delete(packageName);
1051
+ this.packages.delete(packageName);
1052
+ this.packageStatuses.delete(packageName);
1053
+
1054
+ if (renamed) {
1055
+ setImmediate(() => {
1056
+ void fs.promises
1057
+ .rm(retiredPath, { recursive: true, force: true })
1058
+ .catch((err) => {
1059
+ logger.warn(
1060
+ `Failed to clean up retired package directory ${retiredPath}`,
1061
+ { error: err },
1062
+ );
1063
+ });
1064
+ });
1065
+ }
1066
+ });
651
1067
  }
652
1068
 
653
1069
  public updateConnections(