@malloy-publisher/server 0.0.188 → 0.0.190

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/dist/app/api-doc.yaml +423 -60
  2. package/dist/app/assets/{HomePage-DsuUvSI_.js → HomePage-Dn3E4CuB.js} +1 -1
  3. package/dist/app/assets/{MainPage-DHWFkEN6.js → MainPage-BzB3yoqi.js} +1 -1
  4. package/dist/app/assets/{ModelPage-DNwcx1nE.js → ModelPage-C9O_sAXT.js} +1 -1
  5. package/dist/app/assets/{PackagePage-DSgz9G2V.js → PackagePage-DcxKEjBX.js} +1 -1
  6. package/dist/app/assets/{ProjectPage-CSdPosLV.js → ProjectPage-BDj307rF.js} +1 -1
  7. package/dist/app/assets/{RouteError-orw1RX8q.js → RouteError-DAShbVCG.js} +1 -1
  8. package/dist/app/assets/{WorkbookPage-Bp-BpGjL.js → WorkbookPage-Cs_XYEaB.js} +1 -1
  9. package/dist/app/assets/{core-B4ZYB7aS.es-8Zh0TkSr.js → core-CjeTkq8O.es-BqRc6yhC.js} +1 -1
  10. package/dist/app/assets/{index-BL2TJgTw.js → index-15BOvhp0.js} +4 -4
  11. package/dist/app/assets/{index-BWJkzsfl.js → index-Bb2jqquW.js} +1 -1
  12. package/dist/app/assets/{index-BefdHHMa.js → index-D68X76-7.js} +1 -1
  13. package/dist/app/assets/{index.umd-lY-87l4L.js → index.umd-DGBekgSu.js} +1 -1
  14. package/dist/app/index.html +1 -1
  15. package/dist/instrumentation.js +98 -77
  16. package/dist/server.js +1834 -450
  17. package/package.json +5 -3
  18. package/src/controller/connection.controller.ts +27 -20
  19. package/src/controller/manifest.controller.ts +29 -0
  20. package/src/controller/materialization.controller.ts +125 -0
  21. package/src/controller/model.controller.ts +0 -2
  22. package/src/controller/package.controller.ts +53 -2
  23. package/src/errors.ts +24 -0
  24. package/src/server.ts +196 -5
  25. package/src/service/manifest_service.spec.ts +201 -0
  26. package/src/service/manifest_service.ts +106 -0
  27. package/src/service/materialization_service.spec.ts +648 -0
  28. package/src/service/materialization_service.ts +929 -0
  29. package/src/service/materialized_table_gc.spec.ts +383 -0
  30. package/src/service/materialized_table_gc.ts +279 -0
  31. package/src/service/model.ts +25 -4
  32. package/src/service/package.ts +50 -0
  33. package/src/service/project_store.ts +21 -2
  34. package/src/service/quoting.ts +41 -0
  35. package/src/service/resolve_project.ts +13 -0
  36. package/src/storage/DatabaseInterface.ts +103 -1
  37. package/src/storage/{StorageManager.spec.ts → StorageManager.mock.ts} +9 -0
  38. package/src/storage/StorageManager.ts +119 -1
  39. package/src/storage/duckdb/DuckDBManifestStore.ts +70 -0
  40. package/src/storage/duckdb/DuckDBRepository.ts +99 -9
  41. package/src/storage/duckdb/ManifestRepository.ts +119 -0
  42. package/src/storage/duckdb/MaterializationRepository.ts +249 -0
  43. package/src/storage/duckdb/manifest_store.spec.ts +133 -0
  44. package/src/storage/duckdb/schema.ts +59 -1
  45. package/src/storage/ducklake/DuckLakeManifestStore.ts +146 -0
  46. package/tests/fixtures/persist-test/data/orders.csv +5 -0
  47. package/tests/fixtures/persist-test/persist_test.malloy +11 -0
  48. package/tests/fixtures/persist-test/publisher.json +5 -0
  49. package/tests/fixtures/publisher.config.json +15 -0
  50. package/tests/harness/rest_e2e.ts +68 -0
  51. package/tests/integration/materialization/materialization_lifecycle.integration.spec.ts +470 -0
  52. package/tests/integration/mcp/mcp_execute_query_tool.integration.spec.ts +2 -2
@@ -0,0 +1,929 @@
1
+ import type {
2
+ BuildGraph,
3
+ Connection as MalloyConnection,
4
+ PersistSource,
5
+ } from "@malloydata/malloy";
6
+ import { Manifest } from "@malloydata/malloy";
7
+ import {
8
+ BadRequestError,
9
+ InvalidStateTransitionError,
10
+ MaterializationConflictError,
11
+ MaterializationNotFoundError,
12
+ } from "../errors";
13
+ import { logger } from "../logger";
14
+ import type { ManifestEntry } from "../storage/DatabaseInterface";
15
+ import {
16
+ Materialization,
17
+ MaterializationStatus,
18
+ ResourceRepository,
19
+ } from "../storage/DatabaseInterface";
20
+ import { DuplicateActiveMaterializationError } from "../storage/duckdb/MaterializationRepository";
21
+ import { ManifestService } from "./manifest_service";
22
+ import {
23
+ dropManifestEntries,
24
+ GcResult,
25
+ liveTableKey,
26
+ } from "./materialized_table_gc";
27
+ import { Model } from "./model";
28
+ import { ProjectStore } from "./project_store";
29
+ import { quoteTablePath, splitTablePath } from "./quoting";
30
+ import { resolveProjectId } from "./resolve_project";
31
+
32
/**
 * Number of leading BuildID characters kept when deriving a staging table
 * name. A BuildID is a 64-char SHA-256 hex string, so 12 hex chars give
 * 48 bits of entropy: ample uniqueness per source while keeping the final
 * identifier inside every dialect's length limit (Postgres, at 63, is the
 * tightest).
 */
const STAGING_BUILD_ID_LEN = 12;

/**
 * Compute the `_<truncatedBuildId>` suffix that is appended to a table name
 * while that table is being built.
 *
 * The suffix depends only on `buildId`, so it can be re-derived from a
 * manifest entry; that is how GC locates and drops orphaned staging tables.
 *
 * @param buildId Full BuildID of the persist source being built.
 * @returns An underscore followed by the first `STAGING_BUILD_ID_LEN`
 *   characters of `buildId` (fewer if `buildId` is shorter).
 */
export function stagingSuffix(buildId: string): string {
  const truncatedBuildId = buildId.slice(0, STAGING_BUILD_ID_LEN);
  return "_" + truncatedBuildId;
}
48
+
49
/**
 * Derive a stable lookup key for a `(connectionName, tableName)` pair.
 * The key is used to check whether a persist target was created by a
 * previous build.
 *
 * @param connectionName Name of the connection that hosts the table.
 * @param tableName Table name as recorded for the persist target.
 * @returns The two names joined with a `::` separator.
 */
export function manifestTableKey(
  connectionName: string,
  tableName: string,
): string {
  return [connectionName, tableName].join("::");
}
59
+
60
/**
 * Probe whether a table physically exists on the given connection by
 * running a zero-row `SELECT` against it.
 *
 * The probe cannot tell "table not found" apart from any other failure
 * (connectivity, permissions, ...): every error is reported as `false`.
 * Because callers use the answer to decide whether a destructive rebuild
 * is safe, the swallowed error is logged at debug level so a misleading
 * `false` can be diagnosed afterwards.
 *
 * @param connection Connection on which to run the probe query.
 * @param quotedTableName Table path, already quoted for the connection's
 *   dialect; it is interpolated into the SQL verbatim.
 * @returns `true` if the probe query succeeds, `false` if it throws.
 */
export async function tablePhysicallyExists(
  connection: MalloyConnection,
  quotedTableName: string,
): Promise<boolean> {
  try {
    // `WHERE 1=0` makes the query resolve the table without returning rows.
    await connection.runSQL(`SELECT 1 FROM ${quotedTableName} WHERE 1=0`);
    return true;
  } catch (err) {
    logger.debug("Table existence probe failed; treating table as absent", {
      quotedTableName,
      error: err instanceof Error ? err.message : String(err),
    });
    return false;
  }
}
76
+
77
/**
 * State machine for execution statuses: each key maps to the statuses it
 * may move to next. SUCCESS, FAILED and CANCELLED map to an empty list,
 * i.e. they are terminal and an execution in one of them is immutable.
 */
const VALID_TRANSITIONS: Record<MaterializationStatus, MaterializationStatus[]> = {
  // Not started yet: may begin running or be cancelled outright.
  PENDING: ["RUNNING", "CANCELLED"],
  // In flight: may finish, fail, or be cancelled.
  RUNNING: ["SUCCESS", "FAILED", "CANCELLED"],
  // Terminal states.
  SUCCESS: [],
  FAILED: [],
  CANCELLED: [],
};
91
+
92
+ /**
93
+ * Orchestrates package-level materialization builds: triggering builds,
94
+ * cancellation, and the actual Malloy build that materializes persist
95
+ * sources into database tables.
96
+ *
97
+ * A build targets an entire package — all models are compiled and all
98
+ * persist sources across all models are processed in dependency order.
99
+ * The manifest is optionally activated after a successful build so
100
+ * subsequent queries resolve persist references to materialized tables.
101
+ *
102
+ * Enforces at-most-one concurrent build per (project, package) via a
103
+ * DB-level unique index on `materializations.active_key` (see
104
+ * `MaterializationRepository`), and supports cooperative cancellation
105
+ * through `AbortController`.
106
+ *
107
+ * **Multi-worker caveat:** the `materializations` table lives in each
108
+ * worker's *local* DuckDB, so the active-materialization lock is only
109
+ * enforced within a single Publisher process. In orchestrated deployments
110
+ * (shared DuckLake manifest catalog), builds must be externally
111
+ * single-writer until a shared lease is added — see the scope note on
112
+ * `DuckLakeManifestStore`.
113
+ */
114
+ export class MaterializationService {
115
/**
 * Abort controllers for in-flight executions, keyed by execution id, so a
 * running build can be cancelled. The map exists only in process memory
 * and is therefore empty after a server restart, which is why
 * `stopMaterialization` has a fallback path for orphaned executions.
 */
private runningAbortControllers: Map<string, AbortController> = new Map();

/**
 * @param projectStore Source of projects, packages and the storage manager.
 * @param manifestService Reads and writes manifest entries.
 */
constructor(
  private projectStore: ProjectStore,
  private manifestService: ManifestService,
) {}
126
+
127
/** Repository obtained from the project store's storage manager. */
private get repository(): ResourceRepository {
  const { storageManager } = this.projectStore;
  return storageManager.getRepository();
}
130
+
131
+ // ==================== STATE MACHINE ====================
132
+
133
/**
 * Check a status change against `VALID_TRANSITIONS`.
 *
 * @param current Status the execution is in now.
 * @param next Status the caller wants to move to.
 * @throws InvalidStateTransitionError if `next` is not listed for `current`.
 */
private validateTransition(
  current: MaterializationStatus,
  next: MaterializationStatus,
): void {
  const isAllowed = VALID_TRANSITIONS[current].includes(next);
  if (isAllowed) {
    return;
  }
  throw new InvalidStateTransitionError(
    `Cannot transition from ${current} to ${next}`,
  );
}
144
+
145
/**
 * Move an execution to `newStatus`, validating the change against the
 * state machine first, and persist any extra fields alongside the status.
 *
 * @param executionId Id of the materialization record to update.
 * @param newStatus Target status.
 * @param extra Optional additional fields written in the same update.
 * @returns The record as returned by the repository update.
 * @throws MaterializationNotFoundError if no record has this id.
 * @throws InvalidStateTransitionError if the transition is not allowed.
 */
private async transitionExecution(
  executionId: string,
  newStatus: MaterializationStatus,
  extra?: {
    startedAt?: Date;
    completedAt?: Date;
    error?: string | null;
    metadata?: Record<string, unknown> | null;
  },
): Promise<Materialization> {
  const current = await this.repository.getMaterializationById(executionId);
  if (!current) {
    throw new MaterializationNotFoundError(
      `Execution ${executionId} not found`,
    );
  }

  // Validate against the status just read from the repository.
  this.validateTransition(current.status, newStatus);

  return this.repository.updateMaterialization(executionId, {
    status: newStatus,
    ...extra,
  });
}
168
+
169
+ // ==================== BUILD QUERIES ====================
170
+
171
/**
 * List the materializations recorded for a package.
 *
 * @param projectName Project name; resolved to a project id first.
 * @param packageName Package whose materializations are listed.
 * @param options Optional `limit` / `offset`, passed through to the repository.
 * @returns The records returned by the repository.
 */
async listMaterializations(
  projectName: string,
  packageName: string,
  options?: { limit?: number; offset?: number },
): Promise<Materialization[]> {
  const projectId = await this.resolveProjectId(projectName);
  const { repository } = this;
  return repository.listMaterializations(projectId, packageName, options);
}
183
+
184
/**
 * Fetch one materialization and verify it belongs to the given project
 * and package.
 *
 * @param projectName Project name; resolved to a project id first.
 * @param packageName Package the materialization must belong to.
 * @param buildId Id of the materialization record.
 * @returns The matching record.
 * @throws MaterializationNotFoundError if the record does not exist or is
 *   owned by a different project or package.
 */
async getMaterialization(
  projectName: string,
  packageName: string,
  buildId: string,
): Promise<Materialization> {
  const projectId = await this.resolveProjectId(projectName);
  const execution = await this.repository.getMaterializationById(buildId);

  if (
    execution &&
    execution.projectId === projectId &&
    execution.packageName === packageName
  ) {
    return execution;
  }

  // A record owned by another project/package is reported as missing too.
  throw new MaterializationNotFoundError(
    `Materialization ${buildId} not found for package ${packageName}`,
  );
}
202
+
203
+ // ==================== BUILD LIFECYCLE ====================
204
+
205
/**
 * Create a new build record in PENDING state. The build options are saved
 * in the record's metadata so `startMaterialization` can read them back.
 *
 * @param projectName Project name; resolved to a project id first.
 * @param packageName Package to build; must be loadable from the project.
 * @param options `forceRefresh` and `autoLoadManifest`; each defaults to
 *   `false` when absent.
 * @returns The newly created PENDING record.
 * @throws MaterializationConflictError if the package already has an
 *   active materialization.
 */
async createMaterialization(
  projectName: string,
  packageName: string,
  options: { forceRefresh?: boolean; autoLoadManifest?: boolean } = {},
): Promise<Materialization> {
  const projectId = await this.resolveProjectId(projectName);

  // Existence check only: the loaded package itself is not used here.
  const project = await this.projectStore.getProject(projectName, false);
  await project.getPackage(packageName, false);

  // Non-atomic probe, purely for a helpful error message. The race-free
  // guard is the DB-level unique index on active_key, whose violation is
  // handled in the catch block below.
  const alreadyActive = await this.repository.getActiveMaterialization(
    projectId,
    packageName,
  );
  if (alreadyActive) {
    throw new MaterializationConflictError(
      `Package ${packageName} already has an active materialization (${alreadyActive.id})`,
    );
  }

  const metadata = {
    forceRefresh: options.forceRefresh ?? false,
    autoLoadManifest: options.autoLoadManifest ?? false,
  };

  try {
    return await this.repository.createMaterialization(
      projectId,
      packageName,
      "PENDING",
      metadata,
    );
  } catch (err) {
    if (!(err instanceof DuplicateActiveMaterializationError)) {
      throw err;
    }

    // A concurrent create won the race. Re-read so the error can name the
    // winner's id, matching the message of the probe above.
    const winner = await this.repository.getActiveMaterialization(
      projectId,
      packageName,
    );
    const message = winner
      ? `Package ${packageName} already has an active materialization (${winner.id})`
      : `Package ${packageName} already has an active materialization`;
    throw new MaterializationConflictError(message);
  }
}
262
+
263
/**
 * Move a PENDING build to RUNNING and kick off its execution in the
 * background. The RUNNING record is returned right away; the build's
 * outcome is written to the record later by `runMaterialization`.
 *
 * @param projectName Project name; resolved to a project id first.
 * @param packageName Package the build belongs to.
 * @param buildId Id of the PENDING materialization to start.
 * @returns The record after its transition to RUNNING.
 * @throws InvalidStateTransitionError if the build is not PENDING.
 * @throws MaterializationConflictError if a different materialization is
 *   already active for the package.
 */
async startMaterialization(
  projectName: string,
  packageName: string,
  buildId: string,
): Promise<Materialization> {
  const projectId = await this.resolveProjectId(projectName);
  const pending = await this.getMaterialization(
    projectName,
    packageName,
    buildId,
  );

  if (pending.status !== "PENDING") {
    throw new InvalidStateTransitionError(
      `Materialization ${buildId} is ${pending.status}, expected PENDING`,
    );
  }

  // Only a *different* active materialization is a conflict; this build
  // being the active one is expected.
  const active = await this.repository.getActiveMaterialization(
    projectId,
    packageName,
  );
  if (active && active.id !== pending.id) {
    throw new MaterializationConflictError(
      `Package ${packageName} already has an active materialization (${active.id})`,
    );
  }

  const running = await this.transitionExecution(pending.id, "RUNNING", {
    startedAt: new Date(),
  });

  const buildOptions = (pending.metadata ?? {}) as Record<string, unknown>;

  // Fire-and-forget: the build runs in the background. The catch only
  // logs, so a rejection here never becomes an unhandled rejection.
  const background = this.runMaterialization(
    pending.id,
    projectName,
    projectId,
    packageName,
    buildOptions,
  );
  background.catch((err) => {
    logger.error("Unhandled error in background build", {
      executionId: pending.id,
      error: err instanceof Error ? err.message : String(err),
    });
  });

  return running;
}
318
+
319
/**
 * Run one build to completion and record its outcome on the execution.
 *
 * Registers an `AbortController` for the execution while the build runs
 * and always removes it afterwards. On success the record moves to
 * SUCCESS with the build's metadata merged into the stored metadata. On
 * error it moves to CANCELLED if the abort signal fired, otherwise to
 * FAILED. A failure to record the error outcome is logged, not rethrown.
 *
 * @param executionId Id of the RUNNING materialization record.
 * @param projectName Project name, used to load the project and package.
 * @param projectId Resolved project id, used for manifest lookups.
 * @param packageName Package being built.
 * @param metadata Stored build options; `forceRefresh` and
 *   `autoLoadManifest` are read from it.
 */
private async runMaterialization(
  executionId: string,
  projectName: string,
  projectId: string,
  packageName: string,
  metadata: Record<string, unknown>,
): Promise<void> {
  const controller = new AbortController();
  this.runningAbortControllers.set(executionId, controller);

  try {
    const buildMetadata = await this.executeBuild(
      projectName,
      projectId,
      packageName,
      Boolean(metadata.forceRefresh),
      controller.signal,
    );

    if (metadata.autoLoadManifest) {
      // Reload the package's models against the manifest as it stands
      // after the build.
      const freshManifest = await this.manifestService.getManifest(
        projectId,
        packageName,
      );
      const project = await this.projectStore.getProject(projectName, false);
      const pkg = await project.getPackage(packageName, false);
      await pkg.reloadAllModels(freshManifest.entries);
    }

    await this.transitionExecution(executionId, "SUCCESS", {
      completedAt: new Date(),
      metadata: { ...metadata, ...buildMetadata },
    });
  } catch (err) {
    const errorMessage = err instanceof Error ? err.message : String(err);

    try {
      // An aborted signal means the error is the result of a cancellation.
      const wasCancelled = controller.signal.aborted;
      await this.transitionExecution(
        executionId,
        wasCancelled ? "CANCELLED" : "FAILED",
        {
          completedAt: new Date(),
          error: wasCancelled ? "Build cancelled" : errorMessage,
        },
      );
    } catch (transitionErr) {
      logger.error("Failed to transition execution after build error", {
        executionId,
        originalError: errorMessage,
        transitionError:
          transitionErr instanceof Error
            ? transitionErr.message
            : String(transitionErr),
      });
    }
  } finally {
    this.runningAbortControllers.delete(executionId);
  }
}
384
+
385
/**
 * Cancel a specific build.
 *
 * - PENDING: moved straight to CANCELLED.
 * - RUNNING with a registered abort controller: the controller is aborted
 *   and the record is returned as it was read; the CANCELLED transition
 *   is made by the background run when it observes the abort.
 * - RUNNING without a controller (orphaned, e.g. after a restart): the
 *   record is force-moved to CANCELLED.
 *
 * @param projectName Project name.
 * @param packageName Package the build belongs to.
 * @param buildId Id of the materialization to stop.
 * @returns The updated record, or the record as read when only an abort
 *   was signalled.
 * @throws InvalidStateTransitionError if the build is neither PENDING nor
 *   RUNNING.
 */
async stopMaterialization(
  projectName: string,
  packageName: string,
  buildId: string,
): Promise<Materialization> {
  const execution = await this.getMaterialization(
    projectName,
    packageName,
    buildId,
  );

  if (execution.status === "PENDING") {
    return this.transitionExecution(execution.id, "CANCELLED", {
      completedAt: new Date(),
      error: "Build cancelled before starting",
    });
  }

  if (execution.status !== "RUNNING") {
    throw new InvalidStateTransitionError(
      `Materialization ${buildId} is ${execution.status}, cannot stop`,
    );
  }

  const controller = this.runningAbortControllers.get(execution.id);
  if (!controller) {
    // No in-process controller: nothing is running that could record the
    // cancellation, so write it directly.
    return this.transitionExecution(execution.id, "CANCELLED", {
      completedAt: new Date(),
      error: "Force cancelled: execution was orphaned",
    });
  }

  controller.abort();
  return execution;
}
423
+
424
/**
 * Delete a materialization record. Only records in a terminal status
 * (SUCCESS, FAILED, CANCELLED) may be deleted.
 *
 * @param projectName Project name.
 * @param packageName Package the materialization belongs to.
 * @param materializationId Id of the record to delete.
 * @throws InvalidStateTransitionError if the record is PENDING or RUNNING.
 */
async deleteMaterialization(
  projectName: string,
  packageName: string,
  materializationId: string,
): Promise<void> {
  const execution = await this.getMaterialization(
    projectName,
    packageName,
    materializationId,
  );

  const isActive =
    execution.status === "PENDING" || execution.status === "RUNNING";
  if (isActive) {
    throw new InvalidStateTransitionError(
      `Cannot delete materialization ${materializationId} while it is ${execution.status}`,
    );
  }

  await this.repository.deleteMaterialization(execution.id);
}
447
+
448
+ // ==================== PACKAGE TEARDOWN ====================
449
+
450
/**
 * Drop every materialized table and every manifest row of a package.
 *
 * This is the publisher's only out-of-band teardown surface and has one
 * intended caller: the controlplane, on the truly destructive path just
 * before the package/project is torn down. The publisher's `DELETE`
 * endpoints are deliberately *not* wired into it, because the
 * controlplane also calls them for non-destructive unloads
 * (down-replicate, drain, archive) where the entity still exists on other
 * replicas; dropping shared tables there would corrupt surviving workers.
 *
 * Stale rows are reconciled against the package's live source code inline
 * at the end of every successful build (the GC step of
 * {@link executeBuild}), which is where "active" vs. "orphan" is decided
 * authoritatively from the manifest's own `touch()` bookkeeping. This
 * method does not re-derive that information. It drops the whole
 * manifest, which is correct before a deletion and avoids compiling the
 * package's source only to reach a foregone conclusion.
 *
 * It refuses to run while a materialization is active for the package,
 * the same serialization the inline GC gets by running inside the build.
 *
 * @param projectName Project name; resolved to a project id first.
 * @param packageName Package to tear down.
 * @param options `dryRun` reports what would be dropped without issuing
 *   any DROP or deleting any manifest row.
 * @returns The result reported by `dropManifestEntries`.
 * @throws MaterializationConflictError if a materialization is active.
 */
async teardownPackage(
  projectName: string,
  packageName: string,
  options: { dryRun?: boolean } = {},
): Promise<GcResult> {
  const projectId = await this.resolveProjectId(projectName);

  const inFlight = await this.repository.getActiveMaterialization(
    projectId,
    packageName,
  );
  if (inFlight) {
    throw new MaterializationConflictError(
      `Package ${packageName} has an active materialization (${inFlight.id}); cannot tear down`,
    );
  }

  const project = await this.projectStore.getProject(projectName, false);
  const pkg = await project.getPackage(packageName, false);
  const connections = pkg.getConnections();

  const allEntries = await this.manifestService.listEntries(
    projectId,
    packageName,
  );

  // `forceDeleteRowOnMissingConnection`: teardown is the one place where
  // losing the manifest row beats leaving it pointing at a vanished
  // connection. `liveTables` is omitted on purpose: during teardown every
  // entry is stale and nothing is live.
  return dropManifestEntries(allEntries, {
    connections,
    manifestService: this.manifestService,
    projectId,
    dryRun: options.dryRun,
    forceDeleteRowOnMissingConnection: true,
  });
}
516
+
517
+ // ==================== BUILD LOGIC ====================
518
+
519
/**
 * Core build pipeline (4 steps):
 *   1. LOAD           — Load the existing manifest.
 *   2. COMPILE & PLAN — Compile all models, collect dependency graphs.
 *   3. BUILD          — Walk the graphs in dependency order and
 *                       materialize each source.
 *   4. GC             — Drop stale physical tables and prune manifest rows.
 *
 * Build success is not gated on GC: GC failures are returned in the
 * `gcErrors` metadata so the controller/UI can show them.
 *
 * When the plan contains no graphs, the method returns after step 2 with
 * only the two counters; steps 3 and 4 do not run.
 *
 * @param projectName Project name, used to load the project and package.
 * @param projectId Resolved project id, used for manifest access.
 * @param packageName Package to build.
 * @param forceRefresh Rebuild sources even if the manifest already has
 *   their BuildID.
 * @param signal Abort signal, checked before each source is built.
 * @returns Build metadata: counters, per-source results and GC outcome.
 * @throws BadRequestError if a graph names a connection the package lacks.
 * @throws Error with message "Build cancelled" once `signal` is aborted.
 */
private async executeBuild(
  projectName: string,
  projectId: string,
  packageName: string,
  forceRefresh: boolean,
  signal: AbortSignal,
): Promise<Record<string, unknown>> {
  logger.info("Starting materialization build", {
    projectName,
    packageName,
  });

  const project = await this.projectStore.getProject(projectName, false);
  const pkg = await project.getPackage(packageName, false);

  // ── STEP 1: LOAD ───────────────────────────────────────────────
  const manifest = new Manifest();
  const storedManifest = await this.manifestService.getManifest(
    projectId,
    packageName,
  );
  manifest.loadText(JSON.stringify(storedManifest));

  const storedEntries = await this.manifestService.listEntries(
    projectId,
    packageName,
  );
  // Tables that earlier builds recorded in the manifest; consulted by
  // `buildOneSource` before it overwrites a target table.
  const knownMaterializedTables = new Set<string>();
  for (const entry of storedEntries) {
    knownMaterializedTables.add(
      manifestTableKey(entry.connectionName, entry.tableName),
    );
  }

  // ── STEP 2: COMPILE & PLAN ─────────────────────────────────────
  const plan = await this.compilePackageBuildPlan(pkg, signal);

  if (plan.graphs.length === 0) {
    logger.info("No persist sources to build");
    return { sourcesBuilt: 0, sourcesSkipped: 0 };
  }

  // ── STEP 3: BUILD ──────────────────────────────────────────────
  const connections = pkg.getConnections();
  const sourceResults: Record<string, unknown>[] = [];

  for (const graph of plan.graphs) {
    const connection = connections.get(graph.connectionName);
    if (!connection) {
      throw new BadRequestError(
        `Connection '${graph.connectionName}' not found`,
      );
    }

    // Levels, and the nodes inside a level, are built one after another.
    for (const level of graph.nodes) {
      for (const node of level) {
        if (signal.aborted) throw new Error("Build cancelled");

        const persistSource = plan.sources[node.sourceID];
        if (!persistSource) {
          logger.warn(
            `Source ${node.sourceID} not found in build plan, skipping`,
          );
          continue;
        }

        sourceResults.push(
          await this.buildOneSource(
            persistSource,
            manifest,
            connection,
            plan.connectionDigests,
            forceRefresh,
            projectId,
            packageName,
            knownMaterializedTables,
          ),
        );
      }
    }
  }

  // Every result that is not "built" counts as skipped.
  const sourcesBuilt = sourceResults.filter(
    (result) => result.status === "built",
  ).length;
  const sourcesSkipped = sourceResults.length - sourcesBuilt;

  // ── STEP 4: GC ─────────────────────────────────────────────────
  const gcResult = await this.runPostBuildGc(
    manifest,
    projectId,
    packageName,
    connections,
  );

  logger.info("Materialization build complete", {
    sourcesBuilt,
    sourcesSkipped,
    gcDropped: gcResult.dropped.length,
    gcErrors: gcResult.errors.length,
  });

  return {
    sourcesBuilt,
    sourcesSkipped,
    sources: sourceResults,
    gcDropped: gcResult.dropped,
    gcErrors: gcResult.errors,
  };
}
638
+
639
+ // ==================== BUILD HELPERS ====================
640
+
641
/**
 * Compile every model in the package and gather, across all of them, the
 * dependency-ordered build graphs, the persist sources, and one
 * pre-computed digest per connection that a graph references.
 *
 * Models without the `##! experimental.persistence` tag are skipped. A
 * sourceID that appears in more than one model is overwritten by the
 * later definition, with a warning.
 *
 * @param pkg Package accessor: model paths, package path and connections.
 * @param signal Abort signal, checked before each model is compiled.
 * @returns The combined graphs, sources by id, and connection digests.
 * @throws BadRequestError if two persist sources resolve to the same
 *   table on the same connection.
 * @throws Error with message "Build cancelled" once `signal` is aborted.
 */
private async compilePackageBuildPlan(
  pkg: {
    getModelPaths(): string[];
    getPackagePath(): string;
    getConnections(): Map<string, MalloyConnection>;
  },
  signal: AbortSignal,
): Promise<{
  graphs: BuildGraph[];
  sources: Record<string, PersistSource>;
  connectionDigests: Record<string, string>;
}> {
  const graphs: BuildGraph[] = [];
  const sources: Record<string, PersistSource> = {};

  for (const modelPath of pkg.getModelPaths()) {
    if (signal.aborted) throw new Error("Build cancelled");

    const { runtime, modelURL, importBaseURL } = await Model.getModelRuntime(
      pkg.getPackagePath(),
      modelPath,
      pkg.getConnections(),
    );

    const malloyModel = await runtime
      .loadModel(modelURL, { importBaseURL })
      .getModel();

    // getBuildPlan() throws when the tag is missing, so test for it first;
    // that keeps plain models in the same package buildable.
    const modelTag = malloyModel.tagParse({ prefix: /^##! / }).tag;
    if (!modelTag.has("experimental", "persistence")) {
      logger.debug(
        "Model has no ##! experimental.persistence tag, skipping",
        { modelPath },
      );
      continue;
    }

    const buildPlan = malloyModel.getBuildPlan();

    for (const issue of buildPlan.tagParseLog) {
      logger.warn("Persist annotation issue", {
        modelPath,
        message: issue.message,
        severity: issue.severity,
        code: issue.code,
      });
    }

    // A plan without graphs contributes no sources either.
    if (buildPlan.graphs.length === 0) {
      continue;
    }

    graphs.push(...buildPlan.graphs);
    for (const [sourceID, source] of Object.entries(buildPlan.sources)) {
      if (sources[sourceID]) {
        logger.warn(
          `Duplicate sourceID "${sourceID}" from model ${modelPath}, overwriting previous definition`,
        );
      }
      sources[sourceID] = source;
    }
  }

  logger.info("Build plan", {
    sourceCount: Object.keys(sources).length,
    graphCount: graphs.length,
  });

  // Fail fast when two persist sources target the same (connection, table).
  const ownerByTable = new Map<string, string>();
  for (const [sourceID, source] of Object.entries(sources)) {
    // Target table: the `#@` tag's `name` text if set, else the source name.
    const tableName =
      source.tagParse({ prefix: /^#@ / }).tag.text("name") || source.name;
    const key = manifestTableKey(source.connectionName, tableName);
    const previousOwner = ownerByTable.get(key);
    if (previousOwner) {
      throw new BadRequestError(
        `Persist target collision: sources '${previousOwner}' and '${sourceID}' both resolve to table '${tableName}' on connection '${source.connectionName}'. Disambiguate with '#@ persist name=...'.`,
      );
    }
    ownerByTable.set(key, sourceID);
  }

  // One digest per referenced connection; connections the package does not
  // have are left without a digest here.
  const connections = pkg.getConnections();
  const connectionDigests: Record<string, string> = {};
  for (const { connectionName } of graphs) {
    const conn = connections.get(connectionName);
    if (conn && !connectionDigests[connectionName]) {
      connectionDigests[connectionName] = await conn.getDigest();
    }
  }

  return { graphs, sources, connectionDigests };
}
744
+
745
/**
 * Materialize one persist source.
 *
 * If the manifest already contains the source's BuildID and no refresh is
 * forced, the entry is touched and the source is skipped. Otherwise the
 * table is built through a staging table (CREATE staging, DROP old
 * target, RENAME staging to target) and a manifest entry is written.
 * Stale entries are left for post-build GC.
 *
 * @param persistSource Source to materialize.
 * @param manifest In-memory manifest for this build; touched or updated.
 * @param connection Connection on which the SQL is executed.
 * @param connectionDigests Digest per connection name.
 * @param forceRefresh Rebuild even when the BuildID is already present.
 * @param projectId Project id for the manifest row.
 * @param packageName Package name for the manifest row.
 * @param knownMaterializedTables Keys of tables created by earlier builds;
 *   the key of a freshly built table is added to it.
 * @returns `{ name, status: "skipped", buildId }` or
 *   `{ name, status: "built", buildId, tableName, durationMs }`.
 * @throws BadRequestError if the target table exists but is not known to
 *   have been created by a materialization build.
 */
private async buildOneSource(
  persistSource: PersistSource,
  manifest: Manifest,
  connection: MalloyConnection,
  connectionDigests: Record<string, string>,
  forceRefresh: boolean,
  projectId: string,
  packageName: string,
  knownMaterializedTables: Set<string>,
): Promise<Record<string, unknown>> {
  const sourceName = persistSource.name;
  const connectionName = persistSource.connectionName;

  const buildId = persistSource.makeBuildId(
    connectionDigests[connectionName],
    persistSource.getSQL(),
  );

  // Already built: mark the entry active so it survives GC.
  const alreadyBuilt = Boolean(manifest.buildManifest.entries[buildId]);
  if (alreadyBuilt && !forceRefresh) {
    manifest.touch(buildId);
    logger.info(`Source ${persistSource.name} up to date, skipping`, {
      buildId,
    });
    return { name: sourceName, status: "skipped", buildId };
  }

  // SQL for the actual build, generated with the current manifest and the
  // connection digests.
  const buildSQL = persistSource.getSQL({
    buildManifest: manifest.buildManifest,
    connectionDigests,
  });

  const tableName =
    persistSource.tagParse({ prefix: /^#@ / }).tag.text("name") ||
    persistSource.name;
  const { schemaPrefix, bareName } = splitTablePath(tableName);
  const stagingTableName = `${schemaPrefix}${bareName}${stagingSuffix(buildId)}`;
  const dialect = persistSource.dialect;
  const quoted = (path: string) => quoteTablePath(path, dialect);

  // Guard: never overwrite a pre-existing table that an earlier
  // materialization build did not create. Otherwise a model author who
  // picks a name that already holds real data (e.g.
  // `#@ persist name=customers`) would have it silently destroyed by the
  // DROP TABLE below.
  const tableKey = manifestTableKey(connectionName, tableName);
  if (!knownMaterializedTables.has(tableKey)) {
    const targetExists = await tablePhysicallyExists(
      connection,
      quoted(tableName),
    );
    if (targetExists) {
      throw new BadRequestError(
        `Refusing to materialize source '${persistSource.name}': ` +
          `target table '${tableName}' already exists on connection ` +
          `'${connectionName}' but was not created by a previous ` +
          `materialization build. Use '#@ persist name=...' to ` +
          `choose a different table name, or drop the existing ` +
          `table manually if it is no longer needed.`,
      );
    }
  }

  logger.info(`Building source ${persistSource.name}`, {
    tableName,
    connectionName,
  });

  const startedAt = performance.now();

  // Clear any staging table left behind under the same name.
  await connection.runSQL(
    `DROP TABLE IF EXISTS ${quoted(stagingTableName)}`,
  );

  // If a step after CREATE throws, the staging table must be dropped on a
  // best-effort basis: no manifest row is written for a failed build, so
  // GC would never find a table orphaned under the staging name.
  try {
    await connection.runSQL(
      `CREATE TABLE ${quoted(stagingTableName)} AS (${buildSQL})`,
    );
    await connection.runSQL(`DROP TABLE IF EXISTS ${quoted(tableName)}`);
    // The rename target is the bare name, quoted by the dialect directly.
    await connection.runSQL(
      `ALTER TABLE ${quoted(stagingTableName)} RENAME TO ${dialect.quoteTablePath(bareName)}`,
    );
  } catch (err) {
    try {
      await connection.runSQL(
        `DROP TABLE IF EXISTS ${quoted(stagingTableName)}`,
      );
    } catch (cleanupErr) {
      logger.warn(
        "Build: failed to clean up staging table after a failed rebuild; physical leak",
        {
          stagingTableName,
          connectionName,
          cleanupError:
            cleanupErr instanceof Error
              ? cleanupErr.message
              : String(cleanupErr),
        },
      );
    }
    // The original build error is what the caller sees.
    throw err;
  }

  const durationMs = Math.round(performance.now() - startedAt);

  knownMaterializedTables.add(tableKey);
  manifest.update(buildId, { tableName });

  await this.manifestService.writeEntry(
    projectId,
    packageName,
    buildId,
    tableName,
    persistSource.name,
    connectionName,
  );

  logger.info(`Built source ${persistSource.name}`, {
    tableName,
    durationMs,
  });

  return {
    name: sourceName,
    status: "built",
    buildId,
    tableName,
    durationMs,
  };
}
876
+
877
/**
 * Post-build GC: drop the physical tables and manifest rows of entries
 * whose BuildID is no longer produced by an active persist source.
 *
 * `liveTables` stops a freshly built table from being dropped when a
 * stale row still points at the same `(connection, tableName)` pair.
 *
 * @param manifest In-memory manifest of the build that just ran; its
 *   active entries decide what is live.
 * @param projectId Project id used to list and delete manifest rows.
 * @param packageName Package whose rows are reconciled.
 * @param connections Connections by name, used to issue the drops.
 * @returns The result reported by `dropManifestEntries`. Errors are
 *   logged as a warning, never thrown from here.
 */
private async runPostBuildGc(
  manifest: Manifest,
  projectId: string,
  packageName: string,
  connections: Map<string, MalloyConnection>,
): Promise<GcResult> {
  const activeEntries = manifest.activeEntries.entries;
  const storedEntries = await this.manifestService.listEntries(
    projectId,
    packageName,
  );

  // Single pass: a stored row is either live (its table key is protected)
  // or stale (queued for dropping).
  const liveTables = new Set<string>();
  const staleEntries: ManifestEntry[] = [];
  for (const entry of storedEntries) {
    if (activeEntries[entry.buildId]) {
      liveTables.add(liveTableKey(entry.connectionName, entry.tableName));
    } else {
      staleEntries.push(entry);
    }
  }

  const gcResult = await dropManifestEntries(staleEntries, {
    connections,
    manifestService: this.manifestService,
    projectId,
    liveTables,
  });

  const errorCount = gcResult.errors.length;
  if (errorCount > 0) {
    logger.warn("Materialization GC surfaced errors", {
      errorCount,
      droppedCount: gcResult.dropped.length,
    });
  }

  return gcResult;
}
923
+
924
+ // ==================== HELPERS ====================
925
+
926
/**
 * Resolve a project name to its id by calling the shared
 * `resolveProjectId` helper with this service's repository.
 *
 * @param projectName Project name to resolve.
 * @returns Promise of the project id.
 */
private resolveProjectId(projectName: string): Promise<string> {
  const { repository } = this;
  return resolveProjectId(repository, projectName);
}
929
+ }