@malloy-publisher/server 0.0.180 → 0.0.181-dev-v1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/build.ts +3 -3
  2. package/dist/app/api-doc.yaml +505 -52
  3. package/dist/app/assets/HomePage-Dn3E4CuB.js +1 -0
  4. package/dist/app/assets/{MainPage-BLhfzy47.js → MainPage-BzB3yoqi.js} +2 -2
  5. package/dist/app/assets/{ModelPage-bgdjxhyc.js → ModelPage-C9O_sAXT.js} +1 -1
  6. package/dist/app/assets/PackagePage-DcxKEjBX.js +1 -0
  7. package/dist/app/assets/ProjectPage-BDj307rF.js +1 -0
  8. package/dist/app/assets/{RouteError-CsFH2AdT.js → RouteError-DAShbVCG.js} +1 -1
  9. package/dist/app/assets/{WorkbookPage-CQ37Bfli.js → WorkbookPage-Cs_XYEaB.js} +1 -1
  10. package/dist/app/assets/core-CjeTkq8O.es-BqRc6yhC.js +148 -0
  11. package/dist/app/assets/engine-oniguruma-C4vnmooL.es-jdkXmgTr.js +1 -0
  12. package/dist/app/assets/github-light-JYsPkUQd.es-DAi9KRSo.js +1 -0
  13. package/dist/app/assets/index-15BOvhp0.js +456 -0
  14. package/dist/app/assets/{index-Cev5PtEG.js → index-Bb2jqquW.js} +1 -1
  15. package/dist/app/assets/{index-DcnbmCmI.js → index-D68X76-7.js} +168 -166
  16. package/dist/app/assets/index.umd-DGBekgSu.js +1145 -0
  17. package/dist/app/assets/json-71t8ZF9g.es-BQoSv7ci.js +1 -0
  18. package/dist/app/assets/sql-DCkt643-.es-COK4E0Yg.js +1 -0
  19. package/dist/app/assets/typescript-buWNZFwO.es-Dj6nwHGl.js +1 -0
  20. package/dist/app/index.html +1 -1
  21. package/dist/instrumentation.js +10567 -10584
  22. package/dist/server.js +16972 -15366
  23. package/package.json +17 -15
  24. package/src/controller/connection.controller.ts +27 -20
  25. package/src/controller/manifest.controller.ts +29 -0
  26. package/src/controller/materialization.controller.ts +125 -0
  27. package/src/controller/model.controller.ts +4 -3
  28. package/src/controller/package.controller.ts +53 -2
  29. package/src/controller/query.controller.ts +5 -0
  30. package/src/errors.ts +24 -0
  31. package/src/mcp/resources/model_resource.ts +12 -9
  32. package/src/mcp/resources/source_resource.ts +7 -6
  33. package/src/mcp/resources/view_resource.ts +0 -1
  34. package/src/mcp/tools/execute_query_tool.ts +9 -0
  35. package/src/server.ts +217 -5
  36. package/src/service/connection.ts +1 -4
  37. package/src/service/filter.spec.ts +447 -0
  38. package/src/service/filter.ts +337 -0
  39. package/src/service/filter_integration.spec.ts +825 -0
  40. package/src/service/manifest_service.spec.ts +201 -0
  41. package/src/service/manifest_service.ts +106 -0
  42. package/src/service/materialization_service.spec.ts +648 -0
  43. package/src/service/materialization_service.ts +929 -0
  44. package/src/service/materialized_table_gc.spec.ts +383 -0
  45. package/src/service/materialized_table_gc.ts +279 -0
  46. package/src/service/model.ts +221 -47
  47. package/src/service/package.ts +50 -0
  48. package/src/service/project_store.ts +21 -2
  49. package/src/service/quoting.ts +41 -0
  50. package/src/service/resolve_project.ts +13 -0
  51. package/src/storage/DatabaseInterface.ts +103 -1
  52. package/src/storage/{StorageManager.spec.ts → StorageManager.mock.ts} +9 -0
  53. package/src/storage/StorageManager.ts +119 -1
  54. package/src/storage/duckdb/DuckDBManifestStore.ts +70 -0
  55. package/src/storage/duckdb/DuckDBRepository.ts +99 -9
  56. package/src/storage/duckdb/ManifestRepository.ts +119 -0
  57. package/src/storage/duckdb/MaterializationRepository.ts +249 -0
  58. package/src/storage/duckdb/manifest_store.spec.ts +133 -0
  59. package/src/storage/duckdb/schema.ts +59 -1
  60. package/src/storage/ducklake/DuckLakeManifestStore.ts +146 -0
  61. package/tests/fixtures/persist-test/data/orders.csv +5 -0
  62. package/tests/fixtures/persist-test/persist_test.malloy +11 -0
  63. package/tests/fixtures/persist-test/publisher.json +5 -0
  64. package/tests/fixtures/publisher.config.json +15 -0
  65. package/tests/harness/rest_e2e.ts +68 -0
  66. package/tests/integration/materialization/materialization_lifecycle.integration.spec.ts +470 -0
  67. package/tests/integration/mcp/mcp_execute_query_tool.integration.spec.ts +2 -2
  68. package/dist/app/assets/HomePage-DRmAsRAP.js +0 -1
  69. package/dist/app/assets/PackagePage-rPw0OAJY.js +0 -1
  70. package/dist/app/assets/ProjectPage-D0DYloUr.js +0 -1
  71. package/dist/app/assets/index-C2IkGoJ8.js +0 -467
  72. package/dist/app/assets/index.umd-BwIMLH79.js +0 -1145
@@ -0,0 +1,249 @@
1
+ import { Materialization, MaterializationStatus } from "../DatabaseInterface";
2
+ import { DuckDBConnection } from "./DuckDBConnection";
3
+
4
/**
 * Statuses that end a materialization's lifecycle. Rows in one of these
 * statuses carry no `active_key`.
 */
const TERMINAL_STATUSES: ReadonlySet<MaterializationStatus> =
  new Set<MaterializationStatus>(["SUCCESS", "FAILED", "CANCELLED"]);
9
+
10
/**
 * Builds the `active_key` value for a (project, package) pair by joining the
 * two identifiers with a `|` separator.
 */
function activeKeyFor(projectId: string, packageName: string): string {
  return [projectId, packageName].join("|");
}
13
+
14
/**
 * Thrown when an atomic insert loses a race on (project, package) active
 * materialization. Surfaced separately from a generic DB error so the service
 * layer can translate to `MaterializationConflictError`.
 */
export class DuplicateActiveMaterializationError extends Error {
  /**
   * @param projectId - Project the conflicting materialization belongs to.
   * @param packageName - Package the conflicting materialization belongs to.
   */
  constructor(projectId: string, packageName: string) {
    super(
      `Active materialization already exists for (${projectId}, ${packageName})`,
    );
    // Without this, `err.name` and the default stack/toString output report
    // a plain "Error", which hides the specific failure in logs.
    this.name = "DuplicateActiveMaterializationError";
  }
}
26
+
27
/**
 * DuckDB-backed repository for package materializations.
 *
 * A Materialization tracks a single build run for a (project, package) pair
 * through its lifecycle: PENDING -> RUNNING -> SUCCESS | FAILED | CANCELLED.
 *
 * While a row is in a non-terminal status its `active_key` column holds
 * `{project_id}|{package_name}`; for terminal statuses it is NULL.
 */
export class MaterializationRepository {
  constructor(private readonly db: DuckDBConnection) {}

  /** Builds a row id from the current epoch milliseconds plus a random base-36 suffix. */
  private generateId(): string {
    return `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
  }

  /** Returns the current time. */
  private now(): Date {
    return new Date();
  }

  /**
   * Lists materializations for a (project, package) pair, newest first.
   *
   * @param projectId - Project to filter on.
   * @param packageName - Package to filter on.
   * @param options - Optional `limit` / `offset`; each is appended to the
   *   query (as a bound parameter) only when it is defined.
   * @returns The matching rows mapped to `Materialization` objects.
   */
  async list(
    projectId: string,
    packageName: string,
    options?: { limit?: number; offset?: number },
  ): Promise<Materialization[]> {
    let sql =
      "SELECT * FROM materializations WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC";
    const params: unknown[] = [projectId, packageName];
    if (options?.limit !== undefined) {
      sql += " LIMIT ?";
      params.push(options.limit);
    }
    if (options?.offset !== undefined) {
      sql += " OFFSET ?";
      params.push(options.offset);
    }
    const rows = await this.db.all<Record<string, unknown>>(sql, params);
    // Call through `this` rather than passing the unbound method reference,
    // so the mapper keeps working if it ever starts reading instance state.
    return rows.map((row) => this.mapRow(row));
  }

  /**
   * Fetches a single materialization by id.
   *
   * @returns The mapped row, or `null` when no row has that id.
   */
  async getById(id: string): Promise<Materialization | null> {
    const row = await this.db.get<Record<string, unknown>>(
      "SELECT * FROM materializations WHERE id = ?",
      [id],
    );
    return row ? this.mapRow(row) : null;
  }

  /**
   * Fetches the PENDING or RUNNING materialization for a (project, package)
   * pair.
   *
   * @returns The mapped row, or `null` when there is no such row.
   */
  async getActive(
    projectId: string,
    packageName: string,
  ): Promise<Materialization | null> {
    const row = await this.db.get<Record<string, unknown>>(
      "SELECT * FROM materializations WHERE project_id = ? AND package_name = ? AND status IN ('PENDING', 'RUNNING')",
      [projectId, packageName],
    );
    return row ? this.mapRow(row) : null;
  }

  /**
   * Inserts a new materialization row and returns it.
   *
   * @param projectId - Owning project.
   * @param packageName - Owning package.
   * @param status - Initial status; defaults to `"PENDING"`.
   * @param metadata - Optional metadata, stored as a JSON string.
   * @returns The inserted row as returned by `RETURNING *`.
   * @throws DuplicateActiveMaterializationError when the insert fails with a
   *   unique violation on `idx_materializations_active_key`.
   * @throws Error when the insert succeeds but returns no row.
   */
  async create(
    projectId: string,
    packageName: string,
    status: MaterializationStatus = "PENDING",
    metadata: Record<string, unknown> | null = null,
  ): Promise<Materialization> {
    const id = this.generateId();
    const now = this.now();
    const iso = now.toISOString();
    // Set active_key iff the row is in a non-terminal state. The unique
    // index on active_key makes this a race-free conditional insert: a second
    // concurrent create on the same (project, package) fails here rather
    // than in a check-then-write window.
    const activeKey = TERMINAL_STATUSES.has(status)
      ? null
      : activeKeyFor(projectId, packageName);
    const metadataJson = metadata ? JSON.stringify(metadata) : null;

    let rows: Record<string, unknown>[];
    try {
      rows = await this.db.all<Record<string, unknown>>(
        `INSERT INTO materializations (id, project_id, package_name, status, active_key, metadata, created_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?)
         RETURNING *`,
        [
          id,
          projectId,
          packageName,
          status,
          activeKey,
          metadataJson,
          iso,
          iso,
        ],
      );
    } catch (err) {
      if (isUniqueViolation(err, "idx_materializations_active_key")) {
        throw new DuplicateActiveMaterializationError(
          projectId,
          packageName,
        );
      }
      throw err;
    }

    // Guard against an empty RETURNING result so the failure is an explicit
    // error instead of a TypeError inside mapRow.
    const inserted = rows[0];
    if (!inserted) {
      throw new Error(`Materialization ${id} not found after insert`);
    }
    return this.mapRow(inserted);
  }

  /**
   * Applies a partial update to a materialization and returns the fresh row.
   *
   * Only fields that are not `undefined` are written; `updated_at` is always
   * set to the current time.
   *
   * @param id - Id of the row to update.
   * @param updates - Fields to change. `error` and `metadata` may be `null`
   *   to clear the column.
   * @returns The row re-read after the update.
   * @throws Error when no row with `id` exists after the update.
   */
  async update(
    id: string,
    updates: {
      status?: MaterializationStatus;
      startedAt?: Date;
      completedAt?: Date;
      error?: string | null;
      metadata?: Record<string, unknown> | null;
    },
  ): Promise<Materialization> {
    const now = this.now();
    const setClauses: string[] = [];
    const params: unknown[] = [];

    if (updates.status !== undefined) {
      setClauses.push(`status = ?`);
      params.push(updates.status);
      // Clear active_key on any transition to a terminal state; set it on
      // any transition to a non-terminal state. The unique index
      // guarantees we can never end up with two active rows for the same
      // (project, package).
      if (TERMINAL_STATUSES.has(updates.status)) {
        setClauses.push(`active_key = NULL`);
      } else {
        setClauses.push(`active_key = project_id || '|' || package_name`);
      }
    }
    if (updates.startedAt !== undefined) {
      setClauses.push(`started_at = ?`);
      params.push(updates.startedAt.toISOString());
    }
    if (updates.completedAt !== undefined) {
      setClauses.push(`completed_at = ?`);
      params.push(updates.completedAt.toISOString());
    }
    if (updates.error !== undefined) {
      setClauses.push(`error = ?`);
      params.push(updates.error);
    }
    if (updates.metadata !== undefined) {
      setClauses.push(`metadata = ?`);
      params.push(
        updates.metadata ? JSON.stringify(updates.metadata) : null,
      );
    }

    setClauses.push(`updated_at = ?`);
    params.push(now.toISOString());
    // The id is bound last because it belongs to the trailing WHERE clause.
    params.push(id);

    await this.db.run(
      `UPDATE materializations SET ${setClauses.join(", ")} WHERE id = ?`,
      params,
    );

    const updated = await this.getById(id);
    if (!updated) {
      throw new Error(`Materialization ${id} not found after update`);
    }
    return updated;
  }

  /** Deletes every materialization row belonging to `projectId`. */
  async deleteByProjectId(projectId: string): Promise<void> {
    await this.db.run("DELETE FROM materializations WHERE project_id = ?", [
      projectId,
    ]);
  }

  /** Deletes the materialization row with the given id. */
  async deleteById(id: string): Promise<void> {
    await this.db.run("DELETE FROM materializations WHERE id = ?", [id]);
  }

  /** Deletes every materialization row for the (project, package) pair. */
  async deleteByPackage(
    projectId: string,
    packageName: string,
  ): Promise<void> {
    await this.db.run(
      "DELETE FROM materializations WHERE project_id = ? AND package_name = ?",
      [projectId, packageName],
    );
  }

  /**
   * Converts a raw `materializations` row into a `Materialization`.
   *
   * `metadata` is parsed from JSON and falls back to `null` when the column
   * is empty or does not parse. `started_at`, `completed_at` and `error`
   * map to `null` when absent.
   */
  private mapRow(row: Record<string, unknown>): Materialization {
    let metadata: Record<string, unknown> | null = null;
    if (row.metadata) {
      try {
        metadata = JSON.parse(row.metadata as string);
      } catch {
        // Best effort: unreadable metadata is reported as absent.
        metadata = null;
      }
    }

    return {
      id: row.id as string,
      projectId: row.project_id as string,
      packageName: row.package_name as string,
      status: row.status as MaterializationStatus,
      startedAt: row.started_at ? new Date(row.started_at as string) : null,
      completedAt: row.completed_at
        ? new Date(row.completed_at as string)
        : null,
      error: row.error != null ? (row.error as string) : null,
      metadata,
      createdAt: new Date(row.created_at as string),
      updatedAt: new Date(row.updated_at as string),
    };
  }
}
237
+
238
/**
 * Decides whether `err` is a unique-constraint violation that should be
 * attributed to the index `indexName`.
 *
 * Classification order:
 * 1. Anything that is not an `Error` is rejected.
 * 2. A message that contains `indexName` is accepted.
 * 3. A message that mentions a primary key is rejected, so a collision on a
 *    table's primary key is not reported as a violation of `indexName`.
 * 4. Otherwise the message is accepted when it contains "duplicate key" or
 *    "unique constraint" (case-insensitive).
 *
 * NOTE(review): step 4 is a generic fallback for messages that do not name
 * the index; confirm against the exact text the DuckDB driver in use
 * produces.
 *
 * @param err - The value caught from the failed statement.
 * @param indexName - Name of the unique index the caller cares about.
 * @returns `true` when the error is classified as a violation of that index.
 */
function isUniqueViolation(err: unknown, indexName: string): boolean {
  if (!(err instanceof Error)) return false;
  const msg = err.message;
  if (msg.includes(indexName)) return true;
  if (/primary key/i.test(msg)) return false;
  return /duplicate key|unique constraint/i.test(msg);
}
@@ -0,0 +1,133 @@
1
+ import { beforeEach, describe, expect, it } from "bun:test";
2
+ import * as sinon from "sinon";
3
+ import { ManifestEntry, ResourceRepository } from "../DatabaseInterface";
4
+ import { DuckDBManifestStore } from "./DuckDBManifestStore";
5
+
6
/**
 * Test helper: returns a fully populated `ManifestEntry` with fixed default
 * values, with any fields in `overrides` taking precedence.
 */
function makeEntry(overrides: Partial<ManifestEntry> = {}): ManifestEntry {
  const fixedDay = "2026-04-03";
  const defaults: ManifestEntry = {
    id: "entry-1",
    projectId: "proj-1",
    packageName: "pkg",
    buildId: "build-abc",
    tableName: "my_table",
    sourceName: "my_source",
    connectionName: "duckdb",
    createdAt: new Date(fixedDay),
    updatedAt: new Date(fixedDay),
  };
  return Object.assign(defaults, overrides);
}
20
+
21
/**
 * Test helper: creates a sinon sandbox, a stubbed repository exposing the
 * three manifest methods, and a `DuckDBManifestStore` wired to that stub.
 */
function createMocks() {
  const sandbox = sinon.createSandbox();

  // One stub per repository method, so each test can program its own result.
  const listManifestEntries = sandbox.stub();
  const upsertManifestEntry = sandbox.stub();
  const deleteManifestEntry = sandbox.stub();
  const repository = {
    listManifestEntries,
    upsertManifestEntry,
    deleteManifestEntry,
  };

  // The stub only implements the manifest subset of ResourceRepository,
  // hence the double cast.
  const store = new DuckDBManifestStore(
    repository as unknown as ResourceRepository,
  );

  return { sandbox, repository, store };
}
36
+
37
// Unit tests for DuckDBManifestStore against a stubbed repository.
describe("DuckDBManifestStore", () => {
  let ctx: ReturnType<typeof createMocks>;

  // Fresh stubs and store for every test so call counts never leak.
  beforeEach(() => {
    ctx = createMocks();
  });

  describe("getManifest", () => {
    it("should assemble a BuildManifest from repository entries", async () => {
      const { repository, store } = ctx;
      const rows = [
        makeEntry({ buildId: "b1", tableName: "tbl_a" }),
        makeEntry({ buildId: "b2", tableName: "tbl_b" }),
      ];
      repository.listManifestEntries.resolves(rows);

      const manifest = await store.getManifest("proj-1", "pkg");

      expect(manifest.strict).toBe(false);
      expect(manifest.entries).toEqual({
        b1: { tableName: "tbl_a" },
        b2: { tableName: "tbl_b" },
      });
    });

    it("should return empty entries when no rows exist", async () => {
      const { repository, store } = ctx;
      repository.listManifestEntries.resolves([]);

      const manifest = await store.getManifest("proj-1", "pkg");

      expect(manifest.strict).toBe(false);
      expect(manifest.entries).toEqual({});
    });
  });

  describe("writeEntry", () => {
    it("should upsert an entry with all fields", async () => {
      const { repository, store } = ctx;
      repository.upsertManifestEntry.resolves(makeEntry());

      await store.writeEntry(
        "proj-1",
        "pkg",
        "build-abc",
        "tbl",
        "src",
        "conn",
      );

      expect(repository.upsertManifestEntry.calledOnce).toBe(true);
      const [payload] = repository.upsertManifestEntry.getCall(0).args;
      expect(payload).toEqual({
        projectId: "proj-1",
        packageName: "pkg",
        buildId: "build-abc",
        tableName: "tbl",
        sourceName: "src",
        connectionName: "conn",
      });
    });
  });

  describe("deleteEntry", () => {
    it("should delegate to repository", async () => {
      const { repository, store } = ctx;
      repository.deleteManifestEntry.resolves();

      await store.deleteEntry("entry-1");

      expect(repository.deleteManifestEntry.calledOnce).toBe(true);
      const [deletedId] = repository.deleteManifestEntry.getCall(0).args;
      expect(deletedId).toBe("entry-1");
    });
  });

  describe("listEntries", () => {
    it("should return entries from the repository", async () => {
      const { repository, store } = ctx;
      const entries = [
        makeEntry(),
        makeEntry({ id: "entry-2", buildId: "build-def" }),
      ];
      repository.listManifestEntries.resolves(entries);

      const result = await store.listEntries("proj-1", "pkg");

      expect(result).toEqual(entries);
      const calledWithScope = repository.listManifestEntries.calledWith(
        "proj-1",
        "pkg",
      );
      expect(calledWithScope).toBe(true);
    });

    it("should return empty array when no entries exist", async () => {
      const { repository, store } = ctx;
      repository.listManifestEntries.resolves([]);

      const result = await store.listEntries("proj-1", "pkg");

      expect(result).toEqual([]);
    });
  });
});
@@ -64,6 +64,49 @@ export async function initializeSchema(
64
64
  )
65
65
  `);
66
66
 
67
+ // Materializations table.
68
+ //
69
+ // `active_key` enforces at-most-one active (PENDING or RUNNING)
70
+ // materialization per (project, package) at the DB layer. It is set to
71
+ // `{project_id}|{package_name}` while the row is active and cleared
72
+ // to NULL on transition to any terminal state. A unique index on
73
+ // `active_key` (see below) makes the check-then-insert race impossible —
74
+ // a second concurrent create fails with a constraint violation, which the
75
+ // service layer translates to `MaterializationConflictError`.
76
+ await db.run(`
77
+ CREATE TABLE IF NOT EXISTS materializations (
78
+ id VARCHAR PRIMARY KEY,
79
+ project_id VARCHAR NOT NULL,
80
+ package_name VARCHAR NOT NULL,
81
+ status VARCHAR NOT NULL,
82
+ active_key VARCHAR,
83
+ started_at TIMESTAMP,
84
+ completed_at TIMESTAMP,
85
+ error TEXT,
86
+ metadata JSON,
87
+ created_at TIMESTAMP NOT NULL,
88
+ updated_at TIMESTAMP NOT NULL,
89
+ FOREIGN KEY (project_id) REFERENCES projects(id)
90
+ )
91
+ `);
92
+
93
+ // Build manifests table
94
+ await db.run(`
95
+ CREATE TABLE IF NOT EXISTS build_manifests (
96
+ id VARCHAR PRIMARY KEY,
97
+ project_id VARCHAR NOT NULL,
98
+ package_name VARCHAR NOT NULL,
99
+ build_id VARCHAR NOT NULL,
100
+ table_name VARCHAR NOT NULL,
101
+ source_name VARCHAR NOT NULL,
102
+ connection_name VARCHAR NOT NULL,
103
+ created_at TIMESTAMP NOT NULL,
104
+ updated_at TIMESTAMP NOT NULL,
105
+ FOREIGN KEY (project_id) REFERENCES projects(id),
106
+ UNIQUE (project_id, package_name, build_id)
107
+ )
108
+ `);
109
+
67
110
  // Create indexes for better query performance
68
111
  await db.run(
69
112
  "CREATE INDEX IF NOT EXISTS idx_packages_project_id ON packages(project_id)",
@@ -71,10 +114,25 @@ export async function initializeSchema(
71
114
  await db.run(
72
115
  "CREATE INDEX IF NOT EXISTS idx_connections_project_id ON connections(project_id)",
73
116
  );
117
+ await db.run(
118
+ "CREATE INDEX IF NOT EXISTS idx_materializations_project_package ON materializations(project_id, package_name)",
119
+ );
120
+ await db.run(
121
+ "CREATE UNIQUE INDEX IF NOT EXISTS idx_materializations_active_key ON materializations(active_key)",
122
+ );
123
+ await db.run(
124
+ "CREATE INDEX IF NOT EXISTS idx_build_manifests_project_package ON build_manifests(project_id, package_name)",
125
+ );
74
126
  }
75
127
 
76
128
  async function dropAllTables(db: DuckDBConnection): Promise<void> {
77
- const tables = ["packages", "connections", "projects"];
129
+ const tables = [
130
+ "build_manifests",
131
+ "materializations",
132
+ "packages",
133
+ "connections",
134
+ "projects",
135
+ ];
78
136
 
79
137
  logger.info("Dropping tables:", tables.join(", "));
80
138
 
@@ -0,0 +1,146 @@
1
+ import { logger } from "../../logger";
2
+ import {
3
+ BuildManifest,
4
+ ManifestEntry,
5
+ ManifestStore,
6
+ } from "../DatabaseInterface";
7
+ import { DuckDBConnection } from "../duckdb/DuckDBConnection";
8
+
9
/**
 * ManifestStore implementation for orchestrated mode, backed by a DuckLake
 * catalog attached to the publisher's internal DuckDB.
 *
 * Every worker that shares the catalog reads and writes the same
 * `build_manifests` table, which is how manifests are coordinated across
 * workers.
 *
 * {@link StorageManager} attaches the catalog before constructing this
 * store. {@link bootstrapSchema} creates the table idempotently, so whichever
 * worker calls it first creates it. The DDL lives here so that schema and
 * code evolve together in the publisher repo.
 *
 * **Scope: manifest sync only.** Only manifest state is shared. The
 * active-materialization lock (the unique index on
 * `materializations.active_key`) remains in each worker's local DuckDB, so
 * two workers can build concurrently and race on physical table names,
 * staging table names, and manifest writes. Until a shared build lease
 * exists in the DuckLake catalog, orchestrated deployments must keep builds
 * externally single-writer (e.g., one designated build worker, or an
 * external job scheduler). Other workers should only call
 * `manifest?action=reload` to pick up manifests produced by the build worker.
 */
export class DuckLakeManifestStore implements ManifestStore {
  /** Catalog-qualified table name interpolated into every statement. */
  private readonly table: string;

  /**
   * @param db - Connection to the DuckDB instance the catalog is attached to.
   * @param catalogName - Name of the attached DuckLake catalog; used as the
   *   qualifier in `<catalogName>.build_manifests`.
   */
  constructor(
    private db: DuckDBConnection,
    catalogName: string,
  ) {
    this.table = `${catalogName}.build_manifests`;
  }

  /**
   * Creates the `build_manifests` table in the DuckLake catalog if it does
   * not exist yet, then logs the table name. Safe to call from every worker
   * on startup.
   */
  async bootstrapSchema(): Promise<void> {
    const ddl = `
      CREATE TABLE IF NOT EXISTS ${this.table} (
        id VARCHAR,
        project_id VARCHAR NOT NULL,
        package_name VARCHAR NOT NULL,
        build_id VARCHAR NOT NULL,
        table_name VARCHAR NOT NULL,
        source_name VARCHAR NOT NULL,
        connection_name VARCHAR NOT NULL,
        created_at TIMESTAMP NOT NULL,
        updated_at TIMESTAMP NOT NULL
      )
    `;
    await this.db.run(ddl);
    logger.info(`DuckLake manifest table bootstrapped: ${this.table}`);
  }

  /**
   * Builds the `BuildManifest` for a (project, package) pair.
   *
   * @returns A non-strict manifest mapping each `build_id` to the table name
   *   of its most recently created row.
   */
  async getManifest(
    projectId: string,
    packageName: string,
  ): Promise<BuildManifest> {
    const rows = await this.db.all<Record<string, unknown>>(
      `SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
      [projectId, packageName],
    );
    const manifest: BuildManifest = { entries: {}, strict: false };
    for (const row of rows) {
      const buildId = row.build_id as string;
      // The query returns newest rows first, so the first row seen for a
      // build_id wins and later (older) duplicates from cross-worker races
      // are skipped.
      if (manifest.entries[buildId]) {
        continue;
      }
      manifest.entries[buildId] = {
        tableName: row.table_name as string,
      };
    }
    return manifest;
  }

  /**
   * Appends a manifest row with a freshly generated id and the current time
   * as both `created_at` and `updated_at`.
   *
   * This is a plain INSERT: writing the same `build_id` again (e.g. a retry
   * after a crash) adds another row. That is harmless for
   * {@link getManifest}, which keeps only the newest row per build_id.
   */
  async writeEntry(
    projectId: string,
    packageName: string,
    buildId: string,
    tableName: string,
    sourceName: string,
    connectionName: string,
  ): Promise<void> {
    const timestamp = new Date().toISOString();
    const randomSuffix = Math.random().toString(36).substring(2, 11);
    const id = `${Date.now()}-${randomSuffix}`;

    const values: unknown[] = [
      id,
      projectId,
      packageName,
      buildId,
      tableName,
      sourceName,
      connectionName,
      timestamp,
      timestamp,
    ];
    await this.db.run(
      `INSERT INTO ${this.table} (id, project_id, package_name, build_id, table_name, source_name, connection_name, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      values,
    );
  }

  /** Deletes the manifest row(s) whose `id` equals the given value. */
  async deleteEntry(id: string): Promise<void> {
    const sql = `DELETE FROM ${this.table} WHERE id = ?`;
    await this.db.run(sql, [id]);
  }

  /**
   * Lists every manifest row for a (project, package) pair, newest first.
   * Unlike {@link getManifest}, rows are not deduplicated by build_id.
   */
  async listEntries(
    projectId: string,
    packageName: string,
  ): Promise<ManifestEntry[]> {
    const rows = await this.db.all<Record<string, unknown>>(
      `SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
      [projectId, packageName],
    );
    const entries: ManifestEntry[] = [];
    for (const row of rows) {
      entries.push(this.mapToEntry(row));
    }
    return entries;
  }

  /** Converts a raw `build_manifests` row into a `ManifestEntry`. */
  private mapToEntry(row: Record<string, unknown>): ManifestEntry {
    const createdAt = new Date(row.created_at as string);
    const updatedAt = new Date(row.updated_at as string);
    return {
      id: row.id as string,
      projectId: row.project_id as string,
      packageName: row.package_name as string,
      buildId: row.build_id as string,
      tableName: row.table_name as string,
      sourceName: row.source_name as string,
      connectionName: row.connection_name as string,
      createdAt,
      updatedAt,
    };
  }
}
@@ -0,0 +1,5 @@
1
+ category,amount
2
+ electronics,100
3
+ clothing,50
4
+ electronics,200
5
+ clothing,75
@@ -0,0 +1,11 @@
1
+ ##! experimental.persistence
2
+
3
+ source: raw_orders is duckdb.table('data/orders.csv')
4
+
5
+ #@ persist name="order_summary"
6
+ source: order_summary is raw_orders -> {
7
+ group_by: category
8
+ aggregate:
9
+ total_orders is count()
10
+ total_revenue is amount.sum()
11
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "persist-test",
3
+ "version": "1.0.0",
4
+ "description": "DuckDB persist test fixture"
5
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "frozenConfig": false,
3
+ "projects": [
4
+ {
5
+ "name": "test-project",
6
+ "packages": [
7
+ {
8
+ "name": "persist-test",
9
+ "location": "./persist-test"
10
+ }
11
+ ],
12
+ "connections": []
13
+ }
14
+ ]
15
+ }