@malloy-publisher/server 0.0.178 → 0.0.180-dev-v1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +1 -1
- package/dist/app/api-doc.yaml +505 -52
- package/dist/app/assets/HomePage-Dn3E4CuB.js +1 -0
- package/dist/app/assets/{MainPage-JYvDXOkC.js → MainPage-BzB3yoqi.js} +2 -2
- package/dist/app/assets/{ModelPage-TEQrhaqq.js → ModelPage-C9O_sAXT.js} +1 -1
- package/dist/app/assets/PackagePage-DcxKEjBX.js +1 -0
- package/dist/app/assets/ProjectPage-BDj307rF.js +1 -0
- package/dist/app/assets/{RouteError-DnSZEzkT.js → RouteError-DAShbVCG.js} +1 -1
- package/dist/app/assets/{WorkbookPage-DjQ8u5DD.js → WorkbookPage-Cs_XYEaB.js} +1 -1
- package/dist/app/assets/core-CjeTkq8O.es-BqRc6yhC.js +148 -0
- package/dist/app/assets/engine-oniguruma-C4vnmooL.es-jdkXmgTr.js +1 -0
- package/dist/app/assets/github-light-JYsPkUQd.es-DAi9KRSo.js +1 -0
- package/dist/app/assets/index-15BOvhp0.js +456 -0
- package/dist/app/assets/{index--80Q7qw1.js → index-Bb2jqquW.js} +1 -1
- package/dist/app/assets/{index-CZ4G_NMp.js → index-D68X76-7.js} +168 -166
- package/dist/app/assets/index.umd-DGBekgSu.js +1145 -0
- package/dist/app/assets/json-71t8ZF9g.es-BQoSv7ci.js +1 -0
- package/dist/app/assets/sql-DCkt643-.es-COK4E0Yg.js +1 -0
- package/dist/app/assets/typescript-buWNZFwO.es-Dj6nwHGl.js +1 -0
- package/dist/app/index.html +1 -1
- package/dist/instrumentation.js +10567 -10584
- package/dist/server.js +16973 -15367
- package/package.json +14 -12
- package/src/controller/connection.controller.ts +27 -20
- package/src/controller/manifest.controller.ts +29 -0
- package/src/controller/materialization.controller.ts +125 -0
- package/src/controller/model.controller.ts +4 -3
- package/src/controller/package.controller.ts +53 -2
- package/src/controller/query.controller.ts +5 -0
- package/src/errors.ts +24 -0
- package/src/mcp/resources/model_resource.ts +12 -9
- package/src/mcp/resources/source_resource.ts +7 -6
- package/src/mcp/resources/view_resource.ts +0 -1
- package/src/mcp/tools/execute_query_tool.ts +9 -0
- package/src/server.ts +217 -5
- package/src/service/connection.ts +1 -4
- package/src/service/db_utils.spec.ts +4 -2
- package/src/service/db_utils.ts +6 -2
- package/src/service/filter.spec.ts +447 -0
- package/src/service/filter.ts +337 -0
- package/src/service/filter_integration.spec.ts +825 -0
- package/src/service/manifest_service.spec.ts +201 -0
- package/src/service/manifest_service.ts +106 -0
- package/src/service/materialization_service.spec.ts +648 -0
- package/src/service/materialization_service.ts +929 -0
- package/src/service/materialized_table_gc.spec.ts +383 -0
- package/src/service/materialized_table_gc.ts +279 -0
- package/src/service/model.ts +221 -47
- package/src/service/package.ts +50 -0
- package/src/service/project_store.ts +21 -2
- package/src/service/quoting.ts +41 -0
- package/src/service/resolve_project.ts +13 -0
- package/src/storage/DatabaseInterface.ts +103 -1
- package/src/storage/{StorageManager.spec.ts → StorageManager.mock.ts} +9 -0
- package/src/storage/StorageManager.ts +119 -1
- package/src/storage/duckdb/DuckDBManifestStore.ts +70 -0
- package/src/storage/duckdb/DuckDBRepository.ts +99 -9
- package/src/storage/duckdb/ManifestRepository.ts +119 -0
- package/src/storage/duckdb/MaterializationRepository.ts +249 -0
- package/src/storage/duckdb/manifest_store.spec.ts +133 -0
- package/src/storage/duckdb/schema.ts +59 -1
- package/src/storage/ducklake/DuckLakeManifestStore.ts +146 -0
- package/tests/fixtures/persist-test/data/orders.csv +5 -0
- package/tests/fixtures/persist-test/persist_test.malloy +11 -0
- package/tests/fixtures/persist-test/publisher.json +5 -0
- package/tests/fixtures/publisher.config.json +15 -0
- package/tests/harness/rest_e2e.ts +68 -0
- package/tests/integration/materialization/materialization_lifecycle.integration.spec.ts +470 -0
- package/tests/integration/mcp/mcp_execute_query_tool.integration.spec.ts +2 -2
- package/dist/app/assets/HomePage-CwUkFsA8.js +0 -1
- package/dist/app/assets/PackagePage-CgE-izLw.js +0 -1
- package/dist/app/assets/ProjectPage-PiMPpFX8.js +0 -1
- package/dist/app/assets/index-BJUsHnGO.js +0 -467
- package/dist/app/assets/index.umd-Cf-wqh-R.js +0 -1145
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { Materialization, MaterializationStatus } from "../DatabaseInterface";
|
|
2
|
+
import { DuckDBConnection } from "./DuckDBConnection";
|
|
3
|
+
|
|
4
|
+
const TERMINAL_STATUSES: ReadonlySet<MaterializationStatus> = new Set([
|
|
5
|
+
"SUCCESS",
|
|
6
|
+
"FAILED",
|
|
7
|
+
"CANCELLED",
|
|
8
|
+
]);
|
|
9
|
+
|
|
10
|
+
function activeKeyFor(projectId: string, packageName: string): string {
|
|
11
|
+
return `${projectId}|${packageName}`;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Thrown when an atomic insert loses a race on (project, package) active
|
|
16
|
+
* materialization. Surfaced separately from a generic DB error so the service
|
|
17
|
+
* layer can translate to `MaterializationConflictError`.
|
|
18
|
+
*/
|
|
19
|
+
export class DuplicateActiveMaterializationError extends Error {
|
|
20
|
+
constructor(projectId: string, packageName: string) {
|
|
21
|
+
super(
|
|
22
|
+
`Active materialization already exists for (${projectId}, ${packageName})`,
|
|
23
|
+
);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* DuckDB-backed repository for package materializations.
|
|
29
|
+
*
|
|
30
|
+
* A Materialization tracks a single build run for a (project, package) pair
|
|
31
|
+
* through its lifecycle: PENDING -> RUNNING -> SUCCESS | FAILED | CANCELLED.
|
|
32
|
+
*/
|
|
33
|
+
export class MaterializationRepository {
|
|
34
|
+
constructor(private db: DuckDBConnection) {}
|
|
35
|
+
|
|
36
|
+
private generateId(): string {
|
|
37
|
+
return `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
private now(): Date {
|
|
41
|
+
return new Date();
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
async list(
|
|
45
|
+
projectId: string,
|
|
46
|
+
packageName: string,
|
|
47
|
+
options?: { limit?: number; offset?: number },
|
|
48
|
+
): Promise<Materialization[]> {
|
|
49
|
+
let sql =
|
|
50
|
+
"SELECT * FROM materializations WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC";
|
|
51
|
+
const params: unknown[] = [projectId, packageName];
|
|
52
|
+
if (options?.limit !== undefined) {
|
|
53
|
+
sql += " LIMIT ?";
|
|
54
|
+
params.push(options.limit);
|
|
55
|
+
}
|
|
56
|
+
if (options?.offset !== undefined) {
|
|
57
|
+
sql += " OFFSET ?";
|
|
58
|
+
params.push(options.offset);
|
|
59
|
+
}
|
|
60
|
+
const rows = await this.db.all<Record<string, unknown>>(sql, params);
|
|
61
|
+
return rows.map(this.mapRow);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
async getById(id: string): Promise<Materialization | null> {
|
|
65
|
+
const row = await this.db.get<Record<string, unknown>>(
|
|
66
|
+
"SELECT * FROM materializations WHERE id = ?",
|
|
67
|
+
[id],
|
|
68
|
+
);
|
|
69
|
+
return row ? this.mapRow(row) : null;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
async getActive(
|
|
73
|
+
projectId: string,
|
|
74
|
+
packageName: string,
|
|
75
|
+
): Promise<Materialization | null> {
|
|
76
|
+
const row = await this.db.get<Record<string, unknown>>(
|
|
77
|
+
"SELECT * FROM materializations WHERE project_id = ? AND package_name = ? AND status IN ('PENDING', 'RUNNING')",
|
|
78
|
+
[projectId, packageName],
|
|
79
|
+
);
|
|
80
|
+
return row ? this.mapRow(row) : null;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
async create(
|
|
84
|
+
projectId: string,
|
|
85
|
+
packageName: string,
|
|
86
|
+
status: MaterializationStatus = "PENDING",
|
|
87
|
+
metadata: Record<string, unknown> | null = null,
|
|
88
|
+
): Promise<Materialization> {
|
|
89
|
+
const id = this.generateId();
|
|
90
|
+
const now = this.now();
|
|
91
|
+
const iso = now.toISOString();
|
|
92
|
+
// Set active_key iff the row is in a non-terminal state. The unique
|
|
93
|
+
// index on active_key makes the race-free conditional insert: a second
|
|
94
|
+
// concurrent create on the same (project, package) fails here rather
|
|
95
|
+
// than in a check-then-write window.
|
|
96
|
+
const activeKey = TERMINAL_STATUSES.has(status)
|
|
97
|
+
? null
|
|
98
|
+
: activeKeyFor(projectId, packageName);
|
|
99
|
+
const metadataJson = metadata ? JSON.stringify(metadata) : null;
|
|
100
|
+
|
|
101
|
+
try {
|
|
102
|
+
const rows = await this.db.all<Record<string, unknown>>(
|
|
103
|
+
`INSERT INTO materializations (id, project_id, package_name, status, active_key, metadata, created_at, updated_at)
|
|
104
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
105
|
+
RETURNING *`,
|
|
106
|
+
[
|
|
107
|
+
id,
|
|
108
|
+
projectId,
|
|
109
|
+
packageName,
|
|
110
|
+
status,
|
|
111
|
+
activeKey,
|
|
112
|
+
metadataJson,
|
|
113
|
+
iso,
|
|
114
|
+
iso,
|
|
115
|
+
],
|
|
116
|
+
);
|
|
117
|
+
return this.mapRow(rows[0]);
|
|
118
|
+
} catch (err) {
|
|
119
|
+
if (isUniqueViolation(err, "idx_materializations_active_key")) {
|
|
120
|
+
throw new DuplicateActiveMaterializationError(
|
|
121
|
+
projectId,
|
|
122
|
+
packageName,
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
throw err;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
async update(
|
|
130
|
+
id: string,
|
|
131
|
+
updates: {
|
|
132
|
+
status?: MaterializationStatus;
|
|
133
|
+
startedAt?: Date;
|
|
134
|
+
completedAt?: Date;
|
|
135
|
+
error?: string | null;
|
|
136
|
+
metadata?: Record<string, unknown> | null;
|
|
137
|
+
},
|
|
138
|
+
): Promise<Materialization> {
|
|
139
|
+
const now = this.now();
|
|
140
|
+
const setClauses: string[] = [];
|
|
141
|
+
const params: unknown[] = [];
|
|
142
|
+
|
|
143
|
+
if (updates.status !== undefined) {
|
|
144
|
+
setClauses.push(`status = ?`);
|
|
145
|
+
params.push(updates.status);
|
|
146
|
+
// Clear active_key on any transition to a terminal state; set it on
|
|
147
|
+
// any transition to a non-terminal state. The unique index
|
|
148
|
+
// guarantees we can never end up with two active rows for the same
|
|
149
|
+
// (project, package).
|
|
150
|
+
if (TERMINAL_STATUSES.has(updates.status)) {
|
|
151
|
+
setClauses.push(`active_key = NULL`);
|
|
152
|
+
} else {
|
|
153
|
+
setClauses.push(`active_key = project_id || '|' || package_name`);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (updates.startedAt !== undefined) {
|
|
157
|
+
setClauses.push(`started_at = ?`);
|
|
158
|
+
params.push(updates.startedAt.toISOString());
|
|
159
|
+
}
|
|
160
|
+
if (updates.completedAt !== undefined) {
|
|
161
|
+
setClauses.push(`completed_at = ?`);
|
|
162
|
+
params.push(updates.completedAt.toISOString());
|
|
163
|
+
}
|
|
164
|
+
if (updates.error !== undefined) {
|
|
165
|
+
setClauses.push(`error = ?`);
|
|
166
|
+
params.push(updates.error);
|
|
167
|
+
}
|
|
168
|
+
if (updates.metadata !== undefined) {
|
|
169
|
+
setClauses.push(`metadata = ?`);
|
|
170
|
+
params.push(
|
|
171
|
+
updates.metadata ? JSON.stringify(updates.metadata) : null,
|
|
172
|
+
);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
setClauses.push(`updated_at = ?`);
|
|
176
|
+
params.push(now.toISOString());
|
|
177
|
+
params.push(id);
|
|
178
|
+
|
|
179
|
+
await this.db.run(
|
|
180
|
+
`UPDATE materializations SET ${setClauses.join(", ")} WHERE id = ?`,
|
|
181
|
+
params,
|
|
182
|
+
);
|
|
183
|
+
|
|
184
|
+
const updated = await this.getById(id);
|
|
185
|
+
if (!updated) {
|
|
186
|
+
throw new Error(`Materialization ${id} not found after update`);
|
|
187
|
+
}
|
|
188
|
+
return updated;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
async deleteByProjectId(projectId: string): Promise<void> {
|
|
192
|
+
await this.db.run("DELETE FROM materializations WHERE project_id = ?", [
|
|
193
|
+
projectId,
|
|
194
|
+
]);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
async deleteById(id: string): Promise<void> {
|
|
198
|
+
await this.db.run("DELETE FROM materializations WHERE id = ?", [id]);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
async deleteByPackage(
|
|
202
|
+
projectId: string,
|
|
203
|
+
packageName: string,
|
|
204
|
+
): Promise<void> {
|
|
205
|
+
await this.db.run(
|
|
206
|
+
"DELETE FROM materializations WHERE project_id = ? AND package_name = ?",
|
|
207
|
+
[projectId, packageName],
|
|
208
|
+
);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
private mapRow(row: Record<string, unknown>): Materialization {
|
|
212
|
+
let metadata: Record<string, unknown> | null = null;
|
|
213
|
+
if (row.metadata) {
|
|
214
|
+
try {
|
|
215
|
+
metadata = JSON.parse(row.metadata as string);
|
|
216
|
+
} catch {
|
|
217
|
+
metadata = null;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
return {
|
|
222
|
+
id: row.id as string,
|
|
223
|
+
projectId: row.project_id as string,
|
|
224
|
+
packageName: row.package_name as string,
|
|
225
|
+
status: row.status as MaterializationStatus,
|
|
226
|
+
startedAt: row.started_at ? new Date(row.started_at as string) : null,
|
|
227
|
+
completedAt: row.completed_at
|
|
228
|
+
? new Date(row.completed_at as string)
|
|
229
|
+
: null,
|
|
230
|
+
error: row.error != null ? (row.error as string) : null,
|
|
231
|
+
metadata,
|
|
232
|
+
createdAt: new Date(row.created_at as string),
|
|
233
|
+
updatedAt: new Date(row.updated_at as string),
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/**
|
|
239
|
+
* DuckDB surfaces unique-constraint violations as plain Errors whose message
|
|
240
|
+
* mentions the violated index. We match on the index name rather than a
|
|
241
|
+
* generic substring so we don't misclassify unrelated constraint errors.
|
|
242
|
+
*/
|
|
243
|
+
function isUniqueViolation(err: unknown, indexName: string): boolean {
|
|
244
|
+
if (!(err instanceof Error)) return false;
|
|
245
|
+
const msg = err.message;
|
|
246
|
+
return (
|
|
247
|
+
msg.includes(indexName) || /duplicate key|unique constraint/i.test(msg)
|
|
248
|
+
);
|
|
249
|
+
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it } from "bun:test";
|
|
2
|
+
import * as sinon from "sinon";
|
|
3
|
+
import { ManifestEntry, ResourceRepository } from "../DatabaseInterface";
|
|
4
|
+
import { DuckDBManifestStore } from "./DuckDBManifestStore";
|
|
5
|
+
|
|
6
|
+
function makeEntry(overrides: Partial<ManifestEntry> = {}): ManifestEntry {
|
|
7
|
+
return {
|
|
8
|
+
id: "entry-1",
|
|
9
|
+
projectId: "proj-1",
|
|
10
|
+
packageName: "pkg",
|
|
11
|
+
buildId: "build-abc",
|
|
12
|
+
tableName: "my_table",
|
|
13
|
+
sourceName: "my_source",
|
|
14
|
+
connectionName: "duckdb",
|
|
15
|
+
createdAt: new Date("2026-04-03"),
|
|
16
|
+
updatedAt: new Date("2026-04-03"),
|
|
17
|
+
...overrides,
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function createMocks() {
|
|
22
|
+
const sandbox = sinon.createSandbox();
|
|
23
|
+
|
|
24
|
+
const repository = {
|
|
25
|
+
listManifestEntries: sandbox.stub(),
|
|
26
|
+
upsertManifestEntry: sandbox.stub(),
|
|
27
|
+
deleteManifestEntry: sandbox.stub(),
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
const store = new DuckDBManifestStore(
|
|
31
|
+
repository as unknown as ResourceRepository,
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
return { sandbox, repository, store };
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Unit tests for DuckDBManifestStore: the store is exercised against a
// sinon-stubbed ResourceRepository, so only the mapping/delegation logic is
// under test — no real DuckDB is involved.
describe("DuckDBManifestStore", () => {
  let ctx: ReturnType<typeof createMocks>;

  // Fresh sandbox + stubs per test so call counts never leak across cases.
  beforeEach(() => {
    ctx = createMocks();
  });

  // getManifest folds repository rows into a { buildId -> { tableName } } map.
  describe("getManifest", () => {
    it("should assemble a BuildManifest from repository entries", async () => {
      ctx.repository.listManifestEntries.resolves([
        makeEntry({ buildId: "b1", tableName: "tbl_a" }),
        makeEntry({ buildId: "b2", tableName: "tbl_b" }),
      ]);

      const manifest = await ctx.store.getManifest("proj-1", "pkg");

      expect(manifest.strict).toBe(false);
      expect(manifest.entries).toEqual({
        b1: { tableName: "tbl_a" },
        b2: { tableName: "tbl_b" },
      });
    });

    it("should return empty entries when no rows exist", async () => {
      ctx.repository.listManifestEntries.resolves([]);

      const manifest = await ctx.store.getManifest("proj-1", "pkg");

      expect(manifest.strict).toBe(false);
      expect(manifest.entries).toEqual({});
    });
  });

  // writeEntry should translate its positional args into the repository's
  // single upsert payload object.
  describe("writeEntry", () => {
    it("should upsert an entry with all fields", async () => {
      ctx.repository.upsertManifestEntry.resolves(makeEntry());

      await ctx.store.writeEntry(
        "proj-1",
        "pkg",
        "build-abc",
        "tbl",
        "src",
        "conn",
      );

      expect(ctx.repository.upsertManifestEntry.calledOnce).toBe(true);
      const arg = ctx.repository.upsertManifestEntry.firstCall.args[0];
      expect(arg).toEqual({
        projectId: "proj-1",
        packageName: "pkg",
        buildId: "build-abc",
        tableName: "tbl",
        sourceName: "src",
        connectionName: "conn",
      });
    });
  });

  // deleteEntry is a thin pass-through; only delegation is asserted.
  describe("deleteEntry", () => {
    it("should delegate to repository", async () => {
      ctx.repository.deleteManifestEntry.resolves();

      await ctx.store.deleteEntry("entry-1");

      expect(ctx.repository.deleteManifestEntry.calledOnce).toBe(true);
      expect(ctx.repository.deleteManifestEntry.firstCall.args[0]).toBe(
        "entry-1",
      );
    });
  });

  // listEntries returns repository rows untouched (no mapping applied).
  describe("listEntries", () => {
    it("should return entries from the repository", async () => {
      const entries = [
        makeEntry(),
        makeEntry({ id: "entry-2", buildId: "build-def" }),
      ];
      ctx.repository.listManifestEntries.resolves(entries);

      const result = await ctx.store.listEntries("proj-1", "pkg");

      expect(result).toEqual(entries);
      expect(
        ctx.repository.listManifestEntries.calledWith("proj-1", "pkg"),
      ).toBe(true);
    });

    it("should return empty array when no entries exist", async () => {
      ctx.repository.listManifestEntries.resolves([]);

      const result = await ctx.store.listEntries("proj-1", "pkg");

      expect(result).toEqual([]);
    });
  });
});
|
|
@@ -64,6 +64,49 @@ export async function initializeSchema(
|
|
|
64
64
|
)
|
|
65
65
|
`);
|
|
66
66
|
|
|
67
|
+
// Materializations table.
|
|
68
|
+
//
|
|
69
|
+
// `active_key` enforces at-most-one active (PENDING or RUNNING)
|
|
70
|
+
// materialization per (project, package) at the DB layer. It is set to
|
|
71
|
+
// `{project_id}|{package_name}` while the row is active and cleared
|
|
72
|
+
// to NULL on transition to any terminal state. A unique index on
|
|
73
|
+
// `active_key` (see below) makes the insert-then-check race impossible —
|
|
74
|
+
// a second concurrent create fails with a constraint violation, which the
|
|
75
|
+
// service layer translates to `MaterializationConflictError`.
|
|
76
|
+
await db.run(`
|
|
77
|
+
CREATE TABLE IF NOT EXISTS materializations (
|
|
78
|
+
id VARCHAR PRIMARY KEY,
|
|
79
|
+
project_id VARCHAR NOT NULL,
|
|
80
|
+
package_name VARCHAR NOT NULL,
|
|
81
|
+
status VARCHAR NOT NULL,
|
|
82
|
+
active_key VARCHAR,
|
|
83
|
+
started_at TIMESTAMP,
|
|
84
|
+
completed_at TIMESTAMP,
|
|
85
|
+
error TEXT,
|
|
86
|
+
metadata JSON,
|
|
87
|
+
created_at TIMESTAMP NOT NULL,
|
|
88
|
+
updated_at TIMESTAMP NOT NULL,
|
|
89
|
+
FOREIGN KEY (project_id) REFERENCES projects(id)
|
|
90
|
+
)
|
|
91
|
+
`);
|
|
92
|
+
|
|
93
|
+
// Build manifests table
|
|
94
|
+
await db.run(`
|
|
95
|
+
CREATE TABLE IF NOT EXISTS build_manifests (
|
|
96
|
+
id VARCHAR PRIMARY KEY,
|
|
97
|
+
project_id VARCHAR NOT NULL,
|
|
98
|
+
package_name VARCHAR NOT NULL,
|
|
99
|
+
build_id VARCHAR NOT NULL,
|
|
100
|
+
table_name VARCHAR NOT NULL,
|
|
101
|
+
source_name VARCHAR NOT NULL,
|
|
102
|
+
connection_name VARCHAR NOT NULL,
|
|
103
|
+
created_at TIMESTAMP NOT NULL,
|
|
104
|
+
updated_at TIMESTAMP NOT NULL,
|
|
105
|
+
FOREIGN KEY (project_id) REFERENCES projects(id),
|
|
106
|
+
UNIQUE (project_id, package_name, build_id)
|
|
107
|
+
)
|
|
108
|
+
`);
|
|
109
|
+
|
|
67
110
|
// Create indexes for better query performance
|
|
68
111
|
await db.run(
|
|
69
112
|
"CREATE INDEX IF NOT EXISTS idx_packages_project_id ON packages(project_id)",
|
|
@@ -71,10 +114,25 @@ export async function initializeSchema(
|
|
|
71
114
|
await db.run(
|
|
72
115
|
"CREATE INDEX IF NOT EXISTS idx_connections_project_id ON connections(project_id)",
|
|
73
116
|
);
|
|
117
|
+
await db.run(
|
|
118
|
+
"CREATE INDEX IF NOT EXISTS idx_materializations_project_package ON materializations(project_id, package_name)",
|
|
119
|
+
);
|
|
120
|
+
await db.run(
|
|
121
|
+
"CREATE UNIQUE INDEX IF NOT EXISTS idx_materializations_active_key ON materializations(active_key)",
|
|
122
|
+
);
|
|
123
|
+
await db.run(
|
|
124
|
+
"CREATE INDEX IF NOT EXISTS idx_build_manifests_project_package ON build_manifests(project_id, package_name)",
|
|
125
|
+
);
|
|
74
126
|
}
|
|
75
127
|
|
|
76
128
|
async function dropAllTables(db: DuckDBConnection): Promise<void> {
|
|
77
|
-
const tables = [
|
|
129
|
+
const tables = [
|
|
130
|
+
"build_manifests",
|
|
131
|
+
"materializations",
|
|
132
|
+
"packages",
|
|
133
|
+
"connections",
|
|
134
|
+
"projects",
|
|
135
|
+
];
|
|
78
136
|
|
|
79
137
|
logger.info("Dropping tables:", tables.join(", "));
|
|
80
138
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { logger } from "../../logger";
|
|
2
|
+
import {
|
|
3
|
+
BuildManifest,
|
|
4
|
+
ManifestEntry,
|
|
5
|
+
ManifestStore,
|
|
6
|
+
} from "../DatabaseInterface";
|
|
7
|
+
import { DuckDBConnection } from "../duckdb/DuckDBConnection";
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* DuckLake-backed ManifestStore used in orchestrated mode.
|
|
11
|
+
*
|
|
12
|
+
* Reads and writes manifest entries through a DuckLake catalog attached to
|
|
13
|
+
* the publisher's internal DuckDB. All workers sharing the same DuckLake
|
|
14
|
+
* catalog see the same manifest, enabling multi-worker coordination.
|
|
15
|
+
*
|
|
16
|
+
* The catalog is attached by {@link StorageManager} before this store is
|
|
17
|
+
* instantiated. The first worker to call {@link bootstrapSchema} creates the
|
|
18
|
+
* `build_manifests` table idempotently. Schema ownership lives in the
|
|
19
|
+
* publisher so that DDL and code co-evolve in the same repo.
|
|
20
|
+
*
|
|
21
|
+
* **Scope: manifest sync only.** Orchestrated mode only shares manifest
|
|
22
|
+
* state across workers. The active-materialization lock (the unique index
|
|
23
|
+
* on `materializations.active_key`) still lives in each worker's local
|
|
24
|
+
* DuckDB, so two workers can run builds concurrently and race on physical
|
|
25
|
+
* table names, staging table names, and manifest writes. Until a shared
|
|
26
|
+
* build lease lives in the DuckLake catalog, deployments running
|
|
27
|
+
* orchestrated mode must ensure builds are externally single-writer (e.g.,
|
|
28
|
+
* one designated build worker, or an external job scheduler). Other
|
|
29
|
+
* workers should only call `manifest?action=reload` to pick up manifests produced
|
|
30
|
+
* by the build worker.
|
|
31
|
+
*/
|
|
32
|
+
export class DuckLakeManifestStore implements ManifestStore {
|
|
33
|
+
private readonly table: string;
|
|
34
|
+
|
|
35
|
+
constructor(
|
|
36
|
+
private db: DuckDBConnection,
|
|
37
|
+
catalogName: string,
|
|
38
|
+
) {
|
|
39
|
+
this.table = `${catalogName}.build_manifests`;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Idempotently creates the `build_manifests` table and indices in the
|
|
44
|
+
* DuckLake catalog. Safe to call from every worker on startup.
|
|
45
|
+
*/
|
|
46
|
+
async bootstrapSchema(): Promise<void> {
|
|
47
|
+
await this.db.run(`
|
|
48
|
+
CREATE TABLE IF NOT EXISTS ${this.table} (
|
|
49
|
+
id VARCHAR,
|
|
50
|
+
project_id VARCHAR NOT NULL,
|
|
51
|
+
package_name VARCHAR NOT NULL,
|
|
52
|
+
build_id VARCHAR NOT NULL,
|
|
53
|
+
table_name VARCHAR NOT NULL,
|
|
54
|
+
source_name VARCHAR NOT NULL,
|
|
55
|
+
connection_name VARCHAR NOT NULL,
|
|
56
|
+
created_at TIMESTAMP NOT NULL,
|
|
57
|
+
updated_at TIMESTAMP NOT NULL
|
|
58
|
+
)
|
|
59
|
+
`);
|
|
60
|
+
logger.info(`DuckLake manifest table bootstrapped: ${this.table}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async getManifest(
|
|
64
|
+
projectId: string,
|
|
65
|
+
packageName: string,
|
|
66
|
+
): Promise<BuildManifest> {
|
|
67
|
+
const rows = await this.db.all<Record<string, unknown>>(
|
|
68
|
+
`SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
|
|
69
|
+
[projectId, packageName],
|
|
70
|
+
);
|
|
71
|
+
const manifest: BuildManifest = { entries: {}, strict: false };
|
|
72
|
+
for (const row of rows) {
|
|
73
|
+
const buildId = row.build_id as string;
|
|
74
|
+
// Rows are ordered newest-first; keep only the latest per build_id
|
|
75
|
+
// to handle rare duplicates from cross-worker races.
|
|
76
|
+
if (!manifest.entries[buildId]) {
|
|
77
|
+
manifest.entries[buildId] = {
|
|
78
|
+
tableName: row.table_name as string,
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return manifest;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Insert a manifest entry. If a row with the same `build_id` already
|
|
87
|
+
* exists (retry after crash), the duplicate is harmless:
|
|
88
|
+
* {@link getManifest} deduplicates by build_id keeping the newest row.
|
|
89
|
+
*/
|
|
90
|
+
async writeEntry(
|
|
91
|
+
projectId: string,
|
|
92
|
+
packageName: string,
|
|
93
|
+
buildId: string,
|
|
94
|
+
tableName: string,
|
|
95
|
+
sourceName: string,
|
|
96
|
+
connectionName: string,
|
|
97
|
+
): Promise<void> {
|
|
98
|
+
const now = new Date().toISOString();
|
|
99
|
+
const id = `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
|
|
100
|
+
|
|
101
|
+
await this.db.run(
|
|
102
|
+
`INSERT INTO ${this.table} (id, project_id, package_name, build_id, table_name, source_name, connection_name, created_at, updated_at)
|
|
103
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
104
|
+
[
|
|
105
|
+
id,
|
|
106
|
+
projectId,
|
|
107
|
+
packageName,
|
|
108
|
+
buildId,
|
|
109
|
+
tableName,
|
|
110
|
+
sourceName,
|
|
111
|
+
connectionName,
|
|
112
|
+
now,
|
|
113
|
+
now,
|
|
114
|
+
],
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
async deleteEntry(id: string): Promise<void> {
|
|
119
|
+
await this.db.run(`DELETE FROM ${this.table} WHERE id = ?`, [id]);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
async listEntries(
|
|
123
|
+
projectId: string,
|
|
124
|
+
packageName: string,
|
|
125
|
+
): Promise<ManifestEntry[]> {
|
|
126
|
+
const rows = await this.db.all<Record<string, unknown>>(
|
|
127
|
+
`SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
|
|
128
|
+
[projectId, packageName],
|
|
129
|
+
);
|
|
130
|
+
return rows.map(this.mapToEntry);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
private mapToEntry(row: Record<string, unknown>): ManifestEntry {
|
|
134
|
+
return {
|
|
135
|
+
id: row.id as string,
|
|
136
|
+
projectId: row.project_id as string,
|
|
137
|
+
packageName: row.package_name as string,
|
|
138
|
+
buildId: row.build_id as string,
|
|
139
|
+
tableName: row.table_name as string,
|
|
140
|
+
sourceName: row.source_name as string,
|
|
141
|
+
connectionName: row.connection_name as string,
|
|
142
|
+
createdAt: new Date(row.created_at as string),
|
|
143
|
+
updatedAt: new Date(row.updated_at as string),
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
##! experimental.persistence

// Raw fixture rows loaded directly from the packaged CSV.
source: raw_orders is duckdb.table('data/orders.csv')

// Rollup of raw_orders by category. The #@ persist annotation marks this
// source for materialization into a physical table named "order_summary";
// assumes the CSV provides `category` and a numeric `amount` column —
// TODO confirm against data/orders.csv.
#@ persist name="order_summary"
source: order_summary is raw_orders -> {
  group_by: category
  aggregate:
    total_orders is count()
    total_revenue is amount.sum()
}
|