@malloy-publisher/server 0.0.188 → 0.0.382-dev
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/api-doc.yaml +423 -60
- package/dist/app/assets/{HomePage-DsuUvSI_.js → HomePage-Dn3E4CuB.js} +1 -1
- package/dist/app/assets/{MainPage-DHWFkEN6.js → MainPage-BzB3yoqi.js} +1 -1
- package/dist/app/assets/{ModelPage-DNwcx1nE.js → ModelPage-C9O_sAXT.js} +1 -1
- package/dist/app/assets/{PackagePage-DSgz9G2V.js → PackagePage-DcxKEjBX.js} +1 -1
- package/dist/app/assets/{ProjectPage-CSdPosLV.js → ProjectPage-BDj307rF.js} +1 -1
- package/dist/app/assets/{RouteError-orw1RX8q.js → RouteError-DAShbVCG.js} +1 -1
- package/dist/app/assets/{WorkbookPage-Bp-BpGjL.js → WorkbookPage-Cs_XYEaB.js} +1 -1
- package/dist/app/assets/{core-B4ZYB7aS.es-8Zh0TkSr.js → core-CjeTkq8O.es-BqRc6yhC.js} +1 -1
- package/dist/app/assets/{index-BL2TJgTw.js → index-15BOvhp0.js} +4 -4
- package/dist/app/assets/{index-BWJkzsfl.js → index-Bb2jqquW.js} +1 -1
- package/dist/app/assets/{index-BefdHHMa.js → index-D68X76-7.js} +1 -1
- package/dist/app/assets/{index.umd-lY-87l4L.js → index.umd-DGBekgSu.js} +1 -1
- package/dist/app/index.html +1 -1
- package/dist/instrumentation.js +98 -77
- package/dist/server.js +1834 -450
- package/package.json +5 -3
- package/src/controller/connection.controller.ts +27 -20
- package/src/controller/manifest.controller.ts +29 -0
- package/src/controller/materialization.controller.ts +125 -0
- package/src/controller/model.controller.ts +0 -2
- package/src/controller/package.controller.ts +53 -2
- package/src/errors.ts +24 -0
- package/src/server.ts +196 -5
- package/src/service/manifest_service.spec.ts +201 -0
- package/src/service/manifest_service.ts +106 -0
- package/src/service/materialization_service.spec.ts +648 -0
- package/src/service/materialization_service.ts +929 -0
- package/src/service/materialized_table_gc.spec.ts +383 -0
- package/src/service/materialized_table_gc.ts +279 -0
- package/src/service/model.ts +25 -4
- package/src/service/package.ts +50 -0
- package/src/service/project_store.ts +21 -2
- package/src/service/quoting.ts +41 -0
- package/src/service/resolve_project.ts +13 -0
- package/src/storage/DatabaseInterface.ts +103 -1
- package/src/storage/{StorageManager.spec.ts → StorageManager.mock.ts} +9 -0
- package/src/storage/StorageManager.ts +119 -1
- package/src/storage/duckdb/DuckDBManifestStore.ts +70 -0
- package/src/storage/duckdb/DuckDBRepository.ts +99 -9
- package/src/storage/duckdb/ManifestRepository.ts +119 -0
- package/src/storage/duckdb/MaterializationRepository.ts +249 -0
- package/src/storage/duckdb/manifest_store.spec.ts +133 -0
- package/src/storage/duckdb/schema.ts +59 -1
- package/src/storage/ducklake/DuckLakeManifestStore.ts +146 -0
- package/tests/fixtures/persist-test/data/orders.csv +5 -0
- package/tests/fixtures/persist-test/persist_test.malloy +11 -0
- package/tests/fixtures/persist-test/publisher.json +5 -0
- package/tests/fixtures/publisher.config.json +15 -0
- package/tests/harness/rest_e2e.ts +68 -0
- package/tests/integration/materialization/materialization_lifecycle.integration.spec.ts +470 -0
- package/tests/integration/mcp/mcp_execute_query_tool.integration.spec.ts +2 -2
|
@@ -64,6 +64,49 @@ export async function initializeSchema(
|
|
|
64
64
|
)
|
|
65
65
|
`);
|
|
66
66
|
|
|
67
|
+
// Materializations table.
|
|
68
|
+
//
|
|
69
|
+
// `active_key` enforces at-most-one active (PENDING or RUNNING)
|
|
70
|
+
// materialization per (project, package) at the DB layer. It is set to
|
|
71
|
+
// `{project_id}|{package_name}` while the row is active and cleared
|
|
72
|
+
// to NULL on transition to any terminal state. A unique index on
|
|
73
|
+
// `active_key` (see below) makes the insert-then-check race impossible —
|
|
74
|
+
// a second concurrent create fails with a constraint violation, which the
|
|
75
|
+
// service layer translates to `MaterializationConflictError`.
|
|
76
|
+
await db.run(`
|
|
77
|
+
CREATE TABLE IF NOT EXISTS materializations (
|
|
78
|
+
id VARCHAR PRIMARY KEY,
|
|
79
|
+
project_id VARCHAR NOT NULL,
|
|
80
|
+
package_name VARCHAR NOT NULL,
|
|
81
|
+
status VARCHAR NOT NULL,
|
|
82
|
+
active_key VARCHAR,
|
|
83
|
+
started_at TIMESTAMP,
|
|
84
|
+
completed_at TIMESTAMP,
|
|
85
|
+
error TEXT,
|
|
86
|
+
metadata JSON,
|
|
87
|
+
created_at TIMESTAMP NOT NULL,
|
|
88
|
+
updated_at TIMESTAMP NOT NULL,
|
|
89
|
+
FOREIGN KEY (project_id) REFERENCES projects(id)
|
|
90
|
+
)
|
|
91
|
+
`);
|
|
92
|
+
|
|
93
|
+
// Build manifests table
|
|
94
|
+
await db.run(`
|
|
95
|
+
CREATE TABLE IF NOT EXISTS build_manifests (
|
|
96
|
+
id VARCHAR PRIMARY KEY,
|
|
97
|
+
project_id VARCHAR NOT NULL,
|
|
98
|
+
package_name VARCHAR NOT NULL,
|
|
99
|
+
build_id VARCHAR NOT NULL,
|
|
100
|
+
table_name VARCHAR NOT NULL,
|
|
101
|
+
source_name VARCHAR NOT NULL,
|
|
102
|
+
connection_name VARCHAR NOT NULL,
|
|
103
|
+
created_at TIMESTAMP NOT NULL,
|
|
104
|
+
updated_at TIMESTAMP NOT NULL,
|
|
105
|
+
FOREIGN KEY (project_id) REFERENCES projects(id),
|
|
106
|
+
UNIQUE (project_id, package_name, build_id)
|
|
107
|
+
)
|
|
108
|
+
`);
|
|
109
|
+
|
|
67
110
|
// Create indexes for better query performance
|
|
68
111
|
await db.run(
|
|
69
112
|
"CREATE INDEX IF NOT EXISTS idx_packages_project_id ON packages(project_id)",
|
|
@@ -71,10 +114,25 @@ export async function initializeSchema(
|
|
|
71
114
|
await db.run(
|
|
72
115
|
"CREATE INDEX IF NOT EXISTS idx_connections_project_id ON connections(project_id)",
|
|
73
116
|
);
|
|
117
|
+
await db.run(
|
|
118
|
+
"CREATE INDEX IF NOT EXISTS idx_materializations_project_package ON materializations(project_id, package_name)",
|
|
119
|
+
);
|
|
120
|
+
await db.run(
|
|
121
|
+
"CREATE UNIQUE INDEX IF NOT EXISTS idx_materializations_active_key ON materializations(active_key)",
|
|
122
|
+
);
|
|
123
|
+
await db.run(
|
|
124
|
+
"CREATE INDEX IF NOT EXISTS idx_build_manifests_project_package ON build_manifests(project_id, package_name)",
|
|
125
|
+
);
|
|
74
126
|
}
|
|
75
127
|
|
|
76
128
|
async function dropAllTables(db: DuckDBConnection): Promise<void> {
|
|
77
|
-
const tables = [
|
|
129
|
+
const tables = [
|
|
130
|
+
"build_manifests",
|
|
131
|
+
"materializations",
|
|
132
|
+
"packages",
|
|
133
|
+
"connections",
|
|
134
|
+
"projects",
|
|
135
|
+
];
|
|
78
136
|
|
|
79
137
|
logger.info("Dropping tables:", tables.join(", "));
|
|
80
138
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { logger } from "../../logger";
|
|
2
|
+
import {
|
|
3
|
+
BuildManifest,
|
|
4
|
+
ManifestEntry,
|
|
5
|
+
ManifestStore,
|
|
6
|
+
} from "../DatabaseInterface";
|
|
7
|
+
import { DuckDBConnection } from "../duckdb/DuckDBConnection";
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* DuckLake-backed ManifestStore used in orchestrated mode.
|
|
11
|
+
*
|
|
12
|
+
* Reads and writes manifest entries through a DuckLake catalog attached to
|
|
13
|
+
* the publisher's internal DuckDB. All workers sharing the same DuckLake
|
|
14
|
+
* catalog see the same manifest, enabling multi-worker coordination.
|
|
15
|
+
*
|
|
16
|
+
* The catalog is attached by {@link StorageManager} before this store is
|
|
17
|
+
* instantiated. The first worker to call {@link bootstrapSchema} creates the
|
|
18
|
+
* `build_manifests` table idempotently. Schema ownership lives in the
|
|
19
|
+
* publisher so that DDL and code co-evolve in the same repo.
|
|
20
|
+
*
|
|
21
|
+
* **Scope: manifest sync only.** Orchestrated mode only shares manifest
|
|
22
|
+
* state across workers. The active-materialization lock (the unique index
|
|
23
|
+
* on `materializations.active_key`) still lives in each worker's local
|
|
24
|
+
* DuckDB, so two workers can run builds concurrently and race on physical
|
|
25
|
+
* table names, staging table names, and manifest writes. Until a shared
|
|
26
|
+
* build lease lives in the DuckLake catalog, deployments running
|
|
27
|
+
* orchestrated mode must ensure builds are externally single-writer (e.g.,
|
|
28
|
+
* one designated build worker, or an external job scheduler). Other
|
|
29
|
+
* workers should only call `manifest?action=reload` to pick up manifests produced
|
|
30
|
+
* by the build worker.
|
|
31
|
+
*/
|
|
32
|
+
export class DuckLakeManifestStore implements ManifestStore {
|
|
33
|
+
private readonly table: string;
|
|
34
|
+
|
|
35
|
+
constructor(
|
|
36
|
+
private db: DuckDBConnection,
|
|
37
|
+
catalogName: string,
|
|
38
|
+
) {
|
|
39
|
+
this.table = `${catalogName}.build_manifests`;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Idempotently creates the `build_manifests` table in the
|
|
44
|
+
* DuckLake catalog. Safe to call from every worker on startup.
|
|
45
|
+
*/
|
|
46
|
+
async bootstrapSchema(): Promise<void> {
|
|
47
|
+
await this.db.run(`
|
|
48
|
+
CREATE TABLE IF NOT EXISTS ${this.table} (
|
|
49
|
+
id VARCHAR,
|
|
50
|
+
project_id VARCHAR NOT NULL,
|
|
51
|
+
package_name VARCHAR NOT NULL,
|
|
52
|
+
build_id VARCHAR NOT NULL,
|
|
53
|
+
table_name VARCHAR NOT NULL,
|
|
54
|
+
source_name VARCHAR NOT NULL,
|
|
55
|
+
connection_name VARCHAR NOT NULL,
|
|
56
|
+
created_at TIMESTAMP NOT NULL,
|
|
57
|
+
updated_at TIMESTAMP NOT NULL
|
|
58
|
+
)
|
|
59
|
+
`);
|
|
60
|
+
logger.info(`DuckLake manifest table bootstrapped: ${this.table}`);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async getManifest(
|
|
64
|
+
projectId: string,
|
|
65
|
+
packageName: string,
|
|
66
|
+
): Promise<BuildManifest> {
|
|
67
|
+
const rows = await this.db.all<Record<string, unknown>>(
|
|
68
|
+
`SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
|
|
69
|
+
[projectId, packageName],
|
|
70
|
+
);
|
|
71
|
+
const manifest: BuildManifest = { entries: {}, strict: false };
|
|
72
|
+
for (const row of rows) {
|
|
73
|
+
const buildId = row.build_id as string;
|
|
74
|
+
// Rows are ordered newest-first; keep only the latest per build_id
|
|
75
|
+
// to handle rare duplicates from cross-worker races.
|
|
76
|
+
if (!manifest.entries[buildId]) {
|
|
77
|
+
manifest.entries[buildId] = {
|
|
78
|
+
tableName: row.table_name as string,
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return manifest;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Insert a manifest entry. If a row with the same `build_id` already
|
|
87
|
+
* exists (retry after crash), the duplicate is harmless:
|
|
88
|
+
* {@link getManifest} deduplicates by build_id keeping the newest row.
|
|
89
|
+
*/
|
|
90
|
+
async writeEntry(
|
|
91
|
+
projectId: string,
|
|
92
|
+
packageName: string,
|
|
93
|
+
buildId: string,
|
|
94
|
+
tableName: string,
|
|
95
|
+
sourceName: string,
|
|
96
|
+
connectionName: string,
|
|
97
|
+
): Promise<void> {
|
|
98
|
+
const now = new Date().toISOString();
|
|
99
|
+
const id = `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
|
|
100
|
+
|
|
101
|
+
await this.db.run(
|
|
102
|
+
`INSERT INTO ${this.table} (id, project_id, package_name, build_id, table_name, source_name, connection_name, created_at, updated_at)
|
|
103
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
104
|
+
[
|
|
105
|
+
id,
|
|
106
|
+
projectId,
|
|
107
|
+
packageName,
|
|
108
|
+
buildId,
|
|
109
|
+
tableName,
|
|
110
|
+
sourceName,
|
|
111
|
+
connectionName,
|
|
112
|
+
now,
|
|
113
|
+
now,
|
|
114
|
+
],
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
async deleteEntry(id: string): Promise<void> {
|
|
119
|
+
await this.db.run(`DELETE FROM ${this.table} WHERE id = ?`, [id]);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
async listEntries(
|
|
123
|
+
projectId: string,
|
|
124
|
+
packageName: string,
|
|
125
|
+
): Promise<ManifestEntry[]> {
|
|
126
|
+
const rows = await this.db.all<Record<string, unknown>>(
|
|
127
|
+
`SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
|
|
128
|
+
[projectId, packageName],
|
|
129
|
+
);
|
|
130
|
+
return rows.map(this.mapToEntry);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
private mapToEntry(row: Record<string, unknown>): ManifestEntry {
|
|
134
|
+
return {
|
|
135
|
+
id: row.id as string,
|
|
136
|
+
projectId: row.project_id as string,
|
|
137
|
+
packageName: row.package_name as string,
|
|
138
|
+
buildId: row.build_id as string,
|
|
139
|
+
tableName: row.table_name as string,
|
|
140
|
+
sourceName: row.source_name as string,
|
|
141
|
+
connectionName: row.connection_name as string,
|
|
142
|
+
createdAt: new Date(row.created_at as string),
|
|
143
|
+
updatedAt: new Date(row.updated_at as string),
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
##! experimental.persistence
|
|
2
|
+
|
|
3
|
+
source: raw_orders is duckdb.table('data/orders.csv')
|
|
4
|
+
|
|
5
|
+
#@ persist name="order_summary"
|
|
6
|
+
source: order_summary is raw_orders -> {
|
|
7
|
+
group_by: category
|
|
8
|
+
aggregate:
|
|
9
|
+
total_orders is count()
|
|
10
|
+
total_revenue is amount.sum()
|
|
11
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import http from "http";
|
|
2
|
+
|
|
3
|
+
export interface RestE2EEnv {
|
|
4
|
+
httpServer: http.Server;
|
|
5
|
+
baseUrl: string;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Spin up an HTTP server wrapping the real Express REST app.
|
|
10
|
+
*
|
|
11
|
+
* Works regardless of which test file first imported server.ts —
|
|
12
|
+
* reuses the cached Express app and binds to an OS-assigned port
|
|
13
|
+
* to avoid collisions.
|
|
14
|
+
*
|
|
15
|
+
* Callers are responsible for creating any test-specific projects
|
|
16
|
+
* via the REST API (POST /api/v0/projects) and cleaning them up.
|
|
17
|
+
*/
|
|
18
|
+
export async function startRestE2E(): Promise<
|
|
19
|
+
RestE2EEnv & { stop(): Promise<void> }
|
|
20
|
+
> {
|
|
21
|
+
const { app } = await import("../../src/server");
|
|
22
|
+
|
|
23
|
+
const httpServer: http.Server = await new Promise<http.Server>(
|
|
24
|
+
(resolve, reject) => {
|
|
25
|
+
const srv = http
|
|
26
|
+
.createServer(app)
|
|
27
|
+
.listen(0, "127.0.0.1", () => resolve(srv));
|
|
28
|
+
srv.on("error", (err: NodeJS.ErrnoException) => {
|
|
29
|
+
console.error("[REST E2E] server listen error", err);
|
|
30
|
+
reject(err);
|
|
31
|
+
});
|
|
32
|
+
},
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
const addr = httpServer.address() as { port: number };
|
|
36
|
+
const baseUrl = `http://127.0.0.1:${addr.port}`;
|
|
37
|
+
|
|
38
|
+
const maxWait = 180_000;
|
|
39
|
+
const start = Date.now();
|
|
40
|
+
let ready = false;
|
|
41
|
+
while (!ready && Date.now() - start < maxWait) {
|
|
42
|
+
try {
|
|
43
|
+
const res = await fetch(`${baseUrl}/health/readiness`);
|
|
44
|
+
if (res.ok) {
|
|
45
|
+
const data = (await res.json()) as { status: string };
|
|
46
|
+
if (data.status === "UP") {
|
|
47
|
+
ready = true;
|
|
48
|
+
break;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
} catch {
|
|
52
|
+
// server not ready yet
|
|
53
|
+
}
|
|
54
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
55
|
+
}
|
|
56
|
+
if (!ready) {
|
|
57
|
+
httpServer.closeAllConnections?.();
|
|
58
|
+
await new Promise<void>((r) => httpServer.close(() => r()));
|
|
59
|
+
throw new Error("REST E2E server did not become ready in time");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
const stop = async (): Promise<void> => {
|
|
63
|
+
httpServer.closeAllConnections?.();
|
|
64
|
+
await new Promise<void>((r) => httpServer.close(() => r()));
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
return { httpServer, baseUrl, stop };
|
|
68
|
+
}
|