@malloy-publisher/server 0.0.188 → 0.0.382-dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/app/api-doc.yaml +423 -60
  2. package/dist/app/assets/{HomePage-DsuUvSI_.js → HomePage-Dn3E4CuB.js} +1 -1
  3. package/dist/app/assets/{MainPage-DHWFkEN6.js → MainPage-BzB3yoqi.js} +1 -1
  4. package/dist/app/assets/{ModelPage-DNwcx1nE.js → ModelPage-C9O_sAXT.js} +1 -1
  5. package/dist/app/assets/{PackagePage-DSgz9G2V.js → PackagePage-DcxKEjBX.js} +1 -1
  6. package/dist/app/assets/{ProjectPage-CSdPosLV.js → ProjectPage-BDj307rF.js} +1 -1
  7. package/dist/app/assets/{RouteError-orw1RX8q.js → RouteError-DAShbVCG.js} +1 -1
  8. package/dist/app/assets/{WorkbookPage-Bp-BpGjL.js → WorkbookPage-Cs_XYEaB.js} +1 -1
  9. package/dist/app/assets/{core-B4ZYB7aS.es-8Zh0TkSr.js → core-CjeTkq8O.es-BqRc6yhC.js} +1 -1
  10. package/dist/app/assets/{index-BL2TJgTw.js → index-15BOvhp0.js} +4 -4
  11. package/dist/app/assets/{index-BWJkzsfl.js → index-Bb2jqquW.js} +1 -1
  12. package/dist/app/assets/{index-BefdHHMa.js → index-D68X76-7.js} +1 -1
  13. package/dist/app/assets/{index.umd-lY-87l4L.js → index.umd-DGBekgSu.js} +1 -1
  14. package/dist/app/index.html +1 -1
  15. package/dist/instrumentation.js +98 -77
  16. package/dist/server.js +1834 -450
  17. package/package.json +5 -3
  18. package/src/controller/connection.controller.ts +27 -20
  19. package/src/controller/manifest.controller.ts +29 -0
  20. package/src/controller/materialization.controller.ts +125 -0
  21. package/src/controller/model.controller.ts +0 -2
  22. package/src/controller/package.controller.ts +53 -2
  23. package/src/errors.ts +24 -0
  24. package/src/server.ts +196 -5
  25. package/src/service/manifest_service.spec.ts +201 -0
  26. package/src/service/manifest_service.ts +106 -0
  27. package/src/service/materialization_service.spec.ts +648 -0
  28. package/src/service/materialization_service.ts +929 -0
  29. package/src/service/materialized_table_gc.spec.ts +383 -0
  30. package/src/service/materialized_table_gc.ts +279 -0
  31. package/src/service/model.ts +25 -4
  32. package/src/service/package.ts +50 -0
  33. package/src/service/project_store.ts +21 -2
  34. package/src/service/quoting.ts +41 -0
  35. package/src/service/resolve_project.ts +13 -0
  36. package/src/storage/DatabaseInterface.ts +103 -1
  37. package/src/storage/{StorageManager.spec.ts → StorageManager.mock.ts} +9 -0
  38. package/src/storage/StorageManager.ts +119 -1
  39. package/src/storage/duckdb/DuckDBManifestStore.ts +70 -0
  40. package/src/storage/duckdb/DuckDBRepository.ts +99 -9
  41. package/src/storage/duckdb/ManifestRepository.ts +119 -0
  42. package/src/storage/duckdb/MaterializationRepository.ts +249 -0
  43. package/src/storage/duckdb/manifest_store.spec.ts +133 -0
  44. package/src/storage/duckdb/schema.ts +59 -1
  45. package/src/storage/ducklake/DuckLakeManifestStore.ts +146 -0
  46. package/tests/fixtures/persist-test/data/orders.csv +5 -0
  47. package/tests/fixtures/persist-test/persist_test.malloy +11 -0
  48. package/tests/fixtures/persist-test/publisher.json +5 -0
  49. package/tests/fixtures/publisher.config.json +15 -0
  50. package/tests/harness/rest_e2e.ts +68 -0
  51. package/tests/integration/materialization/materialization_lifecycle.integration.spec.ts +470 -0
  52. package/tests/integration/mcp/mcp_execute_query_tool.integration.spec.ts +2 -2
@@ -64,6 +64,49 @@ export async function initializeSchema(
64
64
  )
65
65
  `);
66
66
 
67
+ // Materializations table.
68
+ //
69
+ // `active_key` enforces at-most-one active (PENDING or RUNNING)
70
+ // materialization per (project, package) at the DB layer. It is set to
71
+ // `{project_id}|{package_name}` while the row is active and cleared
72
+ // to NULL on transition to any terminal state. A unique index on
73
+ // `active_key` (see below) makes the insert-then-check race impossible —
74
+ // a second concurrent create fails with a constraint violation, which the
75
+ // service layer translates to `MaterializationConflictError`.
76
+ await db.run(`
77
+ CREATE TABLE IF NOT EXISTS materializations (
78
+ id VARCHAR PRIMARY KEY,
79
+ project_id VARCHAR NOT NULL,
80
+ package_name VARCHAR NOT NULL,
81
+ status VARCHAR NOT NULL,
82
+ active_key VARCHAR,
83
+ started_at TIMESTAMP,
84
+ completed_at TIMESTAMP,
85
+ error TEXT,
86
+ metadata JSON,
87
+ created_at TIMESTAMP NOT NULL,
88
+ updated_at TIMESTAMP NOT NULL,
89
+ FOREIGN KEY (project_id) REFERENCES projects(id)
90
+ )
91
+ `);
92
+
93
+ // Build manifests table
94
+ await db.run(`
95
+ CREATE TABLE IF NOT EXISTS build_manifests (
96
+ id VARCHAR PRIMARY KEY,
97
+ project_id VARCHAR NOT NULL,
98
+ package_name VARCHAR NOT NULL,
99
+ build_id VARCHAR NOT NULL,
100
+ table_name VARCHAR NOT NULL,
101
+ source_name VARCHAR NOT NULL,
102
+ connection_name VARCHAR NOT NULL,
103
+ created_at TIMESTAMP NOT NULL,
104
+ updated_at TIMESTAMP NOT NULL,
105
+ FOREIGN KEY (project_id) REFERENCES projects(id),
106
+ UNIQUE (project_id, package_name, build_id)
107
+ )
108
+ `);
109
+
67
110
  // Create indexes for better query performance
68
111
  await db.run(
69
112
  "CREATE INDEX IF NOT EXISTS idx_packages_project_id ON packages(project_id)",
@@ -71,10 +114,25 @@ export async function initializeSchema(
71
114
  await db.run(
72
115
  "CREATE INDEX IF NOT EXISTS idx_connections_project_id ON connections(project_id)",
73
116
  );
117
+ await db.run(
118
+ "CREATE INDEX IF NOT EXISTS idx_materializations_project_package ON materializations(project_id, package_name)",
119
+ );
120
+ await db.run(
121
+ "CREATE UNIQUE INDEX IF NOT EXISTS idx_materializations_active_key ON materializations(active_key)",
122
+ );
123
+ await db.run(
124
+ "CREATE INDEX IF NOT EXISTS idx_build_manifests_project_package ON build_manifests(project_id, package_name)",
125
+ );
74
126
  }
75
127
 
76
128
  async function dropAllTables(db: DuckDBConnection): Promise<void> {
77
- const tables = ["packages", "connections", "projects"];
129
+ const tables = [
130
+ "build_manifests",
131
+ "materializations",
132
+ "packages",
133
+ "connections",
134
+ "projects",
135
+ ];
78
136
 
79
137
  logger.info("Dropping tables:", tables.join(", "));
80
138
 
@@ -0,0 +1,146 @@
1
+ import { logger } from "../../logger";
2
+ import {
3
+ BuildManifest,
4
+ ManifestEntry,
5
+ ManifestStore,
6
+ } from "../DatabaseInterface";
7
+ import { DuckDBConnection } from "../duckdb/DuckDBConnection";
8
+
9
+ /**
10
+ * DuckLake-backed ManifestStore used in orchestrated mode.
11
+ *
12
+ * Reads and writes manifest entries through a DuckLake catalog attached to
13
+ * the publisher's internal DuckDB. All workers sharing the same DuckLake
14
+ * catalog see the same manifest, enabling multi-worker coordination.
15
+ *
16
+ * The catalog is attached by {@link StorageManager} before this store is
17
+ * instantiated. The first worker to call {@link bootstrapSchema} creates the
18
+ * `build_manifests` table idempotently. Schema ownership lives in the
19
+ * publisher so that DDL and code co-evolve in the same repo.
20
+ *
21
+ * **Scope: manifest sync only.** Orchestrated mode only shares manifest
22
+ * state across workers. The active-materialization lock (the unique index
23
+ * on `materializations.active_key`) still lives in each worker's local
24
+ * DuckDB, so two workers can run builds concurrently and race on physical
25
+ * table names, staging table names, and manifest writes. Until a shared
26
+ * build lease lives in the DuckLake catalog, deployments running
27
+ * orchestrated mode must ensure builds are externally single-writer (e.g.,
28
+ * one designated build worker, or an external job scheduler). Other
29
+ * workers should only call `manifest?action=reload` to pick up manifests produced
30
+ * by the build worker.
31
+ */
32
+ export class DuckLakeManifestStore implements ManifestStore {
33
+ private readonly table: string;
34
+
35
+ constructor(
36
+ private db: DuckDBConnection,
37
+ catalogName: string,
38
+ ) {
39
+ this.table = `${catalogName}.build_manifests`; // interpolated unescaped into every SQL statement in this class; catalogName must be a trusted identifier
40
+ }
41
+
42
+ /**
43
+ * Idempotently creates the `build_manifests` table in the DuckLake catalog
44
+ * (table only: no index, primary key, or unique constraint is declared here). Safe to call from every worker on startup.
45
+ */
46
+ async bootstrapSchema(): Promise<void> {
47
+ await this.db.run(`
48
+ CREATE TABLE IF NOT EXISTS ${this.table} (
49
+ id VARCHAR,
50
+ project_id VARCHAR NOT NULL,
51
+ package_name VARCHAR NOT NULL,
52
+ build_id VARCHAR NOT NULL,
53
+ table_name VARCHAR NOT NULL,
54
+ source_name VARCHAR NOT NULL,
55
+ connection_name VARCHAR NOT NULL,
56
+ created_at TIMESTAMP NOT NULL,
57
+ updated_at TIMESTAMP NOT NULL
58
+ )
59
+ `);
60
+ logger.info(`DuckLake manifest table bootstrapped: ${this.table}`);
61
+ }
62
+ /** Builds the manifest for one package: maps each `build_id` to the `table_name` of its newest row. */
63
+ async getManifest(
64
+ projectId: string,
65
+ packageName: string,
66
+ ): Promise<BuildManifest> {
67
+ const rows = await this.db.all<Record<string, unknown>>(
68
+ `SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
69
+ [projectId, packageName],
70
+ );
71
+ const manifest: BuildManifest = { entries: {}, strict: false };
72
+ for (const row of rows) {
73
+ const buildId = row.build_id as string;
74
+ // Rows arrive newest-first (ORDER BY created_at DESC), so the first row
75
+ // seen for a build_id wins; later (older) duplicates are skipped.
76
+ if (!manifest.entries[buildId]) {
77
+ manifest.entries[buildId] = {
78
+ tableName: row.table_name as string,
79
+ };
80
+ }
81
+ }
82
+ return manifest;
83
+ }
84
+
85
+ /**
86
+ * Insert a manifest entry. If a row with the same `build_id` already
87
+ * exists (retry after crash), the duplicate is harmless:
88
+ * {@link getManifest} deduplicates by build_id keeping the newest row.
89
+ */
90
+ async writeEntry(
91
+ projectId: string,
92
+ packageName: string,
93
+ buildId: string,
94
+ tableName: string,
95
+ sourceName: string,
96
+ connectionName: string,
97
+ ): Promise<void> {
98
+ const now = new Date().toISOString();
99
+ const id = `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`; // epoch-millis plus random base-36 suffix; uniqueness is probabilistic and not enforced by the table
100
+
101
+ await this.db.run(
102
+ `INSERT INTO ${this.table} (id, project_id, package_name, build_id, table_name, source_name, connection_name, created_at, updated_at)
103
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
104
+ [
105
+ id,
106
+ projectId,
107
+ packageName,
108
+ buildId,
109
+ tableName,
110
+ sourceName,
111
+ connectionName,
112
+ now,
113
+ now,
114
+ ],
115
+ );
116
+ }
117
+ /** Deletes the row(s) whose `id` column equals the given value. */
118
+ async deleteEntry(id: string): Promise<void> {
119
+ await this.db.run(`DELETE FROM ${this.table} WHERE id = ?`, [id]);
120
+ }
121
+ /** Lists all rows for one package, newest first; unlike `getManifest`, duplicates per `build_id` are not collapsed. */
122
+ async listEntries(
123
+ projectId: string,
124
+ packageName: string,
125
+ ): Promise<ManifestEntry[]> {
126
+ const rows = await this.db.all<Record<string, unknown>>(
127
+ `SELECT * FROM ${this.table} WHERE project_id = ? AND package_name = ? ORDER BY created_at DESC`,
128
+ [projectId, packageName],
129
+ );
130
+ return rows.map(this.mapToEntry); // method is passed unbound; this is fine only while mapToEntry never reads instance state
131
+ }
132
+ /** Maps a raw snake_case result row to a camelCase `ManifestEntry`. Uses no instance state. */
133
+ private mapToEntry(row: Record<string, unknown>): ManifestEntry {
134
+ return {
135
+ id: row.id as string,
136
+ projectId: row.project_id as string,
137
+ packageName: row.package_name as string,
138
+ buildId: row.build_id as string,
139
+ tableName: row.table_name as string,
140
+ sourceName: row.source_name as string,
141
+ connectionName: row.connection_name as string,
142
+ createdAt: new Date(row.created_at as string), // NOTE(review): assumes the driver returns a value the Date constructor can parse; the cast is unchecked — confirm
143
+ updatedAt: new Date(row.updated_at as string),
144
+ };
145
+ }
146
+ }
@@ -0,0 +1,5 @@
1
+ category,amount
2
+ electronics,100
3
+ clothing,50
4
+ electronics,200
5
+ clothing,75
@@ -0,0 +1,11 @@
1
+ ##! experimental.persistence
2
+
3
+ source: raw_orders is duckdb.table('data/orders.csv')
4
+
5
+ #@ persist name="order_summary"
6
+ source: order_summary is raw_orders -> {
7
+ group_by: category
8
+ aggregate:
9
+ total_orders is count()
10
+ total_revenue is amount.sum()
11
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "persist-test",
3
+ "version": "1.0.0",
4
+ "description": "DuckDB persist test fixture"
5
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "frozenConfig": false,
3
+ "projects": [
4
+ {
5
+ "name": "test-project",
6
+ "packages": [
7
+ {
8
+ "name": "persist-test",
9
+ "location": "./persist-test"
10
+ }
11
+ ],
12
+ "connections": []
13
+ }
14
+ ]
15
+ }
@@ -0,0 +1,68 @@
1
+ import http from "http";
2
+
3
+ export interface RestE2EEnv { // handle to a running REST test server
4
+ httpServer: http.Server; // the underlying Node HTTP server
5
+ baseUrl: string; // origin to prefix request paths with; startRestE2E sets it to http://127.0.0.1:<port>
6
+ }
7
+
8
+ /**
9
+ * Spin up an HTTP server wrapping the real Express REST app.
10
+ *
11
+ * Works regardless of which test file first imported server.ts —
12
+ * reuses the cached Express app and binds on an OS-assigned port
13
+ * to avoid collisions.
14
+ *
15
+ * Callers are responsible for creating any test-specific projects
16
+ * via the REST API (POST /api/v0/projects) and cleaning them up.
+ *
+ * After binding, polls `/health/readiness` every 500 ms until the response
+ * is OK and its JSON body has `status: "UP"`, for at most 180 seconds.
+ *
+ * @returns the server, its base URL, and a `stop()` that closes the server.
+ * @throws Error if readiness is not reached in time (the server is closed
+ * before throwing), or the listen error if binding fails.
17
+ */
18
+ export async function startRestE2E(): Promise<
19
+ RestE2EEnv & { stop(): Promise<void> }
20
+ > {
21
+ const { app } = await import("../../src/server");
22
+
23
+ const httpServer: http.Server = await new Promise<http.Server>(
24
+ (resolve, reject) => {
25
+ const srv = http
26
+ .createServer(app)
27
+ .listen(0, "127.0.0.1", () => resolve(srv)); // port 0 asks the OS for a free port
28
+ srv.on("error", (err: NodeJS.ErrnoException) => {
29
+ console.error("[REST E2E] server listen error", err);
30
+ reject(err);
31
+ });
32
+ },
33
+ );
34
+
35
+ const addr = httpServer.address() as { port: number };
36
+ const baseUrl = `http://127.0.0.1:${addr.port}`;
37
+
38
+ const maxWait = 180_000; // readiness deadline in milliseconds
39
+ const start = Date.now();
40
+ let ready = false;
41
+ while (!ready && Date.now() - start < maxWait) {
42
+ try {
43
+ const res = await fetch(`${baseUrl}/health/readiness`);
44
+ if (res.ok) {
45
+ const data = (await res.json()) as { status: string };
46
+ if (data.status === "UP") {
47
+ ready = true;
48
+ break;
49
+ }
50
+ }
51
+ } catch {
52
+ // fetch or JSON parsing failed: treat as not ready yet and keep polling
53
+ }
54
+ await new Promise((r) => setTimeout(r, 500)); // pause 500 ms between polls
55
+ }
56
+ if (!ready) {
57
+ httpServer.closeAllConnections?.(); // optional call; presumably guards runtimes without this method
58
+ await new Promise<void>((r) => httpServer.close(() => r()));
59
+ throw new Error("REST E2E server did not become ready in time");
60
+ }
61
+
62
+ const stop = async (): Promise<void> => {
63
+ httpServer.closeAllConnections?.();
64
+ await new Promise<void>((r) => httpServer.close(() => r())); // resolves once close() calls back; any close error is ignored
65
+ };
66
+
67
+ return { httpServer, baseUrl, stop };
68
+ }