@malloy-publisher/server 0.0.198-dev → 0.0.198-dev2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.docker.md +135 -20
- package/README.md +15 -0
- package/build.ts +32 -1
- package/dist/app/api-doc.yaml +51 -0
- package/dist/app/assets/EnvironmentPage-Dpee_Kn6.js +1 -0
- package/dist/app/assets/HomePage-DLRWTNoL.js +1 -0
- package/dist/app/assets/MainPage-DsVt5QGM.js +2 -0
- package/dist/app/assets/ModelPage-AwAugZ37.js +1 -0
- package/dist/app/assets/PackagePage-XQ-EWGTC.js +1 -0
- package/dist/app/assets/RouteError-3Mv8JQw7.js +1 -0
- package/dist/app/assets/WorkbookPage-DHYYpcYc.js +1 -0
- package/dist/app/assets/{core-w79IMXAG.es-Bd0UlzOL.js → core-DfcpQGVP.es-DQggNOdX.js} +14 -14
- package/dist/app/assets/{index-C513UodQ.js → index-BUp81Qdm.js} +15 -15
- package/dist/app/assets/index-D1pdwrUW.js +1803 -0
- package/dist/app/assets/index-Dv5bF4Ii.js +451 -0
- package/dist/app/assets/{index.umd-BMeMPq_9.js → index.umd-CQH4LZU8.js} +1 -1
- package/dist/app/index.html +2 -3
- package/dist/default-publisher.config.json +23 -0
- package/dist/instrumentation.mjs +22 -3
- package/dist/server.mjs +1522 -651
- package/dist/service/schema_worker.mjs +61 -0
- package/package.json +11 -12
- package/publisher.config.example.bigquery.json +33 -0
- package/publisher.config.example.duckdb.json +23 -0
- package/publisher.config.json +1 -11
- package/src/config.spec.ts +306 -0
- package/src/config.ts +222 -2
- package/src/controller/compile.controller.ts +3 -1
- package/src/controller/connection.controller.ts +1 -1
- package/src/controller/model.controller.ts +8 -1
- package/src/controller/package.controller.ts +70 -29
- package/src/controller/query.controller.ts +3 -0
- package/src/default-publisher.config.json +23 -0
- package/src/errors.spec.ts +42 -0
- package/src/errors.ts +21 -0
- package/src/health.spec.ts +90 -0
- package/src/health.ts +73 -45
- package/src/instrumentation.ts +50 -0
- package/src/logger.ts +1 -3
- package/src/mcp/tools/discovery_tools.ts +6 -2
- package/src/mcp/tools/execute_query_tool.ts +12 -0
- package/src/path_safety.spec.ts +158 -0
- package/src/path_safety.ts +140 -0
- package/src/pg_helpers.spec.ts +226 -0
- package/src/pg_helpers.ts +129 -0
- package/src/server-old.ts +3 -23
- package/src/server.ts +54 -0
- package/src/service/connection.spec.ts +6 -4
- package/src/service/connection.ts +8 -3
- package/src/service/connection_config.ts +2 -2
- package/src/service/environment.ts +621 -176
- package/src/service/environment_admission.spec.ts +180 -0
- package/src/service/environment_store.ts +31 -0
- package/src/service/filter_integration.spec.ts +110 -0
- package/src/service/givens_integration.spec.ts +192 -0
- package/src/service/manifest_service.spec.ts +7 -2
- package/src/service/manifest_service.ts +8 -2
- package/src/service/materialization_service.ts +14 -3
- package/src/service/model.spec.ts +105 -0
- package/src/service/model.ts +91 -7
- package/src/service/package.spec.ts +11 -7
- package/src/service/package.ts +53 -56
- package/src/service/package_memory_governor.spec.ts +173 -0
- package/src/service/package_memory_governor.ts +233 -0
- package/src/service/package_race.spec.ts +208 -0
- package/src/service/process_stats_reporter.ts +169 -0
- package/src/service/schema_worker.ts +123 -0
- package/src/service/schema_worker_pool.ts +278 -0
- package/src/storage/StorageManager.ts +71 -11
- package/src/storage/duckdb/schema.ts +41 -0
- package/src/utils.ts +11 -0
- package/tests/harness/rest_e2e.ts +2 -2
- package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +235 -0
- package/tests/integration/concurrent_package/concurrent_package.integration.spec.ts +280 -0
- package/tests/integration/legacy_routes/legacy_routes.integration.spec.ts +259 -0
- package/tests/unit/duckdb/attached_databases.test.ts +5 -5
- package/tests/unit/duckdb/legacy_schema_migration.test.ts +194 -0
- package/tests/unit/storage/StorageManager.test.ts +166 -0
- package/dist/app/assets/EnvironmentPage-1j6QDWAy.js +0 -1
- package/dist/app/assets/HomePage-DMop21VG.js +0 -1
- package/dist/app/assets/MainPage-BbE8ETz1.js +0 -2
- package/dist/app/assets/ModelPage-D2jvfe3t.js +0 -1
- package/dist/app/assets/PackagePage-BbnhGoD3.js +0 -1
- package/dist/app/assets/RouteError-D3LGEZ3i.js +0 -1
- package/dist/app/assets/WorkbookPage-DttVIj4u.js +0 -1
- package/dist/app/assets/index-5K9YjIxF.js +0 -456
- package/dist/app/assets/index-DIgzgp69.js +0 -1742
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
|
|
3
|
+
import { logger } from "../logger";
|
|
4
|
+
import type { PackageMemoryGovernor } from "./package_memory_governor";
|
|
5
|
+
|
|
6
|
+
const DEFAULT_INTERVAL_MS = 30_000;
|
|
7
|
+
|
|
8
|
+
/**
 * Fields extracted from `/proc/self/status` by `readLinuxProcStatus`.
 * Every member is optional: a key that is absent from the file (or whose
 * value cannot be parsed) is simply left unset.
 */
interface LinuxProcStatus {
   /** Value of the `Threads` line. */
   threads?: number;
   /** `VmRSS`, converted from kB to bytes. */
   vmRssBytes?: number;
   /** `VmSize`, converted from kB to bytes. */
   vmSizeBytes?: number;
   /** `VmPeak`, converted from kB to bytes. */
   vmPeakBytes?: number;
   /** `VmData`, converted from kB to bytes. */
   vmDataBytes?: number;
   /** Value of the `voluntary_ctxt_switches` line. */
   voluntaryCtxSwitches?: number;
   /** Value of the `nonvoluntary_ctxt_switches` line. */
   nonvoluntaryCtxSwitches?: number;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Parse the subset of `/proc/self/status` that matters for diagnosing
 * thread / virtual-memory leaks. The file is small (<5KB), so reading
 * it synchronously here is cheap and avoids fs-promise queueing.
 *
 * Format is `Key:\t<value> [unit]` per line. Sizes are reported in kB;
 * we normalize to bytes so log output matches `process.memoryUsage()`.
 *
 * @returns The parsed fields (any that are missing or malformed are left
 *   unset), or `null` when the file cannot be read.
 */
function readLinuxProcStatus(): LinuxProcStatus | null {
   // Unit-less counters. A malformed value leaves the field unset instead
   // of storing NaN, which would surface as a misleading value in the log.
   const toCount = (value: string): number | undefined => {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : undefined;
   };

   try {
      const raw = fs.readFileSync("/proc/self/status", "utf8");
      const out: LinuxProcStatus = {};
      for (const line of raw.split("\n")) {
         // Split on the FIRST colon only so a value that itself contains
         // a colon is not truncated.
         const separator = line.indexOf(":");
         if (separator <= 0) continue;
         const key = line.slice(0, separator).trim();
         const value = line.slice(separator + 1).trim();
         // `Number("")` is 0, so an empty value must be skipped rather
         // than recorded as a real zero.
         if (value === "") continue;
         switch (key) {
            case "Threads":
               out.threads = toCount(value);
               break;
            case "VmRSS":
               out.vmRssBytes = kBToBytes(value);
               break;
            case "VmSize":
               out.vmSizeBytes = kBToBytes(value);
               break;
            case "VmPeak":
               out.vmPeakBytes = kBToBytes(value);
               break;
            case "VmData":
               out.vmDataBytes = kBToBytes(value);
               break;
            case "voluntary_ctxt_switches":
               out.voluntaryCtxSwitches = toCount(value);
               break;
            case "nonvoluntary_ctxt_switches":
               out.nonvoluntaryCtxSwitches = toCount(value);
               break;
         }
      }
      return out;
   } catch {
      // Best effort: an unreadable status file just means no Linux stats.
      return null;
   }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Convert a `/proc/self/status` size value such as `"123456 kB"` to bytes.
 *
 * @param value The text after the colon, with or without the trailing
 *   `kB` unit.
 * @returns The size in bytes, or `undefined` when the value is empty or
 *   not a finite number.
 */
function kBToBytes(value: string): number | undefined {
   const digits = value.replace(/\s*kB$/, "").trim();
   // `Number("")` evaluates to 0, so an empty or unit-only value has to be
   // rejected explicitly or it would be reported as a genuine 0 bytes.
   if (digits === "") return undefined;
   const num = Number(digits);
   if (!Number.isFinite(num)) return undefined;
   return num * 1024;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Collect JavaScriptCore counters through Bun's `bun:jsc` builtin.
 *
 * The module only exists under Bun, so this returns `null` straight away
 * when no `Bun` global is present. It is best-effort: any failure while
 * importing or calling the module is swallowed so the reporter never
 * crashes the process.
 *
 * @returns The merged results of `heapStats()` and `memoryUsage()` (the
 *   latter wins on key collisions), or `null` when neither is available.
 */
async function readBunJscStats(): Promise<Record<string, number> | null> {
   type JscModule = {
      heapStats?: () => Record<string, number>;
      memoryUsage?: () => Record<string, number>;
   };

   const runtime = globalThis as { Bun?: unknown };
   if (runtime.Bun === undefined) return null;

   try {
      // Dynamic import so Node builds don't fail at parse time.
      const jsc = (await import("bun:jsc")) as unknown as JscModule;
      const heapStats = jsc.heapStats?.();
      const memoryStats = jsc.memoryUsage?.();
      if (heapStats || memoryStats) {
         // NOTE(review): the values are typed as numbers via a cast only;
         // confirm `heapStats()` does not also return nested objects.
         return { ...heapStats, ...memoryStats };
      }
      return null;
   } catch {
      return null;
   }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Periodically logs process memory and thread counts to give ops a
 * cheap, always-on signal for the leak classes that have OOM-killed
 * prod (DuckDB connection thread pools, libuv worker pool, Malloy
 * compile heap, etc.).
 *
 * Logs at `info` so it shows up without flipping `LOG_LEVEL`. Volume
 * is low (~2 lines/minute by default). Pulls the memory governor's
 * snapshot too so RSS/back-pressure state appears in the same line as
 * Node/Bun heap.
 */
export class ProcessStatsReporter {
   /**
    * Largest delay a Node timer accepts. Delays above this value (as well
    * as NaN or anything below 1) are coerced to 1ms by `setInterval`.
    */
   private static readonly MAX_TIMER_DELAY_MS = 2147483647;

   private timer: ReturnType<typeof setInterval> | null = null;
   private readonly intervalMs: number;
   private readonly memoryGovernor: PackageMemoryGovernor | null;

   /**
    * @param memoryGovernor Optional governor whose status is included in
    *   every log line; pass `null` to omit it.
    * @param intervalMs Delay between samples. A non-finite value or one
    *   below 1 falls back to the default; a value above the timer maximum
    *   is clamped to that maximum.
    */
   constructor(
      memoryGovernor: PackageMemoryGovernor | null,
      intervalMs: number = DEFAULT_INTERVAL_MS,
   ) {
      this.memoryGovernor = memoryGovernor;
      // Guard the delay: handing setInterval an invalid value silently
      // turns it into a 1ms timer, which would flood the log.
      if (!Number.isFinite(intervalMs) || intervalMs < 1) {
         this.intervalMs = DEFAULT_INTERVAL_MS;
      } else {
         this.intervalMs = Math.min(
            intervalMs,
            ProcessStatsReporter.MAX_TIMER_DELAY_MS,
         );
      }
   }

   /** Start periodic reporting. Calling it while already running is a no-op. */
   public start(): void {
      if (this.timer !== null) return;
      // Immediate first sample so a freshly-started pod logs its
      // baseline before the first interval has elapsed.
      void this.tick();
      this.timer = setInterval(() => void this.tick(), this.intervalMs);
      // Don't keep the event loop alive on our account — if everything
      // else has shut down, the reporter shouldn't block exit.
      (
         this.timer as ReturnType<typeof setInterval> & {
            unref?: () => void;
         }
      ).unref?.();
      logger.info(
         `ProcessStatsReporter started (intervalMs=${this.intervalMs})`,
      );
   }

   /** Stop periodic reporting. Safe to call when not running. */
   public stop(): void {
      if (this.timer !== null) {
         clearInterval(this.timer);
         this.timer = null;
      }
   }

   /**
    * Take one sample and log it. Never rejects: any failure is logged at
    * `warn` so a broken probe cannot take the process down.
    */
   private async tick(): Promise<void> {
      try {
         const mem = process.memoryUsage();
         const proc =
            process.platform === "linux" ? readLinuxProcStatus() : null;
         const bun = await readBunJscStats();
         const governor = this.memoryGovernor?.getStatus() ?? null;

         logger.info("process stats", {
            uptimeSeconds: Math.round(process.uptime()),
            nodeMemory: {
               rssBytes: mem.rss,
               heapTotalBytes: mem.heapTotal,
               heapUsedBytes: mem.heapUsed,
               externalBytes: mem.external,
               arrayBuffersBytes: mem.arrayBuffers,
            },
            linux: proc,
            bunJsc: bun,
            memoryGovernor: governor,
         });
      } catch (err) {
         logger.warn("ProcessStatsReporter tick failed", { error: err });
      }
   }
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker thread that owns one capped DuckDB connection and answers
|
|
3
|
+
* schema-introspection requests for parquet/csv files. Running this
|
|
4
|
+
* off the main thread isolates the native DuckDB thread pool — when
|
|
5
|
+
* the worker exits, its threads die with it, which puts a hard
|
|
6
|
+
* ceiling on the leak class that OOM-killed prod
|
|
7
|
+
* (worker-76b49bdb89-8bsv4: 466 leaked Bun Pool threads).
|
|
8
|
+
*
|
|
9
|
+
* Protocol (parent ↔ worker):
|
|
10
|
+
* parent → worker: { id, packagePath, databasePath }
|
|
11
|
+
* worker → parent: { id, ok: true, result: SchemaResult }
|
|
12
|
+
* | { id, ok: false, error: { message, stack? } }
|
|
13
|
+
*
|
|
14
|
+
* One request at a time per worker — the pool in the parent
|
|
15
|
+
* (`schema_worker_pool.ts`) handles fan-out. Keeping the worker
|
|
16
|
+
* single-threaded from the JS side matches DuckDB's behavior on a
|
|
17
|
+
* single connection and avoids head-of-line blocking inside the
|
|
18
|
+
* worker itself.
|
|
19
|
+
*/
|
|
20
|
+
import { DuckDBConnection } from "@malloydata/db-duckdb";
|
|
21
|
+
import "@malloydata/db-duckdb/native";
|
|
22
|
+
import {
|
|
23
|
+
ConnectionRuntime,
|
|
24
|
+
EmptyURLReader,
|
|
25
|
+
SourceDef,
|
|
26
|
+
} from "@malloydata/malloy";
|
|
27
|
+
import * as path from "path";
|
|
28
|
+
import { parentPort } from "worker_threads";
|
|
29
|
+
|
|
30
|
+
/** Message sent from the parent to the worker to request one schema lookup. */
export interface SchemaRequest {
   /** Correlation id; echoed back unchanged in the matching response. */
   id: number;
   /** Directory that `databasePath` is joined onto to locate the file. */
   packagePath: string;
   /** File path relative to `packagePath`; also returned as the result name. */
   databasePath: string;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Message sent from the worker back to the parent. `result` is populated
 * when `ok` is true and `error` when `ok` is false.
 */
export interface SchemaResponse {
   /** Id of the request this response answers. */
   id: number;
   /** Whether introspection succeeded. */
   ok: boolean;
   /** Schema description, present on success. */
   result?: {
      name: string;
      rowCount: number;
      columns: Array<{ type: string; name: string }>;
   };
   /** Failure details, present on error. */
   error?: { message: string; stack?: string };
}
|
|
46
|
+
|
|
47
|
+
// Refuse to run outside a worker thread. If this module were imported
// from the main thread, the connection created below would allocate its
// native pool there — recreating the exact leak this file exists to fix.
if (parentPort === null) {
   throw new Error("schema_worker.ts loaded outside a worker thread");
}

// A single DuckDB connection per worker with deliberately tight caps.
// Schema introspection only reads parquet footers / csv headers, so it
// needs neither parallelism nor a large memory arena; the caps are what
// keep the per-worker native-thread cost bounded.
const connection = new DuckDBConnection({
   name: "duckdb",
   databasePath: ":memory:",
   threads: 1,
   memoryLimit: "256MB",
});
|
|
65
|
+
|
|
66
|
+
/**
 * Introspect one data file: load it as a Malloy `duckdb.table(...)` source
 * on the worker's connection, read its field list, and count its rows.
 *
 * Never rejects — every failure is converted into an `ok: false` response
 * carrying the error message and stack, so the parent always gets a reply.
 *
 * @param req Request identifying the file (`packagePath` + `databasePath`).
 * @returns The response to post back to the parent, tagged with `req.id`.
 */
async function handleRequest(req: SchemaRequest): Promise<SchemaResponse> {
   try {
      const fullPath = path.join(req.packagePath, req.databasePath);
      // DuckDB on Windows supports forward slashes, and this avoids
      // escaping issues in the inline source below.
      const normalizedPath = fullPath.replace(/\\/g, "/");

      // The path is embedded in a single-quoted string literal. A quote or
      // line break would end the literal early and yield malformed (or
      // attacker-shaped) source, so fail with a clear message instead.
      if (/['\r\n]/.test(normalizedPath)) {
         throw new Error(
            `Unsupported character in database path (single quote or line break): ${req.databasePath}`,
         );
      }

      const runtime = new ConnectionRuntime({
         urlReader: new EmptyURLReader(),
         connections: [connection],
      });
      const model = runtime.loadModel(
         `source: temp is duckdb.table('${normalizedPath}')`,
      );
      const modelDef = await model.getModel();
      const fields = (modelDef._modelDef.contents["temp"] as SourceDef).fields;
      const columns = fields.map((field) => ({
         type: String(field.type),
         name: field.name,
      }));

      const runner = model.loadQuery(
         "run: temp->{aggregate: row_count is count()}",
      );
      const result = await runner.run();
      // Coerce rather than cast: the value may be missing or non-numeric,
      // and the response contract promises a plain number.
      const rowCount = Number(result.data.value[0]?.row_count?.valueOf());
      if (!Number.isFinite(rowCount)) {
         throw new Error(
            `Row count query returned no usable value for ${req.databasePath}`,
         );
      }

      return {
         id: req.id,
         ok: true,
         result: { name: req.databasePath, rowCount, columns },
      };
   } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      return {
         id: req.id,
         ok: false,
         error: { message: error.message, stack: error.stack },
      };
   }
}
|
|
107
|
+
|
|
108
|
+
// Answer each incoming request with exactly one response message.
parentPort.on("message", (msg: SchemaRequest) => {
   void handleRequest(msg).then((response) => {
      parentPort!.postMessage(response);
   });
});

// When the parent port closes, close the connection so DuckDB releases
// its native threads cleanly instead of leaking them past worker exit.
// Closing is best effort; the exit happens whether or not it succeeds.
async function shutdown(): Promise<void> {
   try {
      await connection.close();
   } catch {
      // best effort
   } finally {
      process.exit(0);
   }
}
parentPort.on("close", () => void shutdown());
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Long-lived pool of {@link Worker} threads that perform DuckDB schema
|
|
3
|
+
* introspection off the main event loop.
|
|
4
|
+
*
|
|
5
|
+
* Why a dedicated pool (not the libuv worker pool, not setImmediate):
|
|
6
|
+
*
|
|
7
|
+
* - `@malloydata/db-duckdb` opens DuckDB via a native addon. Every
|
|
8
|
+
* DuckDBConnection allocates its own native thread pool sized to
|
|
9
|
+
* the host's CPU count (not the cgroup's CPU share). Concurrent
|
|
10
|
+
* schema introspection on the main thread compounded into the
|
|
11
|
+
* 466-leaked-Bun-Pool-threads / 90GB-VmSize OOM signature seen on
|
|
12
|
+
* `worker-76b49bdb89-8bsv4`.
|
|
13
|
+
*
|
|
14
|
+
* - Owning the DuckDBConnection inside a worker isolates the native
|
|
15
|
+
* pool to *that* worker. Per-pool sizing → predictable thread
|
|
16
|
+
* budget. Worker exit → native threads die with it. No leak across
|
|
17
|
+
* package loads.
|
|
18
|
+
*
|
|
19
|
+
* - The schema-introspection case is uniquely suited to workers:
|
|
20
|
+
* inputs and outputs are plain JSON (structured-cloneable), and
|
|
21
|
+
* the work touches no environment connections, so we don't need
|
|
22
|
+
* cross-thread IPC for live Snowflake/BigQuery handles. This is
|
|
23
|
+
* why we tackle schema first — model compile (which *does* need
|
|
24
|
+
* live env connections) is a much bigger lift, tracked separately.
|
|
25
|
+
*/
|
|
26
|
+
import { Worker } from "worker_threads";
|
|
27
|
+
|
|
28
|
+
import { logger } from "../logger";
|
|
29
|
+
|
|
30
|
+
type ColumnInfo = { type: string; name: string };
|
|
31
|
+
|
|
32
|
+
/**
 * Schema description handed back to callers of the pool. It keeps the
 * shape of the original synchronous `getDatabaseInfo` return value so
 * callers in `package.ts` are unchanged.
 */
export interface SchemaResult {
   /** Name reported by the worker for the introspected file. */
   name: string;
   /** Number of rows in the file. */
   rowCount: number;
   /** One entry per column: its type and name. */
   columns: ColumnInfo[];
}
|
|
41
|
+
|
|
42
|
+
/** Bookkeeping for one worker thread owned by the pool. */
interface WorkerSlot {
   /** The underlying worker thread. */
   worker: Worker;
   /** True while the worker is handling a request. */
   busy: boolean;
}
|
|
47
|
+
|
|
48
|
+
/** A submitted job together with the callbacks that settle its promise. */
interface PendingRequest {
   /** Correlation id used to match the worker's response. */
   id: number;
   /** Forwarded to the worker unchanged. */
   packagePath: string;
   /** Forwarded to the worker unchanged. */
   databasePath: string;
   /** Settles the caller's promise with the schema. */
   resolve: (value: SchemaResult) => void;
   /** Settles the caller's promise with a failure. */
   reject: (reason: Error) => void;
}
|
|
55
|
+
|
|
56
|
+
const DEFAULT_POOL_SIZE = 2;
|
|
57
|
+
|
|
58
|
+
/**
 * Fixed-size pool of worker threads. Each worker handles one request at a
 * time; extra requests wait in a FIFO queue. A worker that errors or exits
 * while the pool is running is replaced so capacity is never lost.
 */
export class SchemaWorkerPool {
   private readonly workers: WorkerSlot[] = [];
   private readonly queue: PendingRequest[] = [];
   /** id → pending request currently executing. */
   private readonly inFlight = new Map<number, PendingRequest>();
   /** Maps a worker index to the id of the request it's running. */
   private readonly workerCurrentId = new Map<number, number>();
   private nextId = 1;
   private stopped = false;

   /**
    * @param workerUrl Location of the worker script to run in each thread.
    * @param size Number of workers to spawn in `start()`.
    */
   constructor(
      private readonly workerUrl: URL,
      private readonly size: number = DEFAULT_POOL_SIZE,
   ) {}

   /** Spawn the workers. Calling it again while workers exist is a no-op. */
   public start(): void {
      if (this.workers.length > 0) return;
      for (let i = 0; i < this.size; i++) {
         this.workers.push(this.spawn(i));
      }
      logger.info(`SchemaWorkerPool started (size=${this.size})`);
   }

   /**
    * Reject all queued and in-flight work, then terminate every worker.
    * After this, `submit()` always rejects.
    */
   public async stop(): Promise<void> {
      this.stopped = true;
      // Fail any queued/in-flight work so callers don't hang on shutdown.
      const shutdownError = new Error("SchemaWorkerPool stopped");
      for (const req of this.queue.splice(0)) req.reject(shutdownError);
      for (const req of this.inFlight.values()) req.reject(shutdownError);
      this.inFlight.clear();
      // Drop the per-worker attribution too, so no stale id outlives the
      // requests it referred to.
      this.workerCurrentId.clear();
      await Promise.all(
         this.workers.map(async (slot) => {
            try {
               await slot.worker.terminate();
            } catch {
               // Best-effort: terminate failures shouldn't block shutdown.
            }
         }),
      );
      this.workers.length = 0;
   }

   /**
    * Submit one schema-introspection job. Resolves with the schema
    * description; rejects if the worker returns an error or crashes, or
    * if the pool is stopped or was never started.
    *
    * Concurrent calls beyond the pool size are queued FIFO; once a
    * worker frees up the next queued request is dispatched.
    */
   public submit(
      packagePath: string,
      databasePath: string,
   ): Promise<SchemaResult> {
      if (this.stopped) {
         return Promise.reject(new Error("SchemaWorkerPool stopped"));
      }
      if (this.workers.length === 0) {
         return Promise.reject(
            new Error("SchemaWorkerPool.submit called before start()"),
         );
      }
      return new Promise<SchemaResult>((resolve, reject) => {
         const req: PendingRequest = {
            id: this.nextId++,
            packagePath,
            databasePath,
            resolve,
            reject,
         };
         this.queue.push(req);
         this.drain();
      });
   }

   /**
    * Try to assign queued requests to idle workers. Cheap; called
    * after every enqueue and after every worker completes a request.
    */
   private drain(): void {
      for (let i = 0; i < this.workers.length; i++) {
         if (this.queue.length === 0) return;
         const slot = this.workers[i];
         if (slot.busy) continue;
         const req = this.queue.shift()!;
         slot.busy = true;
         this.inFlight.set(req.id, req);
         this.workerCurrentId.set(i, req.id);
         slot.worker.postMessage({
            id: req.id,
            packagePath: req.packagePath,
            databasePath: req.databasePath,
         });
      }
   }

   /**
    * Create the worker for slot `index` and wire up its event handlers.
    * The caller is responsible for storing the returned slot at
    * `workers[index]`.
    */
   private spawn(index: number): WorkerSlot {
      const worker = new Worker(this.workerUrl);
      const slot: WorkerSlot = { worker, busy: false };

      worker.on(
         "message",
         (msg: {
            id: number;
            ok: boolean;
            result?: SchemaResult;
            error?: { message: string; stack?: string };
         }) => {
            const req = this.inFlight.get(msg.id);
            if (!req) {
               logger.warn("SchemaWorkerPool: response for unknown request", {
                  id: msg.id,
                  workerIndex: index,
               });
               return;
            }
            this.inFlight.delete(msg.id);
            this.workerCurrentId.delete(index);
            slot.busy = false;
            if (msg.ok && msg.result) {
               req.resolve(msg.result);
            } else {
               const err = new Error(msg.error?.message ?? "Unknown error");
               if (msg.error?.stack) err.stack = msg.error.stack;
               req.reject(err);
            }
            this.drain();
         },
      );

      worker.on("error", (err) => {
         // A crash inside the worker — fail the request attributed to this
         // slot, then respawn so the pool self-heals. Without respawn, one
         // crash silently shrinks capacity and loads would queue forever.
         logger.error("SchemaWorkerPool: worker errored, respawning", {
            workerIndex: index,
            error: err,
         });
         this.replaceFailedWorker(
            index,
            slot,
            err instanceof Error ? err : new Error(String(err)),
         );
      });

      worker.on("exit", (code) => {
         if (this.stopped) return;
         // A crash emits "error" and then "exit". If the error handler has
         // already replaced this slot there is nothing left to do; acting
         // again would orphan the replacement worker and could reject a
         // request that was dispatched to it.
         if (this.workers[index] !== slot) return;
         logger.warn("SchemaWorkerPool: worker exited unexpectedly", {
            workerIndex: index,
            code,
         });
         // Any exit while the pool is running (including code 0) leaves a
         // dead slot, so treat it like an error and respawn.
         this.replaceFailedWorker(
            index,
            slot,
            new Error(`SchemaWorker exited with code ${code}`),
         );
      });

      return slot;
   }

   /**
    * Fail the request attributed to a dead worker and, unless the pool is
    * stopped, install a fresh worker in its slot. Does nothing when
    * `failed` is no longer the slot installed at `index`, which makes
    * handling of the paired "error"/"exit" events run at most once.
    */
   private replaceFailedWorker(
      index: number,
      failed: WorkerSlot,
      reason: Error,
   ): void {
      if (this.workers[index] !== failed) return;
      const inFlightId = this.workerCurrentId.get(index);
      if (inFlightId !== undefined) {
         this.workerCurrentId.delete(index);
         const req = this.inFlight.get(inFlightId);
         if (req) {
            this.inFlight.delete(inFlightId);
            req.reject(reason);
         }
      }
      if (this.stopped) return;
      this.workers[index] = this.spawn(index);
      this.drain();
   }
}
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* Process-wide singleton. Constructed lazily so importing this module
|
|
242
|
+
* doesn't spawn workers in test environments that never call
|
|
243
|
+
* `getSchemaWorkerPool()`.
|
|
244
|
+
*
|
|
245
|
+
* The worker URL is resolved from `import.meta.url`, which lets Bun
|
|
246
|
+
* load `schema_worker.ts` directly in dev and the bundled
|
|
247
|
+
* `schema_worker.mjs` in prod (see `build.ts`).
|
|
248
|
+
*/
|
|
249
|
+
let singleton: SchemaWorkerPool | null = null;
|
|
250
|
+
|
|
251
|
+
/**
 * Return the process-wide pool, creating and starting it on first use.
 * The size comes from `PUBLISHER_SCHEMA_WORKER_POOL_SIZE`; an unset or
 * invalid value falls back to `DEFAULT_POOL_SIZE`.
 */
export function getSchemaWorkerPool(): SchemaWorkerPool {
   if (!singleton) {
      const url = resolveWorkerUrl();
      const size = parsePoolSize(process.env.PUBLISHER_SCHEMA_WORKER_POOL_SIZE);
      singleton = new SchemaWorkerPool(url, size);
      singleton.start();
   }
   return singleton;
}

/**
 * Interpret the pool-size setting. Only positive integers are accepted:
 * a negative size would start a pool with no workers, and a fractional
 * one would make the spawn loop round up.
 */
function parsePoolSize(raw: string | undefined): number {
   if (raw === undefined || raw.trim() === "") return DEFAULT_POOL_SIZE;
   const parsed = Number(raw);
   if (Number.isInteger(parsed) && parsed >= 1) return parsed;
   logger.warn("SchemaWorkerPool: ignoring invalid pool size", {
      value: raw,
      fallback: DEFAULT_POOL_SIZE,
   });
   return DEFAULT_POOL_SIZE;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Work out where the worker script lives relative to this module.
 *
 * In dev (`bun --watch src/server.ts`), `import.meta.url` points at
 * `.../src/service/schema_worker_pool.ts`, so the worker is the sibling
 * `.ts` file.
 *
 * In prod this module is inlined into `dist/server.mjs`, so
 * `import.meta.url` resolves to that file. Bun's bundler nests outputs by
 * their path relative to the common entrypoint root (./src), which puts
 * the worker at `dist/service/schema_worker.mjs` — one directory below
 * server.mjs.
 */
function resolveWorkerUrl(): URL {
   const moduleUrl = new URL(import.meta.url);
   // A `.mjs` module path is taken to mean we are running the bundle.
   const workerPath = moduleUrl.pathname.endsWith(".mjs")
      ? "./service/schema_worker.mjs"
      : "./schema_worker.ts";
   return new URL(workerPath, moduleUrl);
}
|
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
import { Mutex } from "async-mutex";
|
|
1
2
|
import * as crypto from "crypto";
|
|
3
|
+
import { ConnectionAuthError } from "../errors";
|
|
2
4
|
import { logger } from "../logger";
|
|
5
|
+
import {
|
|
6
|
+
handlePgAttachError,
|
|
7
|
+
pgConnectTimeoutSeconds,
|
|
8
|
+
redactPgSecrets,
|
|
9
|
+
withPgConnectTimeout,
|
|
10
|
+
} from "../pg_helpers";
|
|
3
11
|
import {
|
|
4
12
|
DatabaseConnection,
|
|
5
13
|
ManifestStore,
|
|
@@ -78,6 +86,13 @@ export class StorageManager {
|
|
|
78
86
|
*/
|
|
79
87
|
private attachedCatalogs = new Map<string, string>();
|
|
80
88
|
|
|
89
|
+
// Serializes DuckLake catalog attaches. Concurrent POST /environments calls
|
|
90
|
+
// hitting the same DuckDB connection would otherwise race on extension
|
|
91
|
+
// autoload (httpfs/azure/etc.), where multiple connections download the
|
|
92
|
+
// extension to `.tmp-<uuid>` files in parallel; only one wins the rename
|
|
93
|
+
// and the rest crash with "Could not remove file ... No such file or directory".
|
|
94
|
+
private duckLakeAttachMutex: Mutex = new Mutex();
|
|
95
|
+
|
|
81
96
|
private config: StorageConfig;
|
|
82
97
|
|
|
83
98
|
constructor(config: StorageConfig) {
|
|
@@ -141,14 +156,18 @@ export class StorageManager {
|
|
|
141
156
|
}
|
|
142
157
|
|
|
143
158
|
const key = configKey(config);
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
159
|
+
const catalogName = await this.duckLakeAttachMutex.runExclusive(
|
|
160
|
+
async () => {
|
|
161
|
+
const existing = this.attachedCatalogs.get(key);
|
|
162
|
+
if (existing) return existing;
|
|
163
|
+
// Catalog name derived from the config so multiple configs can coexist as
|
|
164
|
+
// separate ATTACHments without colliding on the name.
|
|
165
|
+
const name = catalogNameForConfig(config);
|
|
166
|
+
await this.attachDuckLakeCatalog(config, name);
|
|
167
|
+
this.attachedCatalogs.set(key, name);
|
|
168
|
+
return name;
|
|
169
|
+
},
|
|
170
|
+
);
|
|
152
171
|
|
|
153
172
|
const store = new DuckLakeManifestStore(
|
|
154
173
|
this.duckDbConnection,
|
|
@@ -178,12 +197,31 @@ export class StorageManager {
|
|
|
178
197
|
await connection.run("INSTALL postgres; LOAD postgres;");
|
|
179
198
|
}
|
|
180
199
|
|
|
181
|
-
|
|
200
|
+
// For PG-backed catalogs, inject connect_timeout so a wedged libpq
|
|
201
|
+
// handshake fails the caller in seconds rather than hanging the
|
|
202
|
+
// worker until the K8s liveness probe trips (the 2026-05 incident).
|
|
203
|
+
// Non-PG catalogs (e.g. SQLite, MySQL) pass through unchanged.
|
|
204
|
+
const catalogUrl = isPostgres
|
|
205
|
+
? withPgConnectTimeout(config.catalogUrl, pgConnectTimeoutSeconds())
|
|
206
|
+
: config.catalogUrl;
|
|
207
|
+
|
|
208
|
+
const escapedCatalogUrl = escapeSQL(catalogUrl);
|
|
182
209
|
const escapedDataPath = escapeSQL(config.dataPath);
|
|
183
210
|
const isCloudStorage =
|
|
184
211
|
config.dataPath.startsWith("gs://") ||
|
|
185
212
|
config.dataPath.startsWith("s3://");
|
|
186
213
|
|
|
214
|
+
// Pre-install httpfs explicitly so the ATTACH below doesn't trigger
|
|
215
|
+
// DuckDB's autoloader. The autoloader downloads extensions to
|
|
216
|
+
// `<ext>.tmp-<uuid>` and races when multiple connections within the
|
|
217
|
+
// same process hit it concurrently — losers fail with
|
|
218
|
+
// "Could not remove file ... No such file or directory" on cleanup
|
|
219
|
+
// of their .tmp file. INSTALL/LOAD here is idempotent and serialized
|
|
220
|
+
// by the caller's mutex.
|
|
221
|
+
if (isCloudStorage) {
|
|
222
|
+
await connection.run("INSTALL httpfs; LOAD httpfs;");
|
|
223
|
+
}
|
|
224
|
+
|
|
187
225
|
let attachCmd = `ATTACH 'ducklake:${escapedCatalogUrl}' AS ${catalogName}`;
|
|
188
226
|
const attachOpts: string[] = [
|
|
189
227
|
`DATA_PATH '${escapedDataPath}'`,
|
|
@@ -193,13 +231,35 @@ export class StorageManager {
|
|
|
193
231
|
// sidestepping object-storage auth issues entirely for this path.
|
|
194
232
|
"DATA_INLINING_ROW_LIMIT 100000",
|
|
195
233
|
];
|
|
234
|
+
|
|
196
235
|
if (isCloudStorage) {
|
|
197
236
|
attachOpts.push("OVERRIDE_DATA_PATH true");
|
|
198
237
|
}
|
|
199
238
|
attachCmd += ` (${attachOpts.join(", ")});`;
|
|
200
239
|
|
|
201
|
-
logger.info(
|
|
202
|
-
|
|
240
|
+
logger.info(
|
|
241
|
+
`Attaching DuckLake manifest catalog: ${redactPgSecrets(attachCmd)}`,
|
|
242
|
+
);
|
|
243
|
+
try {
|
|
244
|
+
await connection.run(attachCmd);
|
|
245
|
+
} catch (error) {
|
|
246
|
+
const outcome = handlePgAttachError(
|
|
247
|
+
error,
|
|
248
|
+
`DuckLake catalog credentials rejected for ${catalogName}`,
|
|
249
|
+
);
|
|
250
|
+
if (outcome.action === "swallow") {
|
|
251
|
+
logger.info(
|
|
252
|
+
`DuckLake catalog ${catalogName} is already attached, skipping`,
|
|
253
|
+
);
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
256
|
+
if (outcome.error instanceof ConnectionAuthError) {
|
|
257
|
+
logger.warn("DuckLake catalog credentials rejected", {
|
|
258
|
+
catalogName,
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
throw outcome.error;
|
|
262
|
+
}
|
|
203
263
|
}
|
|
204
264
|
|
|
205
265
|
getRepository(): ResourceRepository {
|