@malloy-publisher/server 0.0.198-dev2 → 0.0.198-dev4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,278 +0,0 @@
1
- /**
2
- * Long-lived pool of {@link Worker} threads that perform DuckDB schema
3
- * introspection off the main event loop.
4
- *
5
- * Why a dedicated pool (not the libuv worker pool, not setImmediate):
6
- *
7
- * - `@malloydata/db-duckdb` opens DuckDB via a native addon. Every
8
- * DuckDBConnection allocates its own native thread pool sized to
9
- * the host's CPU count (not the cgroup's CPU share). Concurrent
10
- * schema introspection on the main thread compounded into the
11
- * 466-leaked-Bun-Pool-threads / 90GB-VmSize OOM signature seen on
12
- * `worker-76b49bdb89-8bsv4`.
13
- *
14
- * - Owning the DuckDBConnection inside a worker isolates the native
15
- * pool to *that* worker. Per-pool sizing → predictable thread
16
- * budget. Worker exit → native threads die with it. No leak across
17
- * package loads.
18
- *
19
- * - The schema-introspection case is uniquely suited to workers:
20
- * inputs and outputs are plain JSON (structured-cloneable), and
21
- * the work touches no environment connections, so we don't need
22
- * cross-thread IPC for live Snowflake/BigQuery handles. This is
23
- * why we tackle schema first — model compile (which *does* need
24
- * live env connections) is a much bigger lift, tracked separately.
25
- */
26
- import { Worker } from "worker_threads";
27
-
28
- import { logger } from "../logger";
29
-
30
- type ColumnInfo = { type: string; name: string }; // One entry of SchemaResult.columns: a column's type string and its name.
31
-
32
- /**
33
- * Public-facing schema-row shape. Mirrors the original synchronous
34
- * `getDatabaseInfo` return so callers in `package.ts` are unchanged.
35
- * This is also the value a successful `SchemaWorkerPool.submit()` resolves with.
- */
36
- export interface SchemaResult {
37
- name: string; // NOTE(review): presumably the introspected table/database name — confirm against the worker script.
38
- rowCount: number; // NOTE(review): presumably the number of rows found — confirm against the worker script.
39
- columns: ColumnInfo[]; // One ColumnInfo per reported column.
40
- }
41
-
42
- interface WorkerSlot { // One pool entry: a worker thread plus its busy flag.
43
- worker: Worker;
44
- /** Whether the worker is currently handling a request. */
45
- busy: boolean;
46
- }
47
-
48
- interface PendingRequest { // A submitted job together with the settle callbacks of the promise returned by submit().
49
- id: number; // Allocated from the pool's `nextId` counter; the worker echoes it back in its reply.
50
- packagePath: string; // Forwarded unchanged to the worker via postMessage.
51
- databasePath: string; // Forwarded unchanged to the worker via postMessage.
52
- resolve: (value: SchemaResult) => void; // Called when the worker replies with ok + result.
53
- reject: (reason: Error) => void; // Called on a worker error reply, a worker crash/exit, or pool shutdown.
54
- }
55
-
56
- const DEFAULT_POOL_SIZE = 2; // Pool size used when the SchemaWorkerPool constructor is called without `size`.
57
-
58
/**
 * Fixed-size pool of worker threads that execute schema-introspection
 * requests, one request at a time per worker.
 *
 * Lifecycle: `start()` spawns `size` workers, `submit()` queues a job and
 * returns a promise for its result, and `stop()` rejects outstanding work
 * and terminates every worker. A stopped pool stays stopped: `submit()`
 * keeps rejecting afterwards.
 *
 * A worker that raises `error`, or exits with a non-zero code, is replaced
 * by a fresh worker in the same slot and the request it was running (if
 * any) is rejected.
 */
export class SchemaWorkerPool {
  private readonly workers: WorkerSlot[] = [];
  /** Requests waiting for an idle worker, oldest first. */
  private readonly queue: PendingRequest[] = [];
  /** id → pending request currently executing. */
  private readonly inFlight = new Map<number, PendingRequest>();
  /** Maps a worker index to the id of the request it's running. */
  private readonly workerCurrentId = new Map<number, number>();
  private nextId = 1;
  private stopped = false;

  /**
   * @param workerUrl URL of the worker entry script handed to `new Worker`.
   * @param size Number of workers spawned by `start()`.
   */
  constructor(
    private readonly workerUrl: URL,
    private readonly size: number = DEFAULT_POOL_SIZE,
  ) {}

  /** Spawn the workers. Does nothing if the pool already has workers. */
  public start(): void {
    if (this.workers.length > 0) return;
    for (let i = 0; i < this.size; i++) {
      this.workers.push(this.spawn(i));
    }
    logger.info(`SchemaWorkerPool started (size=${this.size})`);
  }

  /**
   * Reject all queued and in-flight requests, then terminate every worker.
   * Terminate failures are ignored so shutdown always completes.
   */
  public async stop(): Promise<void> {
    this.stopped = true;
    // Fail any queued/in-flight work so callers don't hang on shutdown.
    const shutdownError = new Error("SchemaWorkerPool stopped");
    for (const req of this.queue.splice(0)) req.reject(shutdownError);
    for (const req of this.inFlight.values()) req.reject(shutdownError);
    this.inFlight.clear();
    // Keep the index → id bookkeeping in step with the cleared in-flight map.
    this.workerCurrentId.clear();
    await Promise.all(
      this.workers.map(async (slot) => {
        try {
          await slot.worker.terminate();
        } catch {
          // Best-effort: terminate failures shouldn't block shutdown.
        }
      }),
    );
    this.workers.length = 0;
  }

  /**
   * Submit one schema-introspection job. Resolves with the schema
   * description; rejects if the worker returns an error or crashes, if the
   * pool has been stopped, or if `start()` has not been called yet.
   *
   * Concurrent calls beyond the pool size are queued FIFO; once a
   * worker frees up the next queued request is dispatched.
   */
  public submit(
    packagePath: string,
    databasePath: string,
  ): Promise<SchemaResult> {
    if (this.stopped) {
      return Promise.reject(new Error("SchemaWorkerPool stopped"));
    }
    if (this.workers.length === 0) {
      return Promise.reject(
        new Error("SchemaWorkerPool.submit called before start()"),
      );
    }
    return new Promise<SchemaResult>((resolve, reject) => {
      const req: PendingRequest = {
        id: this.nextId++,
        packagePath,
        databasePath,
        resolve,
        reject,
      };
      this.queue.push(req);
      this.drain();
    });
  }

  /**
   * Try to assign queued requests to idle workers. Cheap; called
   * after every enqueue, after every completed request and after every
   * respawn.
   */
  private drain(): void {
    for (let i = 0; i < this.workers.length; i++) {
      if (this.queue.length === 0) return;
      const slot = this.workers[i];
      if (slot.busy) continue;
      const req = this.queue.shift()!;
      slot.busy = true;
      this.inFlight.set(req.id, req);
      this.workerCurrentId.set(i, req.id);
      slot.worker.postMessage({
        id: req.id,
        packagePath: req.packagePath,
        databasePath: req.databasePath,
      });
    }
  }

  /**
   * Create the worker for slot `index` and wire up its event handlers.
   * The returned slot is registered in `this.workers` by the caller.
   */
  private spawn(index: number): WorkerSlot {
    const worker = new Worker(this.workerUrl);
    const slot: WorkerSlot = { worker, busy: false };

    worker.on(
      "message",
      (msg: {
        id: number;
        ok: boolean;
        result?: SchemaResult;
        error?: { message: string; stack?: string };
      }) => {
        const req = this.inFlight.get(msg.id);
        if (!req) {
          logger.warn("SchemaWorkerPool: response for unknown request", {
            id: msg.id,
            workerIndex: index,
          });
          return;
        }
        this.inFlight.delete(msg.id);
        this.workerCurrentId.delete(index);
        slot.busy = false;
        if (msg.ok && msg.result) {
          req.resolve(msg.result);
        } else {
          // Rebuild an Error on this side; keep the worker's stack if sent.
          const err = new Error(msg.error?.message ?? "Unknown error");
          if (msg.error?.stack) err.stack = msg.error.stack;
          req.reject(err);
        }
        this.drain();
      },
    );

    worker.on("error", (err) => {
      // A crashed worker emits `error` and then `exit`. Only the first
      // event seen for a still-registered slot may recover it; anything
      // arriving after the slot was replaced belongs to a dead worker and
      // must not touch the replacement's bookkeeping.
      if (this.workers[index] !== slot) return;
      logger.error("SchemaWorkerPool: worker errored, respawning", {
        workerIndex: index,
        error: err,
      });
      // Without respawn, one crash silently shrinks capacity and
      // concurrent loads would queue forever.
      this.failAndRespawn(index, err);
    });

    worker.on("exit", (code) => {
      // Exits during shutdown, and clean (code 0) exits, are not recovered.
      if (this.stopped || code === 0) return;
      // Already replaced by the `error` handler above: nothing left to do.
      if (this.workers[index] !== slot) return;
      logger.warn("SchemaWorkerPool: worker exited unexpectedly", {
        workerIndex: index,
        code,
      });
      // Treat unexpected exit like an error: respawn so the pool
      // doesn't silently lose capacity.
      this.failAndRespawn(
        index,
        new Error(`SchemaWorker exited with code ${code}`),
      );
    });

    return slot;
  }

  /**
   * Reject the request attributed to slot `index` (if any) with `reason`,
   * then — unless the pool is stopped — replace the slot's worker and
   * dispatch queued work.
   *
   * NOTE(review): there is no backoff, so a worker script that crashes on
   * startup is respawned in a tight loop.
   */
  private failAndRespawn(index: number, reason: Error): void {
    const inFlightId = this.workerCurrentId.get(index);
    if (inFlightId !== undefined) {
      this.workerCurrentId.delete(index);
      const req = this.inFlight.get(inFlightId);
      if (req) {
        this.inFlight.delete(inFlightId);
        req.reject(reason);
      }
    }
    if (this.stopped) return;
    this.workers[index] = this.spawn(index);
    this.drain();
  }
}
239
-
240
/**
 * Process-wide singleton. Constructed lazily so importing this module
 * doesn't spawn workers in test environments that never call
 * `getSchemaWorkerPool()`.
 *
 * The worker URL is resolved from `import.meta.url`, which lets Bun
 * load `schema_worker.ts` directly in dev and the bundled
 * `schema_worker.mjs` in prod (see `build.ts`).
 */
let singleton: SchemaWorkerPool | null = null;

/**
 * Interpret a pool-size setting.
 *
 * Only positive integers are accepted. Anything else — unset, empty,
 * non-numeric, zero, negative, fractional or infinite — yields `fallback`,
 * because such a size would make `start()` spawn no workers, a surprising
 * number of workers, or loop forever.
 *
 * @param raw Raw setting value, typically from the environment.
 * @param fallback Size to use when `raw` is not a positive integer.
 */
function parseSchemaPoolSize(
  raw: string | undefined,
  fallback: number,
): number {
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Return the shared pool, creating and starting it on first use.
 *
 * The size comes from `PUBLISHER_SCHEMA_WORKER_POOL_SIZE` when that is a
 * positive integer, otherwise `DEFAULT_POOL_SIZE`.
 */
export function getSchemaWorkerPool(): SchemaWorkerPool {
  if (!singleton) {
    const url = resolveWorkerUrl();
    const size = parseSchemaPoolSize(
      process.env.PUBLISHER_SCHEMA_WORKER_POOL_SIZE,
      DEFAULT_POOL_SIZE,
    );
    // Publish the pool only after start() succeeded, so a failed start is
    // retried on the next call instead of leaving a worker-less singleton.
    const pool = new SchemaWorkerPool(url, size);
    pool.start();
    singleton = pool;
  }
  return singleton;
}
260
-
261
/**
 * Locate the schema worker entry script relative to a module URL.
 *
 * In dev (`bun --watch src/server.ts`), `import.meta.url` points at
 * `.../src/service/schema_worker_pool.ts` and the worker is the sibling
 * `.ts` file.
 *
 * In prod, this module gets inlined into `dist/server.mjs`, so
 * `import.meta.url` resolves to `dist/server.mjs`. Bun's bundler nests
 * outputs by their path relative to the common entrypoint root (./src),
 * so schema_worker lands at `dist/service/schema_worker.mjs` — one
 * directory below server.mjs.
 *
 * @param moduleUrl URL the worker path is resolved against. Defaults to
 *   this module's own `import.meta.url`; overridable so both layouts can
 *   be exercised without a build.
 * @returns URL of `schema_worker.mjs` (bundled layout, detected by a
 *   `.mjs` pathname) or of `schema_worker.ts` (source layout).
 */
function resolveWorkerUrl(moduleUrl: string | URL = import.meta.url): URL {
  const base = new URL(moduleUrl);
  const isBundled = base.pathname.endsWith(".mjs");
  return new URL(
    isBundled ? "./service/schema_worker.mjs" : "./schema_worker.ts",
    base,
  );
}
@@ -1,235 +0,0 @@
1
- /// <reference types="bun-types" />
2
-
3
- /**
4
- * Regression test for per-environment load/scaffold races.
5
- *
6
- * Before fix: concurrent GET (especially ?reload=true), POST, and PATCH against
7
- * the same environment name could enter `getEnvironment` / `addEnvironment` in
8
- * parallel. Multiple callers would scaffold or re-load the same directory
9
- * concurrently, leaving publisher.db and on-disk state inconsistent. Lazy loads
10
- * then failed with `Environment "…" could not be resolved to a path.`
11
- *
12
- * After fix: environment operations serialize on a per-environment mutex in
13
- * `EnvironmentStore.getEnvironment`, so concurrent callers share one load path.
14
- * All N requests must succeed and the environment must remain usable afterwards.
15
- */
16
-
17
- import { afterAll, beforeAll, describe, expect, it } from "bun:test";
18
- import { RestE2EEnv, startRestE2E } from "../../harness/rest_e2e";
19
-
20
- const ENV_NAME = `concurrent-environment-test-env-${Date.now()}`; // Timestamp suffix gives each module load its own environment name.
20
- const PACKAGE_NAME = "gcs_faa"; // Name of the single package seeded into the environment.
21
- const FIXTURE_LOCATION = "gs://publisher_test_packages/gcs_faa.zip"; // Location sent for that package in the environment payload.
22
- const CONCURRENCY = 12; // Number of parallel requests (or request triples) each test issues.
24
-
25
/**
 * Substrings that must never appear in a response body during these
 * tests; `findForbiddenError` reports the first one it finds.
 */
const FORBIDDEN_ERROR_FRAGMENTS: readonly string[] = [
  "could not be resolved to a path",
  "Package manifest for",
  "does not exist",
  "compiling model path not found",
  "model path not found",
];

/**
 * Scan a response body for forbidden error text.
 *
 * Strings are searched as-is; any other value is searched in its JSON
 * form (`null`/`undefined` count as an empty string). Values JSON cannot
 * represent — functions, symbols, circular structures, BigInt — fall back
 * to `String(body)` instead of throwing, since the parameter is `unknown`.
 *
 * @param body Parsed or raw response body.
 * @returns The first entry of `FORBIDDEN_ERROR_FRAGMENTS` (in list order)
 *   contained in the body text, or `undefined` when none is present.
 */
function findForbiddenError(body: unknown): string | undefined {
  let text: string;
  if (typeof body === "string") {
    text = body;
  } else {
    try {
      // JSON.stringify yields undefined for functions and symbols.
      text = JSON.stringify(body ?? "") ?? String(body);
    } catch {
      // Circular references and BigInt values make JSON.stringify throw.
      text = String(body);
    }
  }
  return FORBIDDEN_ERROR_FRAGMENTS.find((frag) => text.includes(frag));
}
37
-
38
/**
 * Build the request body used to create the test environment.
 *
 * The payload always names `ENV_NAME`, lists the single fixture package
 * and has no connections. A `description` key is added only when a value
 * other than `undefined` is passed.
 */
function environmentPayload(description?: string) {
  const seedPackage = { name: PACKAGE_NAME, location: FIXTURE_LOCATION };
  const payload = {
    name: ENV_NAME,
    packages: [seedPackage],
    connections: [],
  };
  if (description === undefined) {
    return payload;
  }
  return { ...payload, description };
}
46
-
47
/**
 * Poll the seeded package endpoint until it answers with a 2xx status.
 *
 * Each attempt is aborted once the overall deadline is reached, so a
 * single hung request cannot outlive `deadlineMs`. Response bodies are
 * cancelled rather than left unread so every poll releases its connection.
 *
 * @param baseUrl Base URL of the server under test.
 * @param deadlineMs Total time budget in milliseconds.
 * @throws Error when the package is still unavailable at the deadline.
 */
async function waitForPackageReady(
  baseUrl: string,
  deadlineMs = 30_000,
): Promise<void> {
  const deadline = Date.now() + deadlineMs;
  const url = `${baseUrl}/api/v0/environments/${ENV_NAME}/packages/${PACKAGE_NAME}`;
  while (Date.now() < deadline) {
    try {
      const remainingMs = Math.max(1, deadline - Date.now());
      const res = await fetch(url, {
        signal: AbortSignal.timeout(remainingMs),
      });
      const ready = res.ok;
      // Only the status matters; drop the body. A failed cancel must not
      // turn a successful poll into a retry.
      await res.body?.cancel().catch(() => undefined);
      if (ready) {
        return;
      }
    } catch {
      // not ready yet
    }
    // Back off between polls, but never sleep past the deadline.
    const pauseMs = Math.min(250, Math.max(0, deadline - Date.now()));
    await new Promise((r) => setTimeout(r, pauseMs));
  }
  throw new Error("Seeded package did not become available in time");
}
67
-
68
/**
 * E2E suite: fires concurrent create/reload/update requests at one
 * environment and checks that every request succeeds without surfacing
 * any of the forbidden path/manifest errors.
 */
describe("Concurrent environment operations (E2E)", () => {
  let env: (RestE2EEnv & { stop(): Promise<void> }) | null = null;
  let baseUrl: string;

  /** Status code plus parsed JSON body (`null` when the body is not JSON). */
  type Outcome = { status: number; body: unknown };

  /** POST the shared seed payload to the environments collection. */
  const postEnvironment = (): Promise<Response> =>
    fetch(`${baseUrl}/api/v0/environments`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(environmentPayload()),
    });

  /** Reduce a response to its status and JSON body. */
  const readOutcome = async (r: Response): Promise<Outcome> => ({
    status: r.status,
    body: await r.json().catch(() => null),
  });

  /** Assert a 200 response carrying this environment's metadata. */
  const expectEnvironmentMetadata = ({ status, body }: Outcome): void => {
    expect(status).toBe(200);
    expect(findForbiddenError(body)).toBeUndefined();
    // `body` is null when the response was not JSON; optional chaining
    // turns that into an assertion failure instead of a TypeError.
    const meta = body as { name?: string } | null;
    expect(meta?.name).toBe(ENV_NAME);
  };

  beforeAll(async () => {
    env = await startRestE2E();
    baseUrl = env.baseUrl;

    const createRes = await postEnvironment();
    if (!createRes.ok) {
      const body = await createRes.text();
      throw new Error(
        `Failed to seed test environment (${createRes.status}): ${body}`,
      );
    }
    await waitForPackageReady(baseUrl);
  });

  afterAll(async () => {
    // baseUrl is only set once the server started; skip cleanup otherwise.
    if (baseUrl) {
      try {
        await fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}`, {
          method: "DELETE",
        });
      } catch {
        // best-effort cleanup
      }
    }
    await env?.stop();
    env = null;
  });

  it("concurrent POST /environments for the same name all succeed", async () => {
    const responses = await Promise.all(
      Array.from({ length: CONCURRENCY }, () => postEnvironment()),
    );
    const outcomes = await Promise.all(responses.map(readOutcome));
    for (const outcome of outcomes) {
      expectEnvironmentMetadata(outcome);
    }

    await waitForPackageReady(baseUrl);

    // The environment must still be readable after the burst.
    const getRes = await fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}`);
    expect(getRes.status).toBe(200);
    const forbidden = findForbiddenError(
      await getRes.json().catch(() => null),
    );
    expect(forbidden).toBeUndefined();
  });

  it("concurrent GET /environments/:name?reload=true all succeed", async () => {
    const responses = await Promise.all(
      Array.from({ length: CONCURRENCY }, () =>
        fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}?reload=true`),
      ),
    );
    const outcomes = await Promise.all(responses.map(readOutcome));
    for (const outcome of outcomes) {
      expectEnvironmentMetadata(outcome);
    }

    await waitForPackageReady(baseUrl);
  });

  it("simultaneous POST + PATCH for the same environment serialize cleanly", async () => {
    const newReadme = `concurrent-env-update-${Date.now()}`;
    const [postRes, patchRes] = await Promise.all([
      postEnvironment(),
      fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}`, {
        method: "PATCH",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          name: ENV_NAME,
          readme: newReadme,
        }),
      }),
    ]);

    expect(postRes.status).toBe(200);
    expect(
      findForbiddenError(await postRes.json().catch(() => null)),
    ).toBeUndefined();

    // The PATCH may be answered with either 200 or 404; both are accepted
    // as long as no forbidden error text is returned.
    const patchBody = await patchRes.json().catch(() => null);
    expect(findForbiddenError(patchBody)).toBeUndefined();
    expect([200, 404]).toContain(patchRes.status);

    // Only the name is asserted afterwards; the readme is read but not
    // checked. NOTE(review): presumably because the POST/PATCH order is
    // not deterministic — confirm.
    const getRes = await fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}`);
    expect(getRes.status).toBe(200);
    const meta = (await getRes.json()) as { name?: string; readme?: string };
    expect(meta.name).toBe(ENV_NAME);
  });

  it("interleaved POST + GET-reload + package list never surface path errors", async () => {
    const work: Array<Promise<Outcome>> = [];
    for (let i = 0; i < CONCURRENCY; i++) {
      work.push(postEnvironment().then(readOutcome));
      work.push(
        fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}?reload=true`).then(
          readOutcome,
        ),
      );
      work.push(
        fetch(`${baseUrl}/api/v0/environments/${ENV_NAME}/packages`).then(
          readOutcome,
        ),
      );
    }

    // Individual requests may fail with a client error here, but never
    // with a server error or a forbidden path/manifest message.
    const results = await Promise.all(work);
    for (const { status, body } of results) {
      const forbidden = findForbiddenError(body);
      expect(forbidden).toBeUndefined();
      expect(status).toBeLessThan(500);
    }

    await waitForPackageReady(baseUrl);
  });
});