@malloy-publisher/server 0.0.198-dev3 → 0.0.198-dev4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build.ts +22 -12
- package/dist/compile_worker.mjs +633 -0
- package/dist/instrumentation.mjs +36 -57
- package/dist/server.mjs +3240 -2259
- package/package.json +3 -2
- package/src/compile/compile_pool.spec.ts +292 -0
- package/src/compile/compile_pool.ts +796 -0
- package/src/compile/compile_worker.ts +721 -0
- package/src/compile/protocol.ts +270 -0
- package/src/health.ts +13 -0
- package/src/instrumentation.ts +0 -50
- package/src/server.ts +0 -5
- package/src/service/environment_store.ts +3 -33
- package/src/service/model.ts +226 -3
- package/src/service/model_worker_path.spec.ts +133 -0
- package/src/service/package.spec.ts +7 -11
- package/src/service/package.ts +156 -49
- package/dist/service/schema_worker.mjs +0 -61
- package/src/service/process_stats_reporter.ts +0 -169
- package/src/service/schema_worker.ts +0 -123
- package/src/service/schema_worker_pool.ts +0 -287
- package/tests/integration/concurrent_environment/concurrent_environment.integration.spec.ts +0 -235
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Main-thread half of the compile worker pool.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: Malloy compile is pure JavaScript and runs on the
|
|
5
|
+
* V8 main thread. A large package can hold the event loop for many
|
|
6
|
+
* seconds, which is long enough to time out the Kubernetes
|
|
7
|
+
* `/health/liveness` probe (timeout=5s, see worker/main.tf). Moving
|
|
8
|
+
* compile into `worker_threads` keeps the main loop responsive
|
|
9
|
+
* regardless of compile cost.
|
|
10
|
+
*
|
|
11
|
+
* Public surface:
|
|
12
|
+
* - `getCompilePool()` — lazy singleton; respects MALLOY_COMPILE_WORKERS
|
|
13
|
+
* - `pool.compile({...})` — submit one model for off-thread compile
|
|
14
|
+
* - `pool.shutdown()` — graceful drain (called from health.ts)
|
|
15
|
+
*
|
|
16
|
+
* Lifecycle:
|
|
17
|
+
* - Workers are spawned lazily on first use, up to N (configurable).
|
|
18
|
+
* - Each worker emits {type:'ready'} once initialized; the pool
|
|
19
|
+
* waits for that before dispatching to a newly-spawned worker.
|
|
20
|
+
* - Jobs are dispatched to the worker with the fewest in-flight
|
|
21
|
+
* jobs (least-busy load balancing). A worker can hold multiple
|
|
22
|
+
* jobs concurrently because compile interleaves on `await`s.
|
|
23
|
+
* - On worker `exit` (uncaught error, OOM), the pool fails any
|
|
24
|
+
* in-flight jobs on that worker with a clean error, then respawns
|
|
25
|
+
* lazily on the next request.
|
|
26
|
+
*
|
|
27
|
+
* Schema-fetch RPC routing:
|
|
28
|
+
* - When a worker proxies `fetchSchemaForTables` back to the main
|
|
29
|
+
* thread, the message includes a `jobId`. The pool keeps a
|
|
30
|
+
* `jobId → MalloyConfig` map for in-flight jobs so it knows which
|
|
31
|
+
* live connection to dispatch the request to. The mapping is
|
|
32
|
+
* installed when the job is submitted and removed when it
|
|
33
|
+
* resolves.
|
|
34
|
+
*/
|
|
35
|
+
import type {
|
|
36
|
+
Annotation,
|
|
37
|
+
FetchSchemaOptions,
|
|
38
|
+
InfoConnection,
|
|
39
|
+
LookupConnection,
|
|
40
|
+
MalloyConfig,
|
|
41
|
+
ModelDef,
|
|
42
|
+
SQLSourceDef,
|
|
43
|
+
TableSourceDef,
|
|
44
|
+
} from "@malloydata/malloy";
|
|
45
|
+
import * as Malloy from "@malloydata/malloy-interfaces";
|
|
46
|
+
import { fileURLToPath, pathToFileURL } from "url";
|
|
47
|
+
import { dirname, join } from "path";
|
|
48
|
+
import { Worker } from "node:worker_threads";
|
|
49
|
+
import { ModelCompilationError } from "../errors";
|
|
50
|
+
import { logger } from "../logger";
|
|
51
|
+
import type { FilterDefinition } from "../service/filter";
|
|
52
|
+
import type {
|
|
53
|
+
CompileJobError,
|
|
54
|
+
CompileJobRequest,
|
|
55
|
+
CompileJobResult,
|
|
56
|
+
ConnectionMetadataRequest,
|
|
57
|
+
ConnectionMetadataResponse,
|
|
58
|
+
MainToWorkerMessage,
|
|
59
|
+
ReadUrlRequest,
|
|
60
|
+
ReadUrlResponse,
|
|
61
|
+
RpcErrorResponse,
|
|
62
|
+
SchemaForSqlRequest,
|
|
63
|
+
SchemaForSqlResponse,
|
|
64
|
+
SchemaForTablesRequest,
|
|
65
|
+
SchemaForTablesResponse,
|
|
66
|
+
SerializedError,
|
|
67
|
+
WorkerToMainMessage,
|
|
68
|
+
} from "./protocol";
|
|
69
|
+
|
|
70
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
71
|
+
// Configuration
|
|
72
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
/**
 * Reads the desired compile-worker count from `MALLOY_COMPILE_WORKERS`.
 *
 * The feature is opt-in: when the variable is unset, cannot be parsed as a
 * base-10 integer, or is negative, the function reports 0, which callers
 * treat as "pool disabled".
 *
 * @returns The configured number of workers, or 0 when the pool is off.
 */
export function getCompileWorkerCount(): number {
   // An unset variable is parsed as "", which yields NaN and falls through
   // to the disabled default below.
   const configured = Number.parseInt(
      process.env.MALLOY_COMPILE_WORKERS ?? "",
      10,
   );
   return Number.isFinite(configured) && configured >= 0 ? configured : 0;
}
|
|
90
|
+
|
|
91
|
+
// Wall-clock cap for a single job — if a worker hangs, we don't want to
// leak a Promise forever. The 120s default sits above the K8s liveness
// window quoted for this service (5s × 15 fails = 75s), leaving margin
// against false timeouts during cold starts while still failing on real
// hangs. Override with MALLOY_COMPILE_JOB_TIMEOUT_MS (milliseconds).
const DEFAULT_COMPILE_JOB_TIMEOUT_MS = 120_000;

// Largest delay Node timers accept; anything above it is coerced to 1ms.
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Parses a timeout override, falling back to the default when the value is
 * missing, not a number, or not strictly positive. Without this guard a
 * typo in the environment produces `NaN`, which `setTimeout` treats as an
 * (almost) immediate timer and would fail every compile job instantly.
 *
 * @param raw - Raw environment value, if any.
 * @returns A delay in milliseconds that is safe to hand to `setTimeout`.
 */
function parseCompileJobTimeoutMs(raw: string | undefined): number {
   if (raw === undefined) return DEFAULT_COMPILE_JOB_TIMEOUT_MS;
   const parsed = Number.parseInt(raw, 10);
   if (!Number.isFinite(parsed) || parsed <= 0) {
      return DEFAULT_COMPILE_JOB_TIMEOUT_MS;
   }
   return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

const COMPILE_JOB_TIMEOUT_MS = parseCompileJobTimeoutMs(
   process.env.MALLOY_COMPILE_JOB_TIMEOUT_MS,
);
|
|
99
|
+
|
|
100
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
101
|
+
// Worker-side stub of the job request, plus the resolved connection
|
|
102
|
+
// lookup the pool services schema RPCs against.
|
|
103
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
/**
 * Main-thread bookkeeping for one submitted compile job. An entry is stored
 * in the pool's job map when the job is dispatched and removed when it
 * completes, errors, or is failed by the pool.
 */
interface JobContext {
   /** Pool-assigned identifier; also the key of this entry in the job map. */
   jobId: string;
   /** Live connection lookup used to answer the worker's schema/metadata RPCs. */
   connections: LookupConnection<InfoConnection>;
   /** Reader used to answer the worker's `read-url` RPCs for this job. */
   urlReader: CompileUrlReader;
   /** Settles the caller's promise with the worker's result. */
   resolve: (result: CompileJobResult) => void;
   /** Settles the caller's promise with a failure. */
   reject: (err: Error) => void;
   /** Handle of the job's wall-clock timeout timer. */
   timeout: ReturnType<typeof setTimeout>;
}
|
|
113
|
+
|
|
114
|
+
/** Pool-side record describing one spawned worker thread. */
interface PoolWorker {
   /** Sequential number used in log lines and in the thread name. */
   id: number;
   /** The underlying `worker_threads` handle. */
   worker: Worker;
   /** Settles once the worker has posted its `ready` message. */
   ready: Promise<void>;
   /** Ids of jobs currently assigned to this worker. */
   inFlight: Set<string>;
   /** Flipped to true by the worker's `exit` handler. */
   exited: boolean;
}
|
|
121
|
+
|
|
122
|
+
/**
 * URL-reader shape the pool dispatches `read-url` RPCs to.
 *
 * A reader may resolve with either the raw file text or an object carrying
 * `contents` (plus an optional `invalidationKey`). The pool forwards only
 * the string payload back to the worker; any `invalidationKey` is ignored.
 */
export interface CompileUrlReader {
   readURL(
      url: URL,
   ): Promise<string | { contents: string; invalidationKey?: unknown }>;
}
|
|
133
|
+
|
|
134
|
+
/** Arguments for compiling one file-backed model off the main thread. */
export interface CompileRequest {
   /** Package location; forwarded unchanged in the worker message. */
   packagePath: string;
   /** Model to compile; forwarded unchanged in the worker message. */
   modelPath: string;
   /**
    * The live MalloyConfig. It never crosses the worker boundary: the pool
    * keeps it on the main side and serves the worker's proxy RPCs from
    * `config.connections`.
    */
   malloyConfig: MalloyConfig;
   /** Name of the default connection, handed to the worker as-is. */
   defaultConnectionName: string | null;
   /** Reader for `read-url` RPCs; the pool uses a filesystem reader when omitted. */
   urlReader?: CompileUrlReader;
   /** Optional buildManifest, passed through to the worker untouched. */
   buildManifest?: unknown;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Variant of {@link CompileRequest} that carries the Malloy text itself
 * rather than a model path, for call sites that synthesize Malloy on the
 * fly (for example a one-line `source: temp is duckdb.table('<path>')`
 * probe per data file). Sending such snippets through the pool keeps their
 * compile cost off the event loop.
 *
 * Everything except `modelPath` is shared with the file-backed request:
 * the pool still holds the live MalloyConfig and answers the worker's
 * schema-fetch RPCs against it.
 */
export interface CompileInlineRequest
   extends Omit<CompileRequest, "modelPath"> {
   /** The Malloy source string to compile. */
   source: string;
   /** Base URL for `import "…"` resolution; defaults to the package root. */
   importBaseURL?: string;
}
|
|
175
|
+
|
|
176
|
+
/** What a successful compile job hands back to the caller. */
export interface CompileOutcome {
   /** Compiled model definition as returned by the worker. */
   modelDef: ModelDef;
   /** Source descriptions as returned by the worker. */
   sourceInfos: Malloy.SourceInfo[];
   /** Worker-produced source entries, passed through without interpretation. */
   sources: unknown[];
   /** Worker-produced query entries, passed through without interpretation. */
   queries: unknown[];
   /** Filter definitions keyed by string, rebuilt from the worker's entry list. */
   filterMap: Map<string, FilterDefinition[]>;
   /** Pre-converted API-shape givens or undefined if the model declared none. */
   givens?: unknown[];
   /** Compile duration in milliseconds, as reported by the worker. */
   compileDurationMs: number;
}
|
|
186
|
+
|
|
187
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
188
|
+
// Path to the bundled worker script
|
|
189
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
/**
 * Locate the worker entrypoint as a `file:` URL.
 *
 * - Bundled builds: this module is inlined into `server.mjs`, while the
 *   worker is emitted as its own entry, `compile_worker.mjs`, in the same
 *   directory so `new Worker(...)` can load it (see build.ts).
 * - Dev / test: this file is running from source, so the sibling
 *   `compile_worker.ts` is loaded directly.
 *
 * The URL is built with `pathToFileURL` rather than string concatenation so
 * that directories containing `#`, `?`, `%` or spaces are encoded correctly
 * instead of being parsed as a fragment, query, or escape sequence.
 *
 * @returns URL of the worker script sitting next to the running module.
 */
function resolveWorkerScript(): URL {
   const selfPath = fileURLToPath(import.meta.url);
   // A compiled/bundled module ends in .mjs/.js; anything else is source.
   const runningFromBundle =
      selfPath.endsWith(".mjs") || selfPath.endsWith(".js");
   const workerFile = runningFromBundle
      ? "compile_worker.mjs"
      : "compile_worker.ts";
   return pathToFileURL(join(dirname(selfPath), workerFile));
}
|
|
213
|
+
|
|
214
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
215
|
+
// The pool
|
|
216
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
217
|
+
|
|
218
|
+
/**
 * Main-thread coordinator for a bounded set of compile worker threads.
 *
 * Responsibilities:
 * - spawn workers lazily, up to `maxWorkers`, and wait for each worker's
 *   `ready` message before handing it work;
 * - track every submitted job so results, errors, timeouts and worker exits
 *   settle the caller's promise exactly once;
 * - answer the RPCs workers send back (connection metadata, schema fetches,
 *   URL reads) using the connections / URL reader registered for the job.
 *
 * A pool constructed with `maxWorkers <= 0` is disabled: `compile` and
 * `compileInline` reject, and no worker is ever spawned.
 */
export class CompileWorkerPool {
   private readonly workers: PoolWorker[] = [];
   private readonly maxWorkers: number;
   private nextWorkerId = 0;
   private readonly jobs = new Map<string, JobContext>();
   private nextJobId = 0;
   private shuttingDown = false;
   /** Memoized so every `shutdown()` caller awaits the same drain. */
   private shutdownPromise: Promise<void> | null = null;
   private readonly workerScript: URL;

   /**
    * @param maxWorkers - Upper bound on live workers; 0 disables the pool.
    * @param workerScript - Worker entrypoint; resolved next to this module
    *   when omitted.
    */
   constructor(maxWorkers: number, workerScript?: URL) {
      this.maxWorkers = maxWorkers;
      this.workerScript = workerScript ?? resolveWorkerScript();
   }

   /** True when the pool is allowed to spawn at least one worker. */
   get enabled(): boolean {
      return this.maxWorkers > 0;
   }

   /** Number of workers that have been spawned and have not exited. */
   get size(): number {
      return this.workers.filter((w) => !w.exited).length;
   }

   /**
    * Compile a file-backed model off the main thread.
    *
    * @throws (as a rejection) when the pool is disabled or shutting down,
    *   when the worker fails or exits, or when the job times out.
    */
   async compile(request: CompileRequest): Promise<CompileOutcome> {
      return this.dispatchJob({
         label: `model=${request.modelPath}`,
         malloyConfig: request.malloyConfig,
         urlReader: request.urlReader,
         buildMessage: (jobId) => ({
            type: "compile",
            requestId: jobId,
            packagePath: request.packagePath,
            modelPath: request.modelPath,
            defaultConnectionName: request.defaultConnectionName,
            buildManifest: request.buildManifest,
         }),
      });
   }

   /**
    * Compile an inline Malloy source string off the main thread. The
    * request carries the source text instead of a model path; schema
    * fetches still proxy back to the main thread via the live MalloyConfig
    * the caller supplies. See {@link CompileInlineRequest}.
    */
   async compileInline(request: CompileInlineRequest): Promise<CompileOutcome> {
      return this.dispatchJob({
         label: "inline-source",
         malloyConfig: request.malloyConfig,
         urlReader: request.urlReader,
         buildMessage: (jobId) => ({
            type: "compile",
            requestId: jobId,
            packagePath: request.packagePath,
            inlineSource: request.source,
            importBaseURL: request.importBaseURL,
            defaultConnectionName: request.defaultConnectionName,
            buildManifest: request.buildManifest,
         }),
      });
   }

   /**
    * Shared dispatch helper for both file- and inline-source compiles.
    * Allocates a jobId, registers the JobContext (so the pool can route
    * RPCs from the worker back to the right MalloyConfig), posts the
    * message, and returns a Promise that resolves with the compile outcome
    * (or rejects on worker error / exit / timeout / undeliverable message).
    */
   private async dispatchJob(opts: {
      label: string;
      malloyConfig: MalloyConfig;
      urlReader: CompileUrlReader | undefined;
      buildMessage: (jobId: string) => CompileJobRequest;
   }): Promise<CompileOutcome> {
      if (!this.enabled) {
         throw new Error(
            "CompileWorkerPool called while disabled (MALLOY_COMPILE_WORKERS=0)",
         );
      }
      if (this.shuttingDown) {
         throw new Error("CompileWorkerPool is shutting down");
      }

      const worker = await this.acquireWorker();

      // State may have changed while we waited for the worker to be ready.
      // Failing here is immediate; posting to a dead worker would only
      // surface as a timeout much later.
      if (this.shuttingDown) {
         throw new Error("CompileWorkerPool is shutting down");
      }
      if (worker.exited) {
         throw new Error(
            `Compile worker #${worker.id} exited before the job could be dispatched (${opts.label})`,
         );
      }

      this.nextJobId += 1;
      const jobId = `job-${this.nextJobId}`;

      return new Promise<CompileOutcome>((resolve, reject) => {
         const timeout = setTimeout(() => {
            this.failJob(
               jobId,
               new Error(
                  `Compile job timed out after ${COMPILE_JOB_TIMEOUT_MS}ms (${opts.label})`,
               ),
            );
         }, COMPILE_JOB_TIMEOUT_MS);

         // Every settlement path goes through here, so a timed-out or
         // failed job never lingers in the worker's in-flight set (which
         // would skew least-busy selection).
         const release = (): void => {
            clearTimeout(timeout);
            worker.inFlight.delete(jobId);
         };

         this.jobs.set(jobId, {
            jobId,
            connections: opts.malloyConfig.connections,
            urlReader: opts.urlReader ?? defaultUrlReader,
            resolve: (result) => {
               release();
               try {
                  resolve(adaptResult(result));
               } catch (error) {
                  // A malformed result must reject the job, not escape
                  // from the worker's message listener.
                  reject(
                     error instanceof Error ? error : new Error(String(error)),
                  );
               }
            },
            reject: (err) => {
               release();
               reject(err);
            },
            timeout,
         });

         worker.inFlight.add(jobId);
         try {
            worker.worker.postMessage(opts.buildMessage(jobId));
         } catch (error) {
            // e.g. a payload that cannot be structured-cloned: fail now
            // rather than leaving the job registered until it times out.
            this.failJob(
               jobId,
               error instanceof Error ? error : new Error(String(error)),
            );
         }
      });
   }

   /**
    * Ask every worker to shut down and wait for each one to exit. A worker
    * that has not exited after 10 seconds is terminated. Jobs still
    * assigned to a worker are rejected by that worker's `exit` handler.
    *
    * Safe to call multiple times: every call awaits the same shutdown.
    */
   async shutdown(): Promise<void> {
      if (this.shutdownPromise === null) {
         this.shuttingDown = true;
         this.shutdownPromise = Promise.all(
            this.workers.map((pw) => this.stopWorker(pw)),
         ).then(() => undefined);
      }
      await this.shutdownPromise;
   }

   // ────────────────────────────────────────────────────────────────
   // Internals
   // ────────────────────────────────────────────────────────────────

   /** Request a graceful exit from one worker, with a hard 10s ceiling. */
   private stopWorker(pw: PoolWorker): Promise<void> {
      return new Promise<void>((resolve) => {
         if (pw.exited) {
            resolve();
            return;
         }
         // Hard ceiling — if a worker won't exit, terminate it.
         const killTimer = setTimeout(() => {
            if (pw.exited) return;
            void pw.worker
               .terminate()
               .catch((error: unknown) => {
                  logger.error(
                     `CompileWorkerPool: failed to terminate worker #${pw.id}`,
                     { error },
                  );
               })
               .finally(() => resolve());
         }, 10_000);
         pw.worker.once("exit", () => {
            // Cancel the ceiling so a pending timer does not keep the
            // process alive after a clean exit.
            clearTimeout(killTimer);
            resolve();
         });
         const msg: { type: "shutdown" } = { type: "shutdown" };
         pw.worker.postMessage(msg);
      });
   }

   /**
    * Pick the worker for the next job. While fewer than `maxWorkers` are
    * alive a new one is spawned (even if an existing worker is idle);
    * afterwards the live worker with the fewest in-flight jobs is chosen.
    * Rejects if the chosen worker exits before reporting ready.
    */
   private async acquireWorker(): Promise<PoolWorker> {
      // Drop dead workers so size accounting is honest.
      const alive = this.workers.filter((w) => !w.exited);
      this.workers.splice(0, this.workers.length, ...alive);

      if (this.workers.length < this.maxWorkers) {
         const spawned = this.spawnWorker();
         this.workers.push(spawned);
         await spawned.ready;
         return spawned;
      }

      // Least-busy
      let best = alive[0];
      for (const candidate of alive) {
         if (candidate.inFlight.size < best.inFlight.size) {
            best = candidate;
         }
      }
      await best.ready;
      return best;
   }

   /** Start a worker thread and wire up its message/error/exit handlers. */
   private spawnWorker(): PoolWorker {
      this.nextWorkerId += 1;
      const id = this.nextWorkerId;
      logger.info(
         `CompileWorkerPool: spawning worker #${id} (script=${this.workerScript.toString()})`,
      );
      const worker = new Worker(this.workerScript, {
         // Worker stderr/stdout flow through the parent's by default;
         // we don't override.
         name: `malloy-compile-worker-${id}`,
      });

      let readyResolve!: () => void;
      let readyReject!: (err: Error) => void;
      const ready = new Promise<void>((res, rej) => {
         readyResolve = res;
         readyReject = rej;
      });
      // `ready` may reject while nobody is awaiting it (worker dies between
      // requests); attach a no-op handler so that is not reported as an
      // unhandled rejection. Real awaiters still observe the failure.
      ready.catch(() => undefined);

      const pw: PoolWorker = {
         id,
         worker,
         ready,
         inFlight: new Set(),
         exited: false,
      };

      worker.on("message", (msg: WorkerToMainMessage) => {
         this.handleWorkerMessage(pw, msg, readyResolve);
      });

      worker.on("error", (err) => {
         logger.error(`CompileWorkerPool: worker #${id} errored`, {
            error: err,
         });
      });

      worker.on("exit", (code) => {
         pw.exited = true;
         logger.warn(
            `CompileWorkerPool: worker #${id} exited (code=${code}, inFlight=${pw.inFlight.size})`,
         );
         // Unblock anyone still waiting for startup; this is a no-op when
         // the worker had already reported ready.
         readyReject(
            new Error(
               `Compile worker #${id} exited before becoming ready (code=${code})`,
            ),
         );
         // Fail any jobs that were running on this worker — don't
         // strand callers waiting for a result that will never come.
         // Iterate over a copy: failing a job removes it from the set.
         for (const jobId of [...pw.inFlight]) {
            this.failJob(
               jobId,
               new Error(
                  `Compile worker #${id} exited unexpectedly (code=${code})`,
               ),
            );
         }
         pw.inFlight.clear();
      });

      return pw;
   }

   /** Route one worker message to the matching handler. */
   private handleWorkerMessage(
      pw: PoolWorker,
      msg: WorkerToMainMessage,
      markReady: () => void,
   ): void {
      switch (msg.type) {
         case "ready":
            markReady();
            return;
         case "compile-result":
            this.completeJob(pw, msg);
            return;
         case "compile-error":
            this.errorJob(pw, msg);
            return;
         case "connection-metadata":
            void this.handleConnectionMetadata(pw, msg);
            return;
         case "schema-for-tables":
            void this.handleSchemaForTables(pw, msg);
            return;
         case "schema-for-sql":
            void this.handleSchemaForSql(pw, msg);
            return;
         case "read-url":
            void this.handleReadUrl(pw, msg);
            return;
         default: {
            // Compile-time exhaustiveness check; unknown messages are
            // ignored at runtime.
            const exhaustive: never = msg;
            void exhaustive;
            return;
         }
      }
   }

   /** Settle a job with the worker's successful result. */
   private completeJob(pw: PoolWorker, msg: CompileJobResult): void {
      const ctx = this.jobs.get(msg.requestId);
      if (!ctx) return; // already timed out / failed
      this.jobs.delete(msg.requestId);
      pw.inFlight.delete(msg.requestId);
      ctx.resolve(msg);
   }

   /** Settle a job with the error reported by the worker. */
   private errorJob(pw: PoolWorker, msg: CompileJobError): void {
      const ctx = this.jobs.get(msg.requestId);
      if (!ctx) return; // already timed out / failed
      this.jobs.delete(msg.requestId);
      pw.inFlight.delete(msg.requestId);
      ctx.reject(deserializeError(msg.error));
   }

   /** Reject a job from the pool side (timeout, worker exit, post failure). */
   private failJob(jobId: string, error: Error): void {
      const ctx = this.jobs.get(jobId);
      if (!ctx) return;
      this.jobs.delete(jobId);
      ctx.reject(error);
   }

   /**
    * Common skeleton for the worker → main RPCs: look up the job, run
    * `produce` against its context, and post either the produced response
    * or an `rpc-error`. Never rejects, so callers may fire-and-forget.
    */
   private async serveRpc(
      pw: PoolWorker,
      msg:
         | ConnectionMetadataRequest
         | SchemaForTablesRequest
         | SchemaForSqlRequest
         | ReadUrlRequest,
      produce: (
         ctx: JobContext,
      ) => Promise<
         | ConnectionMetadataResponse
         | SchemaForTablesResponse
         | SchemaForSqlResponse
         | ReadUrlResponse
      >,
   ): Promise<void> {
      const ctx = this.jobs.get(msg.jobId);
      if (!ctx) {
         this.postRpcError(pw, msg.requestId, {
            name: "Error",
            message: `Unknown jobId ${msg.jobId}`,
         });
         return;
      }
      try {
         const response = await produce(ctx);
         // Posting stays inside the try so an unclonable payload is
         // reported to the worker as an rpc-error as well.
         pw.worker.postMessage(response as MainToWorkerMessage);
      } catch (error) {
         this.postRpcError(pw, msg.requestId, serializeError(error));
      }
   }

   /** Post an `rpc-error` reply; delivery failures are logged, not thrown. */
   private postRpcError(
      pw: PoolWorker,
      requestId: RpcErrorResponse["requestId"],
      error: SerializedError,
   ): void {
      const response: RpcErrorResponse = {
         type: "rpc-error",
         requestId,
         ok: false,
         error,
      };
      try {
         pw.worker.postMessage(response as MainToWorkerMessage);
      } catch (postError) {
         logger.error(
            `CompileWorkerPool: failed to deliver RPC error to worker #${pw.id}`,
            { error: postError },
         );
      }
   }

   /** RPC: report dialect name and digest for a named connection. */
   private handleConnectionMetadata(
      pw: PoolWorker,
      msg: ConnectionMetadataRequest,
   ): Promise<void> {
      return this.serveRpc(
         pw,
         msg,
         async (ctx): Promise<ConnectionMetadataResponse> => {
            const conn = await ctx.connections.lookupConnection(
               msg.connectionName,
            );
            return {
               type: "connection-metadata-response",
               requestId: msg.requestId,
               ok: true,
               metadata: {
                  name: msg.connectionName,
                  dialectName: conn.dialectName,
                  // Connections without getDigest fall back to their name.
                  digest:
                     typeof conn.getDigest === "function"
                        ? conn.getDigest()
                        : msg.connectionName,
               },
            };
         },
      );
   }

   /** RPC: fetch table schemas through the job's live connection. */
   private handleSchemaForTables(
      pw: PoolWorker,
      msg: SchemaForTablesRequest,
   ): Promise<void> {
      return this.serveRpc(
         pw,
         msg,
         async (ctx): Promise<SchemaForTablesResponse> => {
            const conn = await ctx.connections.lookupConnection(
               msg.connectionName,
            );
            const result = await conn.fetchSchemaForTables(
               msg.tables,
               buildFetchOptions(msg.options),
            );
            return {
               type: "schema-for-tables-response",
               requestId: msg.requestId,
               ok: true,
               schemas: result.schemas as Record<string, TableSourceDef>,
               errors: result.errors,
            };
         },
      );
   }

   /**
    * RPC: fetch the schema of a SQL block through the job's live
    * connection. A schema-level error returned by the connection is relayed
    * inside a successful response; only thrown errors become `rpc-error`.
    */
   private handleSchemaForSql(
      pw: PoolWorker,
      msg: SchemaForSqlRequest,
   ): Promise<void> {
      return this.serveRpc(
         pw,
         msg,
         async (ctx): Promise<SchemaForSqlResponse> => {
            const conn = await ctx.connections.lookupConnection(
               msg.connectionName,
            );
            const result = await conn.fetchSchemaForSQLStruct(
               msg.sentence as Parameters<
                  InfoConnection["fetchSchemaForSQLStruct"]
               >[0],
               buildFetchOptions(msg.options),
            );
            if (result.error !== undefined) {
               return {
                  type: "schema-for-sql-response",
                  requestId: msg.requestId,
                  ok: true,
                  error: result.error,
               };
            }
            return {
               type: "schema-for-sql-response",
               requestId: msg.requestId,
               ok: true,
               structDef: result.structDef as SQLSourceDef,
            };
         },
      );
   }

   /** RPC: read a URL with the job's reader and return its text. */
   private handleReadUrl(pw: PoolWorker, msg: ReadUrlRequest): Promise<void> {
      return this.serveRpc(pw, msg, async (ctx): Promise<ReadUrlResponse> => {
         const raw = await ctx.urlReader.readURL(new URL(msg.url));
         // Readers may wrap the text in {contents, invalidationKey}.
         const contents = typeof raw === "string" ? raw : raw.contents;
         return {
            type: "read-url-response",
            requestId: msg.requestId,
            ok: true,
            contents,
         };
      });
   }
}
|
|
684
|
+
|
|
685
|
+
/**
 * Builds the options object for a schema fetch from the optional fields of
 * an RPC message. Fields that are `undefined` are left out entirely rather
 * than copied across as explicit `undefined` properties.
 *
 * @param options - Optional refresh timestamp and model annotation.
 * @returns A fresh options object holding only the fields that were set.
 */
function buildFetchOptions(options: {
   refreshTimestamp?: number;
   modelAnnotation?: Annotation;
}): FetchSchemaOptions {
   const { refreshTimestamp, modelAnnotation } = options;
   return {
      ...(refreshTimestamp !== undefined ? { refreshTimestamp } : {}),
      ...(modelAnnotation !== undefined ? { modelAnnotation } : {}),
   };
}
|
|
698
|
+
|
|
699
|
+
/**
 * Converts a worker result message into the shape callers consume. All
 * fields are passed through as-is except `filterMap`, which arrives as a
 * list of `[key, value]` entries and is rebuilt into a `Map`.
 */
function adaptResult(result: CompileJobResult): CompileOutcome {
   const filterMap = new Map<string, FilterDefinition[]>();
   for (const [key, definitions] of result.filterMap) {
      filterMap.set(key, definitions as FilterDefinition[]);
   }

   const { sources, queries, givens, compileDurationMs } = result;
   return {
      modelDef: result.modelDef as ModelDef,
      sourceInfos: result.sourceInfos as Malloy.SourceInfo[],
      sources,
      queries,
      filterMap,
      givens,
      compileDurationMs,
   };
}
|
|
713
|
+
|
|
714
|
+
/**
 * Flattens a thrown value into a plain object that can be posted to a
 * worker. `Error` instances keep their name, message and stack; any other
 * value is stringified into the message of a generic "Error".
 */
function serializeError(error: unknown): SerializedError {
   if (!(error instanceof Error)) {
      return { name: "Error", message: String(error) };
   }
   const { name, message, stack } = error;
   return { name, message, stack };
}
|
|
724
|
+
|
|
725
|
+
/**
 * Rebuilds an `Error` on the main thread from the worker's serialized form.
 *
 * The result always carries the original name, message and (when present)
 * stack. Malloy problems, if any, are re-attached as `.problems`. When the
 * worker flagged the failure as a compilation error, the rebuilt error is
 * wrapped in `ModelCompilationError` so callers that catch that class keep
 * working; the original stack is copied onto the wrapper.
 */
function deserializeError(serialized: SerializedError): Error {
   const { name, message, stack, malloyProblems, isCompilationError } =
      serialized;

   // A plain Error is used instead of MalloyError because the MalloyError
   // constructor takes a non-trivial argument shape; per the original
   // author's note, downstream code reads `.message` and `.problems`.
   const plain = new Error(message);
   plain.name = name;
   if (stack) plain.stack = stack;
   if (malloyProblems) {
      Object.assign(plain, { problems: malloyProblems });
   }

   if (!isCompilationError) return plain;

   // The constructor is typed for a MalloyError-shaped input; the cast only
   // satisfies that nominal type, the data travels on `plain`.
   const compilationError = new ModelCompilationError(
      plain as unknown as ConstructorParameters<
         typeof ModelCompilationError
      >[0],
   );
   if (stack) compilationError.stack = stack;
   return compilationError;
}
|
|
754
|
+
|
|
755
|
+
/**
 * Fallback reader used when a compile request supplies no `urlReader`.
 * It reads `file:` URLs from the local filesystem as UTF-8 text.
 *
 * Any other protocol is rejected with an explicit error. Previously such
 * URLs were handed to `fs.readFile` as if they were a path, which surfaced
 * as a confusing "no such file" failure.
 */
const defaultUrlReader = {
   readURL: async (url: URL): Promise<string> => {
      if (url.protocol !== "file:") {
         throw new Error(
            `Unsupported URL protocol "${url.protocol}" for ${url.toString()}: the default compile URL reader only reads file:// URLs`,
         );
      }
      // Loaded lazily so importing this module does not pull in fs until a
      // read is actually requested.
      const { promises: fs } = await import("fs");
      return fs.readFile(fileURLToPath(url), "utf8");
   },
};
|
|
764
|
+
|
|
765
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
766
|
+
// Singleton accessor
|
|
767
|
+
// ──────────────────────────────────────────────────────────────────────
|
|
768
|
+
|
|
769
|
+
/** Process-wide pool instance, created on the first `getCompilePool()` call. */
let singleton: CompileWorkerPool | null = null;

/**
 * Returns the shared compile pool, creating it on first use with the size
 * reported by `getCompileWorkerCount()`. Whether the pool ended up enabled
 * or disabled is logged once, at creation time.
 */
export function getCompilePool(): CompileWorkerPool {
   if (singleton !== null) return singleton;

   const workerCount = getCompileWorkerCount();
   const pool = new CompileWorkerPool(workerCount);
   singleton = pool;

   logger.info(
      workerCount > 0
         ? `Malloy compile worker pool enabled (size=${workerCount}). Set MALLOY_COMPILE_WORKERS=0 to disable.`
         : "Malloy compile worker pool DISABLED (MALLOY_COMPILE_WORKERS=0). Compile runs on the main event loop.",
   );
   return pool;
}
|
|
787
|
+
|
|
788
|
+
/**
 * Test-only hook: installs `pool` as the shared instance. If a different
 * pool is currently installed, it is shut down first and the swap happens
 * only after that shutdown has completed.
 *
 * @param pool - Replacement pool, or `null` to clear the shared instance.
 */
export async function __setCompilePoolForTests(
   pool: CompileWorkerPool | null,
): Promise<void> {
   const previous = singleton;
   if (previous !== null && previous !== pool) {
      await previous.shutdown();
   }
   singleton = pool;
}
|