@query-farm/vgi-rpc 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-log.d.ts +50 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
- package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/index.d.ts +102 -0
- package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
- package/dist/arrow/index.d.ts +4 -0
- package/dist/arrow/index.d.ts.map +1 -0
- package/dist/arrow/predicates.d.ts +44 -0
- package/dist/arrow/predicates.d.ts.map +1 -0
- package/dist/arrow/types.d.ts +62 -0
- package/dist/arrow/types.d.ts.map +1 -0
- package/dist/client/capabilities.d.ts +25 -0
- package/dist/client/capabilities.d.ts.map +1 -0
- package/dist/client/connect.d.ts.map +1 -1
- package/dist/client/introspect.d.ts +7 -0
- package/dist/client/introspect.d.ts.map +1 -1
- package/dist/client/ipc.d.ts +8 -2
- package/dist/client/ipc.d.ts.map +1 -1
- package/dist/client/pipe.d.ts.map +1 -1
- package/dist/client/stream.d.ts +11 -2
- package/dist/client/stream.d.ts.map +1 -1
- package/dist/client/uploadUrl.d.ts +25 -0
- package/dist/client/uploadUrl.d.ts.map +1 -0
- package/dist/constants.d.ts +15 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/crypto.d.ts +22 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/dispatch/describe.d.ts +10 -6
- package/dist/dispatch/describe.d.ts.map +1 -1
- package/dist/dispatch/stream.d.ts +2 -2
- package/dist/dispatch/stream.d.ts.map +1 -1
- package/dist/dispatch/unary.d.ts +2 -2
- package/dist/dispatch/unary.d.ts.map +1 -1
- package/dist/errors.d.ts +46 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/external.d.ts +25 -5
- package/dist/external.d.ts.map +1 -1
- package/dist/http/bearer.d.ts.map +1 -1
- package/dist/http/common.d.ts +42 -7
- package/dist/http/common.d.ts.map +1 -1
- package/dist/http/dispatch.d.ts +20 -2
- package/dist/http/dispatch.d.ts.map +1 -1
- package/dist/http/handler.d.ts.map +1 -1
- package/dist/http/index.d.ts +1 -0
- package/dist/http/index.d.ts.map +1 -1
- package/dist/http/mtls.d.ts +2 -1
- package/dist/http/mtls.d.ts.map +1 -1
- package/dist/http/oauth-pkce.d.ts +141 -0
- package/dist/http/oauth-pkce.d.ts.map +1 -0
- package/dist/http/pages.d.ts +3 -0
- package/dist/http/pages.d.ts.map +1 -1
- package/dist/http/sticky.d.ts +124 -0
- package/dist/http/sticky.d.ts.map +1 -0
- package/dist/http/token.d.ts +38 -12
- package/dist/http/token.d.ts.map +1 -1
- package/dist/http/types.d.ts +68 -5
- package/dist/http/types.d.ts.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1275 -3507
- package/dist/index.js.map +19 -37
- package/dist/launcher/hash.d.ts +22 -0
- package/dist/launcher/hash.d.ts.map +1 -0
- package/dist/launcher/index.d.ts +23 -0
- package/dist/launcher/index.d.ts.map +1 -0
- package/dist/launcher/launch.d.ts +27 -0
- package/dist/launcher/launch.d.ts.map +1 -0
- package/dist/launcher/lock.d.ts +19 -0
- package/dist/launcher/lock.d.ts.map +1 -0
- package/dist/launcher/serve-unix.d.ts +54 -0
- package/dist/launcher/serve-unix.d.ts.map +1 -0
- package/dist/launcher/state.d.ts +59 -0
- package/dist/launcher/state.d.ts.map +1 -0
- package/dist/otel.d.ts.map +1 -1
- package/dist/protocol.d.ts +16 -2
- package/dist/protocol.d.ts.map +1 -1
- package/dist/schema.d.ts +45 -18
- package/dist/schema.d.ts.map +1 -1
- package/dist/server.d.ts +23 -2
- package/dist/server.d.ts.map +1 -1
- package/dist/types.d.ts +216 -12
- package/dist/types.d.ts.map +1 -1
- package/dist/util/gzip.d.ts +10 -0
- package/dist/util/gzip.d.ts.map +1 -0
- package/dist/util/schema.d.ts +3 -15
- package/dist/util/schema.d.ts.map +1 -1
- package/dist/util/web-crypto.d.ts +22 -0
- package/dist/util/web-crypto.d.ts.map +1 -0
- package/dist/util/zstd.d.ts +26 -3
- package/dist/util/zstd.d.ts.map +1 -1
- package/dist/wire/opaque.d.ts +11 -0
- package/dist/wire/opaque.d.ts.map +1 -0
- package/dist/wire/reader.d.ts +5 -5
- package/dist/wire/reader.d.ts.map +1 -1
- package/dist/wire/request.d.ts +11 -3
- package/dist/wire/request.d.ts.map +1 -1
- package/dist/wire/response.d.ts +6 -6
- package/dist/wire/response.d.ts.map +1 -1
- package/dist/wire/writer.d.ts +49 -39
- package/dist/wire/writer.d.ts.map +1 -1
- package/package.json +24 -10
- package/src/access-log.ts +195 -0
- package/src/arrow/impl-arrowjs/index.ts +433 -0
- package/src/arrow/impl-flechette/index.ts +414 -0
- package/src/arrow/impl-flechette/message-meta.ts +174 -0
- package/src/arrow/index.ts +89 -0
- package/src/arrow/predicates.ts +56 -0
- package/src/arrow/types.ts +73 -0
- package/src/client/capabilities.ts +84 -0
- package/src/client/connect.ts +103 -26
- package/src/client/introspect.ts +60 -38
- package/src/client/ipc.ts +37 -27
- package/src/client/pipe.ts +12 -9
- package/src/client/stream.ts +34 -19
- package/src/client/uploadUrl.ts +169 -0
- package/src/constants.ts +18 -1
- package/src/crypto.ts +95 -0
- package/src/dispatch/describe.ts +146 -107
- package/src/dispatch/stream.ts +53 -24
- package/src/dispatch/unary.ts +5 -4
- package/src/errors.ts +76 -0
- package/src/external.ts +43 -29
- package/src/http/bearer.ts +2 -5
- package/src/http/common.ts +90 -23
- package/src/http/dispatch.ts +373 -46
- package/src/http/handler.ts +794 -68
- package/src/http/index.ts +1 -0
- package/src/http/mtls.ts +18 -3
- package/src/http/oauth-pkce.ts +1035 -0
- package/src/http/pages.ts +30 -15
- package/src/http/sticky.ts +429 -0
- package/src/http/token.ts +165 -75
- package/src/http/types.ts +69 -5
- package/src/index.ts +40 -1
- package/src/launcher/hash.ts +104 -0
- package/src/launcher/index.ts +35 -0
- package/src/launcher/launch.ts +284 -0
- package/src/launcher/lock.ts +171 -0
- package/src/launcher/serve-unix.ts +385 -0
- package/src/launcher/state.ts +245 -0
- package/src/otel.ts +39 -33
- package/src/protocol.ts +27 -3
- package/src/schema.ts +107 -56
- package/src/server.ts +196 -20
- package/src/types.ts +322 -18
- package/src/util/gzip.ts +63 -0
- package/src/util/schema.ts +4 -22
- package/src/util/web-crypto.ts +98 -0
- package/src/util/zstd.ts +133 -14
- package/src/wire/opaque.ts +37 -0
- package/src/wire/reader.ts +5 -4
- package/src/wire/request.ts +67 -8
- package/src/wire/response.ts +51 -85
- package/src/wire/writer.ts +165 -69
- package/dist/util/conform.d.ts +0 -18
- package/dist/util/conform.d.ts.map +0 -1
- package/src/util/conform.ts +0 -94
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Generic Unix-socket worker launcher — TypeScript port of
|
|
6
|
+
* `vgi_rpc.launcher.launch`.
|
|
7
|
+
*
|
|
8
|
+
* Coordinates spawn-or-reuse of long-running worker processes that serve
|
|
9
|
+
* RPC over `AF_UNIX` sockets. Designed for clients that want a warm
|
|
10
|
+
* worker without managing its lifecycle themselves.
|
|
11
|
+
*
|
|
12
|
+
* Architecture (cross-language identical to the Python implementation):
|
|
13
|
+
*
|
|
14
|
+
* - The launcher derives a deterministic socket path from a hash of the
|
|
15
|
+
* worker command tuple (cmd + args + cwd + `VGI_RPC_*` env), so the
|
|
16
|
+
* same worker is reused across unrelated callers.
|
|
17
|
+
* - Concurrent first-callers serialise on a per-hash lockfile.
|
|
18
|
+
* - Each spawned worker self-terminates after `idleTimeout` seconds with
|
|
19
|
+
* zero connected clients (the worker side enforces this — see the
|
|
20
|
+
* `serveUnix` runner in the same module for TS workers, or
|
|
21
|
+
* `vgi_rpc.rpc.serve_unix` for Python).
|
|
22
|
+
*
|
|
23
|
+
* Worker contract — across language ports:
|
|
24
|
+
*
|
|
25
|
+
* - Accept `--unix PATH` and `--idle-timeout SEC` on the command line.
|
|
26
|
+
* - Emit exactly one line `UNIX:<absolute-path>\n` on **stdout** (flushed)
|
|
27
|
+
* once bind+listen succeed. Write nothing further to stdout afterward.
|
|
28
|
+
* - Tolerate (or suppress) stdout noise *before* the bind line — the
|
|
29
|
+
* launcher skips non-`UNIX:` prefix lines for resilience.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
|
|
33
|
+
import { createWriteStream, unlinkSync } from "node:fs";
|
|
34
|
+
|
|
35
|
+
import { computeHash } from "./hash.js";
|
|
36
|
+
import { acquireLock, tryAcquireLock } from "./lock.js";
|
|
37
|
+
import { defaultStateDir, gcStateDir, probeSocket, socketPaths, writeMeta } from "./state.js";
|
|
38
|
+
|
|
39
|
+
/** Maximum number of stale entries the opportunistic in-launch GC scans. */
|
|
40
|
+
const DEFAULT_GC_LIMIT = 16; // handed to gcStateDir as its `limit` option at the end of launch()
|
|
41
|
+
|
|
42
|
+
/** Options accepted by {@link launch}. */
export interface LaunchConfig {
  /** Worker executable followed by its arguments; an empty array is rejected. */
  workerArgv: readonly string[];
  /**
   * Socket path to use (resolved to an absolute path). When left out, the
   * path is derived from a hash computed over `workerArgv`.
   */
  socketPath?: string;
  /**
   * Idle period, in seconds, after which the worker shuts itself down.
   * Passed to the worker as `--idle-timeout SEC`. Defaults to 300.
   */
  idleTimeout?: number;
  /** Upper bound, in seconds, on waiting for the file lock. Defaults to 30. */
  connectTimeout?: number;
  /** Upper bound, in seconds, on waiting for the worker's `UNIX:<path>` line. Defaults to 60. */
  workerStartupTimeout?: number;
  /** File the worker's stderr is appended to; stderr is discarded when unset. */
  workerStderr?: string;
  /** State directory to use instead of the default one. */
  stateDir?: string;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Make sure a worker is serving and hand back the path of its socket.
 *
 * While holding the lockfile for the socket, the socket is probed: a worker
 * that answers is reused as-is; otherwise the leftover socket file is removed
 * and a fresh worker is spawned. Any failure to bring a worker up is thrown
 * to the caller.
 */
export async function launch(config: LaunchConfig): Promise<string> {
  const argv = config.workerArgv;
  if (!argv || argv.length === 0) {
    throw new Error("workerArgv must be non-empty");
  }
  const stateDir = config.stateDir ?? defaultStateDir();
  const idleTimeout = config.idleTimeout ?? 300;
  const connectTimeoutMs = (config.connectTimeout ?? 30) * 1000;
  const startupTimeoutMs = (config.workerStartupTimeout ?? 60) * 1000;

  // Caller-supplied socket paths keep `hashId` and `metaPath` at null: they
  // get a sibling lockfile, no .meta file, and are skipped by status/gc.
  let hashId: string | null = null;
  let metaPath: string | null = null;
  let lockPath: string;
  let sockPath: string;

  if (config.socketPath === undefined) {
    hashId = await computeHash(argv);
    const derived = socketPaths(stateDir, hashId);
    lockPath = derived.lockPath;
    sockPath = derived.sockPath;
    metaPath = derived.metaPath;
  } else {
    const nodePath = await import("node:path");
    sockPath = nodePath.resolve(config.socketPath);
    lockPath = `${sockPath}.lock`;
  }

  const lock = await acquireLock(lockPath, connectTimeoutMs);
  try {
    // A successful probe means a worker is already serving on this socket.
    const alreadyServing = await probeSocket(sockPath);
    if (!alreadyServing) {
      // Remove a stale socket file. ENOENT is the normal case; every other
      // error is deliberately ignored as well (matches Python's OSError
      // suppression for Windows ERROR_SHARING_VIOLATION).
      try {
        unlinkSync(sockPath);
      } catch {
        // intentionally ignored
      }
      if (metaPath !== null) {
        writeMeta(metaPath, argv, process.cwd(), sockPath);
      }
      await spawnWorker(argv, sockPath, idleTimeout, config.workerStderr ?? null, startupTimeoutMs);
    }
    return sockPath;
  } finally {
    lock.release();
    // Opportunistic, bounded GC once the lock is released. Best-effort only:
    // a failure here never affects the outcome of the launch.
    if (hashId !== null) {
      try {
        await gcStateDir(
          stateDir,
          async (p) => {
            const held = tryAcquireLock(p);
            return held ? () => held.release() : null;
          },
          { limit: DEFAULT_GC_LIMIT, excludeHash: hashId },
        );
      } catch {
        // GC is best-effort.
      }
    }
  }
}
|
|
134
|
+
|
|
135
|
+
/**
 * Spawn the worker and resolve once it prints `UNIX:<sockPath>` on stdout.
 *
 * `--unix <sockPath> --idle-timeout <idleTimeout>` is appended to
 * `workerArgv`. Stdout lines that do not start with `UNIX:` are skipped.
 * When `workerStderr` is a path, the worker's stderr is appended to that
 * file; otherwise it is discarded.
 *
 * @param workerArgv - Worker executable followed by its arguments.
 * @param sockPath - Socket path the worker is told to bind and must announce.
 * @param idleTimeout - Value forwarded as `--idle-timeout`.
 * @param workerStderr - Log file for the worker's stderr, or `null` to discard it.
 * @param startupTimeoutMs - Maximum time to wait for the announcement.
 * @throws Error when the worker cannot be started, exits or closes stdout
 *   before the announcement, announces a different path, or does not
 *   announce within `startupTimeoutMs`.
 */
async function spawnWorker(
  workerArgv: readonly string[],
  sockPath: string,
  idleTimeout: number,
  workerStderr: string | null,
  startupTimeoutMs: number,
): Promise<void> {
  const cmd = workerArgv[0];
  if (cmd === undefined) {
    throw new Error("workerArgv must be non-empty");
  }
  const args = [...workerArgv.slice(1), "--unix", sockPath, "--idle-timeout", String(idleTimeout)];

  const stderrTarget = workerStderr === null ? "ignore" : "pipe";

  const proc = spawn(cmd, args, {
    stdio: ["ignore", "pipe", stderrTarget],
    detached: false,
  }) as ChildProcessWithoutNullStreams;

  type Outcome =
    | { kind: "line"; value: { done: boolean; value: string } }
    | { kind: "exit"; rc: number | null }
    | { kind: "error"; err: Error }
    | { kind: "timeout" };

  // A failed spawn (e.g. ENOENT) is reported through an `error` event, not
  // `exit`; without a listener it would surface as an uncaught exception.
  // The listener stays attached so that a later `error` event (such as a
  // failed kill) cannot crash the caller either.
  const failed = new Promise<Outcome>((resolve) => {
    proc.on("error", (err) => resolve({ kind: "error", err }));
  });
  // One exit listener for the whole wait, instead of one per stdout line.
  const exited = onceExit(proc).then((rc): Outcome => ({ kind: "exit", rc }));

  if (workerStderr !== null && proc.stderr) {
    const workerErr = proc.stderr;
    // Append mode so multiple worker generations share one log file.
    const sink = createWriteStream(workerStderr, { flags: "a" });
    sink.on("error", () => {
      // An unusable log file must not take the launcher down; keep draining
      // stderr so the worker never blocks on a full pipe.
      workerErr.unpipe(sink);
      workerErr.resume();
    });
    workerErr.pipe(sink);
  }

  const expectedLine = `UNIX:${sockPath}`;
  const reader = lineReader(proc.stdout);

  // A single cancellable deadline; it is cleared on every way out so no
  // timer outlives this call.
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<Outcome>((resolve) => {
    timer = setTimeout(() => resolve({ kind: "timeout" }), Math.max(0, startupTimeoutMs));
  });

  try {
    for (;;) {
      // Race the next stdout line against spawn failure, exit and the deadline.
      let outcome = await Promise.race([
        reader.next().then((value): Outcome => ({ kind: "line", value })),
        exited,
        failed,
        timedOut,
      ]);

      if (outcome.kind === "line" && outcome.value.done) {
        // stdout closed without the announcement: wait for the exit status,
        // still bounded by the startup deadline.
        outcome = await Promise.race([exited, failed, timedOut]);
      }

      switch (outcome.kind) {
        case "error":
          throw new Error(`failed to start worker ${JSON.stringify(cmd)}: ${outcome.err.message}`);
        case "exit":
          throw new Error(`worker exited before readiness (rc=${outcome.rc})`);
        case "timeout":
          proc.kill("SIGTERM");
          throw new Error(`worker did not emit UNIX:<path> within ${startupTimeoutMs}ms`);
        case "line": {
          const line = outcome.value.value;
          if (!line.startsWith("UNIX:")) {
            // Non-matching prefix — third-party noise; log at debug and keep reading.
            if (process.env.VGI_RPC_LAUNCHER_DEBUG) {
              process.stderr.write(`launcher: skipping pre-bind stdout line: ${JSON.stringify(line)}\n`);
            }
            break;
          }
          if (line !== expectedLine) {
            proc.kill("SIGTERM");
            throw new Error(
              `worker bound to unexpected path: ${JSON.stringify(line)} (expected ${JSON.stringify(expectedLine)})`,
            );
          }
          // Drain remaining stdout so a buffer-full doesn't deadlock the worker.
          reader.drainAndDiscard();
          return;
        }
      }
    }
  } finally {
    clearTimeout(timer);
  }
}
|
|
205
|
+
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
// Internal helpers
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
/** Pull-style access to the text lines of a stream. */
interface LineReader {
  /** Resolve with the next line, or with `done: true` once the stream has finished. */
  next(): Promise<{ done: boolean; value: string }>;
  /** Stop collecting lines and let the rest of the stream be thrown away. */
  drainAndDiscard(): void;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Newline-delimited line reader over a Node Readable stream.
 *
 * Incoming chunks are split on `\n` (a trailing `\r` is stripped from each
 * line). Complete lines are queued and handed out through `next()`. Text
 * after the last newline is held back until more data arrives, and is
 * delivered as a final line when the stream ends. A stream error is treated
 * like end-of-stream.
 *
 * @param stream - Source stream; switched to UTF-8 decoding when it supports it.
 * @returns A reader whose `next()` resolves in call order and whose
 *   `drainAndDiscard()` drops queued lines and ignores all further data.
 */
function lineReader(stream: NodeJS.ReadableStream): LineReader {
  type Item = { done: boolean; value: string };
  const lines: string[] = [];
  const pending: Array<(item: Item) => void> = [];
  let partial = "";
  let finished = false;
  let discarding = false;

  // Serve every waiter that can be satisfied right now. Serving only one per
  // event would leave later waiters hanging even though lines are queued.
  const settle = (): void => {
    while (pending.length > 0) {
      let item: Item;
      if (lines.length > 0) {
        item = { done: false, value: lines.shift() ?? "" };
      } else if (finished) {
        item = { done: true, value: "" };
      } else {
        return;
      }
      pending.shift()?.(item);
    }
  };

  stream.setEncoding?.("utf8");
  stream.on("data", (chunk) => {
    if (discarding) return;
    const pieces = (partial + String(chunk)).split("\n");
    // The last piece has no terminating newline yet; keep it for later.
    partial = pieces.pop() ?? "";
    for (const piece of pieces) {
      lines.push(piece.replace(/\r$/, ""));
    }
    settle();
  });
  stream.on("end", () => {
    finished = true;
    if (partial.length > 0) {
      lines.push(partial.replace(/\r$/, ""));
      partial = "";
    }
    settle();
  });
  stream.on("error", () => {
    finished = true;
    settle();
  });

  return {
    next() {
      return new Promise((resolve) => {
        pending.push(resolve);
        settle();
      });
    },
    drainAndDiscard() {
      discarding = true;
      // Drop any queued lines and let the stream flow into the void.
      lines.length = 0;
      stream.resume?.();
    },
  };
}
|
|
275
|
+
|
|
276
|
+
/** Resolve with the exit code carried by the process's next `exit` event. */
function onceExit(proc: ChildProcessWithoutNullStreams): Promise<number | null> {
  return new Promise<number | null>((settle) => {
    proc.once("exit", (code) => {
      settle(code);
    });
  });
}
|
|
281
|
+
|
|
282
|
+
/** Resolve after `ms` milliseconds; negative values are clamped to zero. */
function delay(ms: number): Promise<void> {
  const waitMs = Math.max(0, ms);
  return new Promise<void>((done) => {
    setTimeout(done, waitMs);
  });
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Cross-process file lock with PID-stamp fallback.
|
|
6
|
+
*
|
|
7
|
+
* Python's launcher uses `filelock` (POSIX `flock(2)` / Windows
|
|
8
|
+
* `LockFileEx`), which auto-releases on process death. Node has no
|
|
9
|
+
* equivalent in its standard library, so we approximate it with a
|
|
10
|
+
* persistent PID-stamp protocol:
|
|
11
|
+
*
|
|
12
|
+
* - File exists, empty content → unlocked (slot marker)
|
|
13
|
+
* - File exists, content `<PID>`, PID alive → held by that PID
|
|
14
|
+
* - File exists, content `<PID>`, PID dead → stale, treat as unlocked
|
|
15
|
+
* - File doesn't exist → unlocked (slot never used)
|
|
16
|
+
*
|
|
17
|
+
* The lockfile **persists** after release (we truncate to zero bytes
|
|
18
|
+
* rather than unlinking) so cross-language scanners — `statusRows` /
|
|
19
|
+
* `gcStateDir` here and Python's `gc_state_dir` — can use lockfile
|
|
20
|
+
* presence as a "this hash slot has been used at some point" marker
|
|
21
|
+
* even when no launcher is currently coordinating.
|
|
22
|
+
*
|
|
23
|
+
* The acquire path has a small race window: between reading the stamp
|
|
24
|
+
* and writing ours, another process can interleave. Mitigations:
|
|
25
|
+
*
|
|
26
|
+
* 1. After writing our PID we re-read and verify; on mismatch we
|
|
27
|
+
* retry up to a small bound, then back off.
|
|
28
|
+
* 2. The launcher's bind() step is itself a kernel mutex — two
|
|
29
|
+
* workers racing past the lock will see exactly one bind()
|
|
30
|
+
* succeed; the other fails fast with EADDRINUSE and the second
|
|
31
|
+
* launcher's spawnWorker() surfaces the error.
|
|
32
|
+
*
|
|
33
|
+
* That's looser than `flock`-based mutual exclusion but adequate for
|
|
34
|
+
* the launcher's use case: the protected critical section is short
|
|
35
|
+
* (probe + spawn).
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import { closeSync, constants as FS, ftruncateSync, openSync, readSync, statSync, writeSync } from "node:fs";
|
|
39
|
+
|
|
40
|
+
/** Handle returned to the caller that acquired a lock. */
export interface FileLockHandle {
  /** Location of the lockfile; informational only. */
  readonly path: string;
  /**
   * Give the lock up by truncating the lockfile to zero bytes. The file is
   * kept on disk as a slot marker. Calling this more than once is harmless.
   */
  release(): void;
}
|
|
48
|
+
|
|
49
|
+
/** Pause, in milliseconds, between acquisition attempts in `acquireLock`. */
const POLL_MS = 50;
|
|
50
|
+
/** Max retries for the post-write verify step. */
|
|
51
|
+
const VERIFY_RETRIES = 5; // bounds the attempt loop in tryAcquireLock
|
|
52
|
+
|
|
53
|
+
/**
 * Report whether `pid` refers to an existing process.
 *
 * Values that are not positive integers are never considered alive.
 */
function pidAlive(pid: number): boolean {
  const plausible = Number.isInteger(pid) && pid > 0;
  if (!plausible) {
    return false;
  }
  try {
    // Signal 0 sends nothing; the call only checks that the target exists.
    process.kill(pid, 0);
  } catch (err) {
    // EPERM: the process exists but we may not signal it. Any other failure
    // is treated as "not alive".
    const code = (err as { code?: string })?.code;
    return code === "EPERM";
  }
  return true;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Read the PID stamp stored in the lockfile at `path`.
 *
 * @returns The integer found in the first 64 bytes of the file, or 0 when
 *   the file is missing, unreadable, empty, or does not hold an integer.
 */
function readPid(path: string): number {
  try {
    const fd = openSync(path, FS.O_RDONLY);
    let text: string;
    try {
      const scratch = Buffer.alloc(64);
      const count = readSync(fd, scratch, 0, scratch.length, 0);
      text = scratch.toString("utf8", 0, count).trim();
    } finally {
      closeSync(fd);
    }
    const pid = text === "" ? 0 : Number(text);
    return Number.isInteger(pid) ? pid : 0;
  } catch {
    // Missing or unreadable lockfile counts as "no stamp".
    return 0;
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Overwrite the lockfile at `path` with this process's PID.
 *
 * The file is opened read/write (created with mode 0o600 when missing),
 * truncated to zero bytes, and the PID is written from offset 0. The file is
 * not opened with `O_TRUNC`/`O_EXCL`, so two processes can interleave here;
 * the caller's re-read of the stamp is what detects a lost race.
 *
 * @returns `true` when the file's size afterwards equals the stamp's length,
 *   `false` when it does not (a cheap hint that another writer interleaved).
 * @throws When the file cannot be opened, truncated, written or stat'ed.
 */
function tryStampPid(path: string): boolean {
  const fd = openSync(path, FS.O_RDWR | FS.O_CREAT, 0o600);
  try {
    const stamp = Buffer.from(String(process.pid), "utf8");
    // Statically imported: `require` does not exist in an ES module.
    ftruncateSync(fd, 0);
    let written = 0;
    while (written < stamp.length) {
      const n = writeSync(fd, stamp, written, stamp.length - written, written);
      if (n <= 0) throw new Error(`writeSync returned ${n}`);
      written += n;
    }
    // Lossy sanity check for the basic interleave race: a concurrent writer
    // with a different-length PID leaves a file of a different size.
    return statSync(path).size === stamp.length;
  } finally {
    closeSync(fd);
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Truncate the lockfile at `path` to zero bytes, leaving the file in place.
 *
 * Best-effort: a lockfile that is missing or cannot be opened is ignored,
 * and the file is never created by this call.
 */
function clearStamp(path: string): void {
  try {
    const fd = openSync(path, FS.O_RDWR);
    try {
      // Statically imported: a `require` call here would throw in an ES
      // module and the catch below would hide it, so the stamp would never
      // be cleared.
      ftruncateSync(fd, 0);
    } finally {
      closeSync(fd);
    }
  } catch {
    // already gone
  }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Try to acquire the lock once, without blocking.
 *
 * A stamp naming a live process means the lock is held and `null` is
 * returned (this includes a stamp written by the current process: the lock
 * is not reentrant). A missing, empty or stale stamp is overwritten with our
 * PID and then re-read; if the re-read does not show our PID, a peer won the
 * race and the attempt is repeated, up to `VERIFY_RETRIES` times.
 *
 * @returns A handle whose `release()` frees the lock, or `null` when the
 *   lock is held by a live process or every attempt lost the race.
 * @throws When the lockfile cannot be opened or written.
 */
export function tryAcquireLock(lockPath: string): FileLockHandle | null {
  for (let attempt = 0; attempt < VERIFY_RETRIES; attempt++) {
    const holder = readPid(lockPath);
    if (holder > 0 && pidAlive(holder)) {
      return null;
    }
    // Stale, empty, or missing — try to claim the slot.
    if (!tryStampPid(lockPath)) continue;
    if (readPid(lockPath) !== process.pid) {
      // Lost the race to a peer that wrote after our truncate.
      continue;
    }
    let released = false;
    return {
      path: lockPath,
      release() {
        if (released) return;
        released = true;
        // Only clear a stamp that is still ours. If a peer overwrote it in
        // the acquire race window, truncating would mark the slot unlocked
        // while that peer is still inside its critical section.
        if (readPid(lockPath) === process.pid) {
          clearStamp(lockPath);
        }
      },
    };
  }
  return null;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Acquire the lock, polling every `POLL_MS` milliseconds until it is free.
 *
 * At least one attempt is always made, even with a zero or negative timeout.
 *
 * @throws Error when the lock is still unavailable once `timeoutMs` has elapsed.
 */
export async function acquireLock(lockPath: string, timeoutMs: number): Promise<FileLockHandle> {
  const giveUpAt = Date.now() + Math.max(0, timeoutMs);
  let handle = tryAcquireLock(lockPath);
  while (!handle) {
    if (Date.now() >= giveUpAt) {
      throw new Error(`failed to acquire ${lockPath} within ${timeoutMs}ms`);
    }
    await new Promise((wake) => setTimeout(wake, POLL_MS));
    handle = tryAcquireLock(lockPath);
  }
  return handle;
}
|