@vellumai/assistant 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +29 -28
- package/Dockerfile +1 -0
- package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
- package/bun.lock +3 -0
- package/knip.json +1 -0
- package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
- package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
- package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
- package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
- package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
- package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
- package/openapi.yaml +22 -4
- package/package.json +3 -1
- package/src/__tests__/annotate-risk-options.test.ts +291 -0
- package/src/__tests__/approval-cascade.test.ts +8 -16
- package/src/__tests__/approval-routes-http.test.ts +6 -0
- package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
- package/src/__tests__/call-constants.test.ts +10 -1
- package/src/__tests__/call-controller.test.ts +127 -0
- package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
- package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
- package/src/__tests__/context-search-memory-source.test.ts +3 -26
- package/src/__tests__/context-search-pkb-source.test.ts +12 -6
- package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
- package/src/__tests__/conversation-agent-loop.test.ts +3 -3
- package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
- package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
- package/src/__tests__/conversation-process-callsite.test.ts +1 -6
- package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
- package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
- package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
- package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
- package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
- package/src/__tests__/filing-service.test.ts +2 -19
- package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
- package/src/__tests__/injector-chain.test.ts +24 -16
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
- package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
- package/src/__tests__/notification-decision-fallback.test.ts +91 -0
- package/src/__tests__/notification-decision-strategy.test.ts +22 -0
- package/src/__tests__/oauth-cli.test.ts +121 -0
- package/src/__tests__/relay-server.test.ts +46 -2
- package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
- package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
- package/src/__tests__/secret-response-routing.test.ts +7 -5
- package/src/__tests__/server-history-render.test.ts +82 -0
- package/src/__tests__/skill-include-graph.test.ts +31 -0
- package/src/__tests__/skill-load-tool.test.ts +44 -16
- package/src/__tests__/skills.test.ts +39 -0
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
- package/src/__tests__/tool-executor.test.ts +155 -0
- package/src/__tests__/voice-session-bridge.test.ts +3 -0
- package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
- package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
- package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
- package/src/agent/loop.ts +11 -0
- package/src/approvals/guardian-decision-primitive.ts +0 -13
- package/src/approvals/guardian-request-resolvers.ts +4 -32
- package/src/calls/call-constants.ts +5 -8
- package/src/calls/call-controller.ts +130 -67
- package/src/calls/relay-server.ts +7 -1
- package/src/calls/voice-session-bridge.ts +1 -1
- package/src/cli/commands/memory-v2.ts +7 -7
- package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
- package/src/cli/commands/oauth/connect.ts +10 -52
- package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
- package/src/config/feature-flag-registry.json +1 -17
- package/src/config/loader.ts +72 -19
- package/src/config/schemas/memory-v2.ts +1 -1
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
- package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
- package/src/daemon/conversation-agent-loop.ts +13 -10
- package/src/daemon/conversation-lifecycle.ts +22 -8
- package/src/daemon/conversation-surfaces.ts +16 -14
- package/src/daemon/conversation-tool-setup.ts +9 -5
- package/src/daemon/conversation.ts +1 -1
- package/src/daemon/handlers/shared.ts +26 -0
- package/src/daemon/host-bash-proxy.ts +1 -1
- package/src/daemon/host-browser-proxy.ts +1 -1
- package/src/daemon/host-cu-proxy.ts +1 -1
- package/src/daemon/host-file-proxy.ts +1 -1
- package/src/daemon/host-transfer-proxy.ts +2 -2
- package/src/daemon/lifecycle.ts +88 -73
- package/src/daemon/memory-v2-startup.ts +55 -14
- package/src/daemon/message-types/messages.ts +19 -1
- package/src/documents/document-store.ts +35 -1
- package/src/filing/filing-service.ts +2 -3
- package/src/heartbeat/heartbeat-service.ts +1 -1
- package/src/ipc/assistant-server.ts +93 -36
- package/src/ipc/skill-server.ts +99 -42
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
- package/src/memory/context-search/sources/memory-v2.ts +1 -17
- package/src/memory/context-search/sources/memory.ts +2 -2
- package/src/memory/context-search/sources/pkb.ts +2 -3
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
- package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
- package/src/memory/graph/conversation-graph-memory.ts +32 -9
- package/src/memory/graph/graph-search.test.ts +6 -5
- package/src/memory/graph/graph-search.ts +3 -4
- package/src/memory/graph/retriever.test.ts +12 -7
- package/src/memory/graph/retriever.ts +4 -5
- package/src/memory/graph/tool-handlers.ts +3 -4
- package/src/memory/graph/tools.ts +4 -4
- package/src/memory/indexer.ts +1 -2
- package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
- package/src/memory/jobs/embed-concept-page.ts +223 -87
- package/src/memory/jobs-worker.ts +8 -4
- package/src/memory/pkb/pkb-search.test.ts +6 -5
- package/src/memory/pkb/pkb-search.ts +4 -5
- package/src/memory/qdrant-client.ts +3 -0
- package/src/memory/search/semantic.ts +4 -5
- package/src/memory/v2/__tests__/activation.test.ts +35 -5
- package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
- package/src/memory/v2/__tests__/injection.test.ts +140 -23
- package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
- package/src/memory/v2/__tests__/sim.test.ts +118 -7
- package/src/memory/v2/__tests__/static-context.test.ts +1 -13
- package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
- package/src/memory/v2/consolidation-job.ts +7 -8
- package/src/memory/v2/injection.ts +32 -12
- package/src/memory/v2/page-store.ts +39 -0
- package/src/memory/v2/prompts/consolidation.ts +5 -0
- package/src/memory/v2/qdrant.ts +209 -48
- package/src/memory/v2/sim.ts +67 -26
- package/src/memory/v2/static-context.ts +4 -8
- package/src/memory/v2/sweep-job.ts +5 -6
- package/src/memory/v2/types.ts +7 -0
- package/src/notifications/copy-composer.ts +46 -12
- package/src/notifications/decision-engine.ts +46 -0
- package/src/permissions/gateway-threshold-reader.ts +116 -8
- package/src/permissions/prompter.ts +86 -96
- package/src/permissions/secret-prompter.ts +31 -31
- package/src/plugins/defaults/injectors.ts +1 -2
- package/src/proactive-artifact/job.test.ts +51 -4
- package/src/proactive-artifact/job.ts +16 -2
- package/src/proactive-artifact/message-copy.ts +18 -1
- package/src/prompts/templates/SOUL.md +13 -28
- package/src/runtime/auth/route-policy.ts +1 -0
- package/src/runtime/channel-approvals.ts +3 -2
- package/src/runtime/guardian-reply-router.ts +0 -10
- package/src/runtime/pending-interactions.ts +19 -15
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
- package/src/runtime/routes/approval-routes.ts +7 -3
- package/src/runtime/routes/consolidation-routes.ts +8 -9
- package/src/runtime/routes/conversation-query-routes.ts +44 -1
- package/src/runtime/routes/debug-bash-routes.ts +2 -0
- package/src/runtime/routes/filing-routes.ts +2 -3
- package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
- package/src/runtime/routes/memory-item-routes.test.ts +3 -9
- package/src/runtime/routes/memory-item-routes.ts +5 -6
- package/src/runtime/routes/memory-v2-routes.ts +103 -17
- package/src/skills/include-graph.ts +35 -13
- package/src/tools/document/document-tool.ts +20 -0
- package/src/tools/executor.ts +18 -2
- package/src/tools/memory/register.test.ts +7 -5
- package/src/tools/permission-checker.ts +15 -0
- package/src/tools/skills/load.ts +24 -20
- package/src/tools/tool-name-aliases.ts +19 -0
- package/src/tools/types.ts +19 -1
- package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
- package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
- package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
- package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
- package/src/workspace/migrations/registry.ts +6 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
import {
|
|
2
|
+
afterAll,
|
|
3
|
+
afterEach,
|
|
4
|
+
beforeEach,
|
|
5
|
+
describe,
|
|
6
|
+
expect,
|
|
7
|
+
test,
|
|
8
|
+
} from "bun:test";
|
|
9
|
+
import {
|
|
10
|
+
existsSync,
|
|
11
|
+
mkdtempSync,
|
|
12
|
+
rmSync,
|
|
13
|
+
unlinkSync,
|
|
14
|
+
} from "node:fs";
|
|
15
|
+
import { createConnection, createServer, type Server, type Socket } from "node:net";
|
|
16
|
+
import { tmpdir } from "node:os";
|
|
17
|
+
import { join } from "node:path";
|
|
18
|
+
|
|
19
|
+
import { SocketWatchdog, type SocketWatchdogLogger } from "./socket-watchdog.js";
|
|
20
|
+
|
|
21
|
+
// Unix-socket paths are capped at sizeof(sun_path)-1 == 103 bytes on macOS,
// and the shared test-preload temp dir is too long to fit. These tests mint
// their own short directory directly under tmpdir() instead.
const shortRoot = mkdtempSync(join(tmpdir(), "vmw-"));
const socketPath = join(shortRoot, "g.sock");

// Remove the scratch directory once every test in this file has run.
afterAll(() => {
  try {
    rmSync(shortRoot, { force: true, recursive: true });
  } catch {
    // best-effort: a leftover temp dir must not fail the suite
  }
});
|
|
34
|
+
|
|
35
|
+
/** Everything a test needs to drive a watchdog and inspect what it did. */
interface TestHarness {
  /** The watchdog under test. */
  watchdog: SocketWatchdog;
  /** Holder for the "current" server; tests mutate it to simulate stop()/restart. */
  serverRef: { current: Server | null };
  /** One entry per `onRebind` call, captured for assertions + cleanup. */
  rebinds: { newServer: Server; oldServer: Server }[];
  /** Logger handed to the watchdog. */
  log: SocketWatchdogLogger;
  /** Arguments of every `log.error` call the watchdog made. */
  loggedErrors: { obj: object; msg?: string }[];
  /** Every server created for this harness, so teardown can close them all. */
  spawnedServers: Server[];
}
|
|
46
|
+
|
|
47
|
+
/** Optional knobs for `buildHarness`; each omitted field falls back to a harness default. */
interface BuildOptions {
  /** Poll interval handed to the watchdog; `buildHarness` passes 0 when omitted. */
  intervalMs?: number;
  /** Replaces the harness's default server factory. */
  createServerOverride?: () => Server;
  /** Replaces the harness's default `getServer` callback, e.g. to simulate races. */
  getServerOverride?: () => Server | null;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Construct a SocketWatchdog for the shared `socketPath`, wired to in-memory
 * bookkeeping so tests can observe rebinds and error logs.
 *
 * @param opts Optional overrides for the poll interval, the server factory,
 *   and the `getServer` callback.
 * @returns The watchdog together with the mutable state it reports into.
 */
function buildHarness(opts: BuildOptions): TestHarness {
  const serverRef: TestHarness["serverRef"] = { current: null };
  const rebinds: TestHarness["rebinds"] = [];
  const loggedErrors: TestHarness["loggedErrors"] = [];
  const spawnedServers: Server[] = [];

  // Only error-level output is recorded; info and warn are discarded.
  const discard = () => {};
  const log: SocketWatchdogLogger = {
    info: discard,
    warn: discard,
    error(obj, msg) {
      loggedErrors.push({ obj, msg });
    },
  };

  // Default factory: an unbound server with a swallow-all error handler,
  // remembered in spawnedServers so teardown can close it.
  function spawnServer(): Server {
    const fresh = createServer();
    fresh.on("error", () => {
      /* tests don't care; suppress */
    });
    spawnedServers.push(fresh);
    return fresh;
  }

  const currentServer = () => serverRef.current;

  const watchdog = new SocketWatchdog({
    socketPath,
    intervalMs: opts.intervalMs ?? 0,
    getServer: opts.getServerOverride ?? currentServer,
    createServer: opts.createServerOverride ?? spawnServer,
    onRebind(newServer, oldServer) {
      rebinds.push({ newServer, oldServer });
      serverRef.current = newServer;
      // Mirror gateway behavior: close the old server gracefully so its
      // accept-loop drains. Close errors are not the watchdog's concern.
      oldServer.close(() => {
        /* drained */
      });
    },
    log,
  });

  return { watchdog, serverRef, rebinds, log, loggedErrors, spawnedServers };
}
|
|
96
|
+
|
|
97
|
+
/**
 * Create a real server, bind it to `socketPath`, and install it as the
 * harness's current server.
 *
 * @param harness Harness whose `spawnedServers` and `serverRef` are updated.
 * @returns The server, once it has emitted `listening`. Rejects if an
 *   `error` event arrives first.
 */
async function startInitialServer(harness: TestHarness): Promise<Server> {
  const initial = createServer();
  // Permanent handler so later `error` events never go unhandled.
  initial.on("error", () => {
    /* ignore */
  });
  // Tracked before listen() so teardown closes it even when binding fails.
  harness.spawnedServers.push(initial);

  const bound = new Promise<void>((resolve, reject) => {
    initial.once("error", reject);
    initial.once("listening", () => resolve());
  });
  initial.listen(socketPath);
  await bound;

  harness.serverRef.current = initial;
  return initial;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Open a client connection to the socket at `path`.
 *
 * @param path Filesystem path of the socket to connect to.
 * @returns The connected socket. Rejects if the socket emits `error`.
 */
function connectClient(path: string): Promise<Socket> {
  return new Promise<Socket>((resolve, reject) => {
    const socket: Socket = createConnection(path);
    socket.once("connect", () => resolve(socket));
    socket.on("error", reject);
  });
}
|
|
122
|
+
|
|
123
|
+
/**
 * Close `server` and wait for its close callback to fire.
 *
 * Any error passed to the callback is ignored, so the returned promise
 * always resolves once the callback runs.
 */
async function closeServer(server: Server): Promise<void> {
  const closed = new Promise<void>((done) => {
    server.close(() => done());
  });
  await closed;
}
|
|
128
|
+
|
|
129
|
+
describe("SocketWatchdog", () => {
|
|
130
|
+
let harness: TestHarness | undefined;
|
|
131
|
+
const sockets: Socket[] = [];
|
|
132
|
+
|
|
133
|
+
beforeEach(() => {
  harness = undefined;
  // Defensive: a previous test whose afterEach didn't fully drain may have
  // left the socket file behind. Remove it so this test starts clean.
  if (!existsSync(socketPath)) return;
  try {
    unlinkSync(socketPath);
  } catch {
    /* ignore */
  }
});
|
|
145
|
+
|
|
146
|
+
afterEach(async () => {
  // Destroy every client socket the test registered, then forget them.
  for (const client of sockets) {
    if (client.destroyed) continue;
    client.destroy();
  }
  sockets.length = 0;

  const active = harness;
  if (active) {
    active.watchdog.stop();
    // Close every server created for the harness, however the test left
    // things. closeServer resolves even for an already-closed server.
    for (const srv of active.spawnedServers) {
      try {
        await closeServer(srv);
      } catch {
        /* already closed */
      }
    }
    harness = undefined;
  }

  // Finally drop the socket file itself, if one is still on disk.
  if (!existsSync(socketPath)) return;
  try {
    unlinkSync(socketPath);
  } catch {
    /* ignore */
  }
});
|
|
174
|
+
|
|
175
|
+
test("rebindIfMissing is a no-op when the socket path exists", async () => {
  harness = buildHarness({});
  await startInitialServer(harness);

  // The socket file is present, so nothing should be re-bound.
  const didRebind = await harness.watchdog.rebindIfMissing();

  expect(didRebind).toBe(false);
  expect(harness.rebinds).toHaveLength(0);
  expect(existsSync(socketPath)).toBe(true);
});
|
|
184
|
+
|
|
185
|
+
test("rebindIfMissing is a no-op when getServer returns null", async () => {
  // No server is ever started, so serverRef.current stays null.
  harness = buildHarness({});

  const didRebind = await harness.watchdog.rebindIfMissing();

  expect(didRebind).toBe(false);
  expect(harness.rebinds).toHaveLength(0);
});
|
|
192
|
+
|
|
193
|
+
test("rebindIfMissing recreates the listener when the path is gone", async () => {
  harness = buildHarness({});
  const original = await startInitialServer(harness);
  expect(existsSync(socketPath)).toBe(true);

  // Simulate the cleanup that wipes /run/* — remove the path entry while the
  // original listener is still open.
  unlinkSync(socketPath);
  expect(existsSync(socketPath)).toBe(false);

  const didRebind = await harness.watchdog.rebindIfMissing();
  expect(didRebind).toBe(true);
  expect(existsSync(socketPath)).toBe(true);

  // Exactly one rebind, replacing the original server with the new one.
  expect(harness.rebinds).toHaveLength(1);
  const [firstRebind] = harness.rebinds;
  expect(firstRebind!.oldServer).toBe(original);
  expect(harness.serverRef.current).toBe(firstRebind!.newServer);

  // A fresh client can connect to the re-bound listener.
  const freshClient = await connectClient(socketPath);
  sockets.push(freshClient);
  expect(freshClient.destroyed).toBe(false);
});
|
|
215
|
+
|
|
216
|
+
test("connected clients survive a rebind", async () => {
  harness = buildHarness({});
  await startInitialServer(harness);

  // Connect before the path disappears; this socket must outlive the rebind.
  const survivor = await connectClient(socketPath);
  sockets.push(survivor);
  expect(survivor.destroyed).toBe(false);

  unlinkSync(socketPath);
  const didRebind = await harness.watchdog.rebindIfMissing();
  expect(didRebind).toBe(true);

  // Give the old server's close callback a moment to settle without
  // churning the event loop.
  await new Promise((settle) => setTimeout(settle, 10));
  expect(survivor.destroyed).toBe(false);
});
|
|
232
|
+
|
|
233
|
+
test("rebindIfMissing aborts when getServer changes mid-listen (shutdown race)", async () => {
  // Drive the race deterministically by mutating what getServer returns
  // between its first call (precondition check) and its second call
  // (post-listen race guard).
  const initial = createServer();
  initial.on("error", () => {});
  const spawnedNewServers: Server[] = [];

  // Everything after server creation runs inside try/finally so a failed
  // assertion cannot leak a listening server into later tests.
  try {
    await new Promise<void>((resolve, reject) => {
      // Reject on a bind failure instead of hanging until the test times out.
      initial.once("error", reject);
      initial.once("listening", () => resolve());
      initial.listen(socketPath);
    });

    let getServerCalls = 0;
    const rebinds: Array<{ newServer: Server; oldServer: Server }> = [];

    const watchdog = new SocketWatchdog({
      socketPath,
      intervalMs: 0,
      getServer: () => {
        getServerCalls++;
        // First call: precondition — initialServer is still around.
        // Subsequent calls (race guard): null, simulating stop().
        return getServerCalls === 1 ? initial : null;
      },
      createServer: () => {
        const s = createServer();
        s.on("error", () => {});
        spawnedNewServers.push(s);
        return s;
      },
      onRebind: (n, o) => {
        rebinds.push({ newServer: n, oldServer: o });
      },
      log: { info: () => {}, warn: () => {}, error: () => {} },
    });

    unlinkSync(socketPath);
    expect(existsSync(socketPath)).toBe(false);

    const rebound = await watchdog.rebindIfMissing();
    expect(rebound).toBe(false);
    expect(rebinds).toHaveLength(0);
    // The race guard should have unlinked the path the discarded server
    // recreated, so a future start() doesn't see a phantom listener.
    expect(existsSync(socketPath)).toBe(false);
    // getServer was called at least twice — once for precondition, once
    // for the race guard.
    expect(getServerCalls).toBeGreaterThanOrEqual(2);
  } finally {
    // Cleanup: initial is still listening on the unlinked path; close it,
    // then any server the factory produced.
    await closeServer(initial);
    for (const s of spawnedNewServers) {
      try {
        await closeServer(s);
      } catch {
        /* already closed by race guard */
      }
    }
  }
});
|
|
292
|
+
|
|
293
|
+
test("rebindIfMissing returns false and logs when listen() rejects", async () => {
  const initial = createServer();
  initial.on("error", () => {});

  // try/finally guarantees `initial` is closed even when an assertion fails.
  try {
    await new Promise<void>((resolve, reject) => {
      // Reject on a bind failure instead of hanging until the test times out.
      initial.once("error", reject);
      initial.once("listening", () => resolve());
      initial.listen(socketPath);
    });

    const rebinds: Array<{ newServer: Server; oldServer: Server }> = [];
    const loggedErrors: Array<{ obj: object; msg?: string }> = [];

    // Factory whose listen() always errors, so rebindIfMissing hits its
    // catch branch. The stub never binds; it only emits `error`.
    const failingFactory = () => {
      const s = createServer();
      s.on("error", () => {});
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      (s as any).listen = (_path: string) => {
        queueMicrotask(() => s.emit("error", new Error("simulated EADDRINUSE")));
        return s;
      };
      return s;
    };

    const watchdog = new SocketWatchdog({
      socketPath,
      intervalMs: 0,
      getServer: () => initial,
      createServer: failingFactory,
      onRebind: (n, o) => rebinds.push({ newServer: n, oldServer: o }),
      log: {
        info: () => {},
        warn: () => {},
        error: (obj, msg) => loggedErrors.push({ obj, msg }),
      },
    });

    unlinkSync(socketPath);
    const rebound = await watchdog.rebindIfMissing();
    expect(rebound).toBe(false);
    expect(rebinds).toHaveLength(0);
    expect(loggedErrors.length).toBeGreaterThan(0);
  } finally {
    await closeServer(initial);
  }
});
|
|
341
|
+
|
|
342
|
+
test("watchdog timer catches synchronous rebind errors so unhandled rejections don't escape", async () => {
  // createServer factory throws synchronously — simulates EACCES on
  // mkdir / a broken factory dependency.
  const throwingFactory = () => {
    throw new Error("boom — synchronous factory failure");
  };

  const initial = createServer();
  initial.on("error", () => {});

  // try/finally guarantees `initial` is closed even when an assertion fails.
  try {
    await new Promise<void>((resolve, reject) => {
      // Reject on a bind failure instead of hanging until the test times out.
      initial.once("error", reject);
      initial.once("listening", () => resolve());
      initial.listen(socketPath);
    });

    const loggedErrors: Array<{ obj: object; msg?: string }> = [];
    const watchdog = new SocketWatchdog({
      socketPath,
      intervalMs: 5,
      getServer: () => initial,
      createServer: throwingFactory,
      onRebind: () => {},
      log: {
        info: () => {},
        warn: () => {},
        error: (obj, msg) => loggedErrors.push({ obj, msg }),
      },
    });

    unlinkSync(socketPath);

    const seenRejections: unknown[] = [];
    const onRejection = (reason: unknown) => seenRejections.push(reason);
    process.on("unhandledRejection", onRejection);

    try {
      watchdog.start();
      // Let the timer fire several times.
      await new Promise((r) => setTimeout(r, 30));
    } finally {
      // Always stop the timer and detach the listener, even if the wait
      // above throws.
      watchdog.stop();
      process.off("unhandledRejection", onRejection);
    }

    expect(seenRejections).toHaveLength(0);
    expect(loggedErrors.length).toBeGreaterThan(0);
  } finally {
    await closeServer(initial);
  }
});
|
|
390
|
+
|
|
391
|
+
test("start() polls and rebinds without manual ticking", async () => {
  harness = buildHarness({ intervalMs: 10 });
  await startInitialServer(harness);
  harness.watchdog.start();

  unlinkSync(socketPath);

  // Poll every 5ms, for at most 500ms, until the timer has recovered.
  const giveUpAt = Date.now() + 500;
  while (Date.now() < giveUpAt && harness.rebinds.length === 0) {
    await new Promise((tick) => setTimeout(tick, 5));
  }

  expect(harness.rebinds).toHaveLength(1);
  expect(existsSync(socketPath)).toBe(true);
});
|
|
407
|
+
|
|
408
|
+
test("stop() prevents future rebinds from firing", async () => {
  harness = buildHarness({ intervalMs: 10 });
  await startInitialServer(harness);
  harness.watchdog.start();

  // First recovery cycle: remove the path and wait (up to 500ms) for the
  // timer to rebind once.
  unlinkSync(socketPath);
  const giveUpAt = Date.now() + 500;
  while (Date.now() < giveUpAt && harness.rebinds.length < 1) {
    await new Promise((tick) => setTimeout(tick, 5));
  }
  expect(harness.rebinds).toHaveLength(1);

  harness.watchdog.stop();
  const rebindsAtStop = harness.rebinds.length;

  // Unlink again and wait 50ms (several 10ms intervals); no new rebind
  // should appear and the path should stay missing.
  unlinkSync(socketPath);
  await new Promise((tick) => setTimeout(tick, 50));
  expect(harness.rebinds).toHaveLength(rebindsAtStop);
  expect(existsSync(socketPath)).toBe(false);
});
|
|
430
|
+
});
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resilience helper for Unix-domain-socket IPC servers: re-binds the
|
|
3
|
+
* listening socket when its on-disk path entry has been removed (e.g. by a
|
|
4
|
+
* tmpfs sweep or rogue cleanup of `/run/*`).
|
|
5
|
+
*
|
|
6
|
+
* Existing connected sockets survive the re-bind because the kernel keeps
|
|
7
|
+
* connection inodes alive independently of the listener path; only new
|
|
8
|
+
* `connect()` calls require the path to exist.
|
|
9
|
+
*
|
|
10
|
+
* Consumers wire their `Server` reference into the watchdog via callbacks
|
|
11
|
+
* rather than passing the server directly so the watchdog can guard against
|
|
12
|
+
* shutdown/restart races mid-rebind.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { existsSync, mkdirSync, unlinkSync } from "node:fs";
|
|
16
|
+
import type { Server } from "node:net";
|
|
17
|
+
import { dirname } from "node:path";
|
|
18
|
+
|
|
19
|
+
/**
 * Minimal logger surface (pino-compatible). Each method receives a context
 * object plus an optional human-readable message.
 */
export interface SocketWatchdogLogger {
  /** Informational output. */
  info(obj: object, msg?: string): void;
  /** Used by the watchdog when the socket path is missing and a re-bind starts. */
  warn(obj: object, msg?: string): void;
  /** Used by the watchdog timer when a rebind attempt fails unexpectedly. */
  error(obj: object, msg?: string): void;
}
|
|
28
|
+
|
|
29
|
+
/** Construction options for {@link SocketWatchdog}. */
export interface SocketWatchdogOptions {
  /** Absolute path to the Unix socket file the consumer is listening on. */
  socketPath: string;
  /**
   * How often to stat the socket path, in milliseconds. Set to `0` (or any
   * value `<= 0`) to disable polling. Defaults to 5000ms.
   */
  intervalMs?: number;
  /**
   * Returns the consumer's current listening server. The watchdog uses this
   * both as a precondition (no rebind when null) and as a generation marker
   * to detect shutdown/restart races mid-rebind.
   */
  getServer: () => Server | null;
  /**
   * Factory for a fresh listening Server. Called by the watchdog when a
   * rebind is needed; the watchdog drives `.listen(socketPath)` and waits
   * for the `listening` event before installing.
   */
  createServer: () => Server;
  /**
   * Invoked when a rebind succeeds. The consumer is responsible for
   * swapping its primary server reference to `newServer` and disposing of
   * `oldServer` (typically by tracking it as a legacy listener while
   * in-flight clients drain, then closing it).
   */
  onRebind: (newServer: Server, oldServer: Server) => void;
  /** Pino-compatible logger. */
  log: SocketWatchdogLogger;
}
|
|
59
|
+
|
|
60
|
+
/** Poll interval, in milliseconds, used when `intervalMs` is not supplied. */
const DEFAULT_INTERVAL_MS = 5000;
|
|
61
|
+
|
|
62
|
+
/**
 * Make sure the parent directory of `socketPath` exists.
 *
 * A missing directory is created recursively with mode `0o700`, so a
 * freshly-spawned dir on a tmpfs mount doesn't leak the IPC surface to other
 * UIDs. A path that already exists is left untouched, permissions included.
 *
 * @param socketPath Path of the socket file whose directory is required.
 */
export function ensureSocketDir(socketPath: string): void {
  const parentDir = dirname(socketPath);
  if (existsSync(parentDir)) return;
  mkdirSync(parentDir, { mode: 0o700, recursive: true });
}
|
|
74
|
+
|
|
75
|
+
/**
 * Polls a Unix socket path on a timer and, when the path entry has vanished
 * from disk, binds a replacement listener and hands it to the consumer.
 *
 * Usage:
 *   1. Construct with the consumer's callbacks.
 *   2. Call {@link start} once the consumer's own initial `listen()` has
 *      succeeded.
 *   3. Call {@link stop} during shutdown, before closing the underlying
 *      server, so an in-flight rebind cannot resurrect the listener.
 *
 * The interval timer is `unref`-ed (when the runtime exposes `unref`), so the
 * watchdog by itself does not keep the event loop alive.
 */
export class SocketWatchdog {
  private readonly socketPath: string;
  private readonly intervalMs: number;
  private readonly getServer: () => Server | null;
  private readonly createServer: () => Server;
  private readonly onRebind: (newServer: Server, oldServer: Server) => void;
  private readonly log: SocketWatchdogLogger;

  /** Active interval timer, or `null` while the watchdog is not polling. */
  private handle: ReturnType<typeof setInterval> | null = null;

  /**
   * @param options Socket path, optional poll interval (falls back to
   *   `DEFAULT_INTERVAL_MS` when nullish), the consumer's server accessors
   *   and rebind callback, and a logger.
   */
  constructor(options: SocketWatchdogOptions) {
    const { socketPath, intervalMs, getServer, createServer, onRebind, log } =
      options;
    this.socketPath = socketPath;
    this.intervalMs = intervalMs ?? DEFAULT_INTERVAL_MS;
    this.getServer = getServer;
    this.createServer = createServer;
    this.onRebind = onRebind;
    this.log = log;
  }

  /**
   * Start the polling timer. Does nothing when the watchdog is already
   * running or when `intervalMs` is zero or negative.
   */
  start(): void {
    const alreadyRunning = this.handle !== null;
    if (alreadyRunning || this.intervalMs <= 0) return;

    // rebindIfMissing calls ensureSocketDir and createServer before reaching
    // its own try/catch, so a throw from either one rejects the returned
    // promise. Without a handler here that rejection would be unhandled on
    // every tick for as long as the failure persists.
    const reportFailure = (err: unknown): void => {
      this.log.error(
        { err, path: this.socketPath },
        "Watchdog rebind failed unexpectedly",
      );
    };
    const tick = (): void => {
      this.rebindIfMissing().catch(reportFailure);
    };

    const timer = setInterval(tick, this.intervalMs);
    this.handle = timer;
    timer.unref?.();
  }

  /** Cancel the polling timer. Calling it repeatedly is harmless. */
  stop(): void {
    if (this.handle === null) return;
    clearInterval(this.handle);
    this.handle = null;
  }

  /**
   * Bind a fresh listener on the socket path when the path no longer exists
   * on disk.
   *
   * Exposed publicly so tests can drive the watchdog deterministically
   * instead of waiting for the interval.
   *
   * @returns `true` when a replacement listener was bound and handed to
   *   `onRebind`; `false` when the consumer has no server, the path is still
   *   present, `listen()` failed, or the consumer's server reference changed
   *   while the new listener was being bound.
   */
  async rebindIfMissing(): Promise<boolean> {
    const path = this.socketPath;
    const serverAtEntry = this.getServer();
    if (serverAtEntry === null || existsSync(path)) return false;

    this.log.warn(
      { path },
      "IPC socket path missing on disk — re-binding listener",
    );

    // These two calls are intentionally outside the try below: a synchronous
    // throw from either propagates to the caller as a rejection.
    ensureSocketDir(path);
    const candidate = this.createServer();

    // Best-effort close of the replacement server; failures are ignored.
    const closeCandidateQuietly = (): void => {
      try {
        candidate.close();
      } catch {
        /* ignore */
      }
    };

    try {
      await new Promise<void>((resolve, reject) => {
        // Whichever event fires first detaches the other handler so the
        // one-shot listeners do not linger on the server.
        function handleError(err: unknown): void {
          candidate.off("listening", handleListening);
          reject(err);
        }
        function handleListening(): void {
          candidate.off("error", handleError);
          resolve();
        }
        candidate.once("error", handleError);
        candidate.once("listening", handleListening);
        candidate.listen(path);
      });
    } catch (err) {
      this.log.error(
        { err, path },
        "Failed to re-bind IPC socket — will retry on next watchdog tick",
      );
      closeCandidateQuietly();
      return false;
    }

    // Happy path: the consumer still holds the server we saw on entry, so
    // the replacement can be handed over.
    if (this.getServer() === serverAtEntry) {
      this.onRebind(candidate, serverAtEntry);

      this.log.info({ path }, "IPC socket re-bound after path loss");
      return true;
    }

    // Otherwise the consumer stopped, restarted, or swapped its server while
    // listen() was pending. Installing the replacement now would bring a
    // listener back after shutdown, so it is thrown away instead.
    closeCandidateQuietly();

    // The replacement's listen() may have recreated the path; remove it so a
    // leftover entry does not shadow a later start().
    if (existsSync(path)) {
      try {
        unlinkSync(path);
      } catch {
        /* ignore */
      }
    }

    this.log.warn(
      { path },
      "IPC server state changed during rebind — discarded new listener",
    );
    return false;
  }
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"strict": true,
|
|
7
|
+
"esModuleInterop": true,
|
|
8
|
+
"skipLibCheck": true,
|
|
9
|
+
"forceConsistentCasingInFileNames": true,
|
|
10
|
+
"resolveJsonModule": true,
|
|
11
|
+
"declaration": true,
|
|
12
|
+
"declarationMap": true,
|
|
13
|
+
"sourceMap": true,
|
|
14
|
+
"outDir": "./dist",
|
|
15
|
+
"rootDir": "./src",
|
|
16
|
+
"types": ["bun-types"]
|
|
17
|
+
},
|
|
18
|
+
"include": ["src/**/*"],
|
|
19
|
+
"exclude": ["node_modules", "dist"]
|
|
20
|
+
}
|