@oh-my-pi/pi-coding-agent 16.1.2 → 16.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -1
- package/dist/cli.js +2990 -2991
- package/dist/types/config/model-resolver.d.ts +3 -3
- package/dist/types/mnemopi/embed-client.d.ts +70 -0
- package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
- package/dist/types/mnemopi/embed-worker.d.ts +12 -0
- package/dist/types/mnemopi/state.d.ts +9 -1
- package/dist/types/modes/components/cache-invalidation-marker.d.ts +23 -10
- package/dist/types/modes/components/status-line/component.d.ts +2 -3
- package/dist/types/sdk.d.ts +12 -0
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/session/agent-storage.d.ts +2 -0
- package/dist/types/session/auth-broker-config.d.ts +3 -2
- package/dist/types/session/history-storage.d.ts +1 -1
- package/dist/types/session/tool-choice-queue.d.ts +2 -0
- package/dist/types/tools/image-gen.d.ts +2 -2
- package/dist/types/tools/index.d.ts +2 -0
- package/dist/types/tui/hyperlink.d.ts +3 -2
- package/dist/types/utils/image-loading.d.ts +1 -1
- package/dist/types/utils/ipc.d.ts +22 -0
- package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
- package/package.json +12 -12
- package/src/cli/bench-cli.ts +33 -2
- package/src/cli/dry-balance-cli.ts +4 -2
- package/src/cli.ts +8 -0
- package/src/commands/token.ts +52 -33
- package/src/config/append-only-context-mode.ts +45 -0
- package/src/config/model-discovery.ts +3 -0
- package/src/config/model-registry.ts +21 -3
- package/src/config/model-resolver.ts +31 -8
- package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
- package/src/extensibility/plugins/manager.ts +82 -22
- package/src/lsp/client.ts +24 -0
- package/src/mnemopi/backend.ts +49 -3
- package/src/mnemopi/embed-client.ts +401 -0
- package/src/mnemopi/embed-protocol.ts +35 -0
- package/src/mnemopi/embed-worker.ts +113 -0
- package/src/mnemopi/state.ts +29 -1
- package/src/modes/components/cache-invalidation-marker.ts +31 -15
- package/src/modes/components/custom-editor.test.ts +4 -3
- package/src/modes/components/custom-editor.ts +1 -1
- package/src/modes/components/model-selector.ts +2 -2
- package/src/modes/components/status-line/component.ts +64 -18
- package/src/modes/components/welcome.ts +1 -1
- package/src/modes/controllers/event-controller.ts +8 -0
- package/src/modes/controllers/selector-controller.ts +2 -2
- package/src/modes/theme/theme.ts +69 -0
- package/src/sdk.ts +37 -0
- package/src/session/agent-session.ts +13 -0
- package/src/session/agent-storage.ts +14 -0
- package/src/session/auth-broker-config.ts +2 -1
- package/src/session/history-storage.ts +13 -1
- package/src/session/tool-choice-queue.ts +6 -0
- package/src/stt/asr-client.ts +2 -7
- package/src/tiny/title-client.ts +2 -7
- package/src/tools/image-gen.ts +4 -8
- package/src/tools/index.ts +2 -0
- package/src/tools/render-utils.ts +4 -1
- package/src/tools/resolve.ts +1 -0
- package/src/tts/tts-client.ts +2 -7
- package/src/tui/hyperlink.ts +6 -3
- package/src/utils/image-loading.ts +12 -2
- package/src/utils/ipc.ts +38 -0
- package/src/web/search/providers/perplexity-auth.ts +133 -0
- package/src/web/search/providers/perplexity.ts +2 -125
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
import * as path from "node:path";
|
|
2
|
+
import { $env, isBunTestRuntime, isCompiledBinary, logger, workerHostEntry } from "@oh-my-pi/pi-utils";
|
|
3
|
+
import type { Subprocess } from "bun";
|
|
4
|
+
import type { MnemopiEmbedModelId, MnemopiEmbedWorkerInbound, MnemopiEmbedWorkerOutbound } from "./embed-protocol";
|
|
5
|
+
|
|
6
|
+
/**
 * Abstraction over the mnemopi embeddings subprocess. The runtime
 * implementation is a Bun child process so `onnxruntime-node`'s NAPI
 * constructor + finalizer never run inside the main agent address space —
 * those destructors segfault Bun on Windows when mnemopi's local embedding
 * provider loads fastembed in the main process (issue #3031; the mnemopi
 * sibling of the tiny-model fix from #1606 / #1607).
 */
export interface MnemopiEmbedWorkerHandle {
	/** Deliver one request message to the worker. */
	send(message: MnemopiEmbedWorkerInbound): void;
	/** Subscribe to messages coming back from the worker; the returned function removes the handler. */
	onMessage(handler: (message: MnemopiEmbedWorkerOutbound) => void): () => void;
	/** Subscribe to worker failures; the returned function removes the handler. */
	onError(handler: (error: Error) => void): () => void;
	/** Shut the worker down. */
	terminate(): Promise<void>;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Bookkeeping for a request that is waiting on a worker reply. Neither
 * variant is rejected: `init` settles with a success flag, while `embed`
 * settles with either the vectors or an `Error` value that the awaiting
 * caller rethrows.
 */
type PendingRequest =
	| { kind: "init"; model: MnemopiEmbedModelId; resolve: (ok: boolean) => void }
	| { kind: "embed"; model: MnemopiEmbedModelId; resolve: (vectors: number[][] | Error) => void };
|
|
24
|
+
|
|
25
|
+
// Cold-starting the worker from a compiled binary (decompress + module graph load)
|
|
26
|
+
// is slow on contended CI runners; the probe only proves the worker spawns and
|
|
27
|
+
// ponges, so a generous bound removes flakes without weakening the check.
|
|
28
|
+
const SMOKE_TEST_TIMEOUT_MS = 30_000;
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Hidden subcommand on the main CLI that boots the mnemopi embeddings worker
|
|
32
|
+
* in the spawned subprocess. Kept in sync with the dispatch in `cli.ts`.
|
|
33
|
+
*/
|
|
34
|
+
export const MNEMOPI_EMBED_WORKER_ARG = "__omp_worker_mnemopi_embed";
|
|
35
|
+
|
|
36
|
+
/**
 * Build the environment handed to the embeddings subprocess.
 *
 * The child sees the same variables as the parent — fastembed honours
 * `HF_HUB_*`, `HTTPS_PROXY`, etc., and `loadFastembed()` reads the same
 * `OMP_*` runtime-install knobs the parent uses. Entries whose value is not
 * a string (the `undefined` slots `process.env` can carry) are left out
 * because Bun.spawn rejects them.
 *
 * @returns A fresh plain object containing only the string-valued entries.
 */
function mnemopiEmbedWorkerEnv(): Record<string, string> {
	const source = $env as Record<string, string | undefined>;
	const env: Record<string, string> = {};
	for (const name in source) {
		const entry = source[name];
		if (typeof entry !== "string") continue;
		env[name] = entry;
	}
	return env;
}
|
|
52
|
+
|
|
53
|
+
/** Command line (and optional working directory) used to launch the embeddings worker. */
interface MnemopiEmbedWorkerSpawnCommand {
	/** Executable followed by its arguments, passed as `cmd` to `Bun.spawn`. */
	cmd: string[];
	/** Working directory for the child; left unset for the compiled-binary launch. */
	cwd?: string;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Work out how to relaunch the agent CLI in mnemopi-embed-worker mode.
 *
 * Three cases, checked in order:
 * 1. Compiled binary — the executable is its own entry point, so only the
 *    hidden worker argument is appended.
 * 2. A worker-host entry is declared — run it by basename from its own
 *    directory (cwd-relative for reliable Bun IPC).
 * 3. No host entry (bun test, SDK embedding) — fall back to this package's
 *    `src/cli.ts`, resolved two directories above this module.
 *
 * @returns The argv to spawn plus, when applicable, the cwd to spawn it in.
 */
function mnemopiEmbedWorkerSpawnCmd(): MnemopiEmbedWorkerSpawnCommand {
	const runtime = process.execPath;
	if (isCompiledBinary()) {
		return { cmd: [runtime, MNEMOPI_EMBED_WORKER_ARG] };
	}

	const entry = workerHostEntry();
	if (!entry) {
		const packageDir = path.resolve(import.meta.dir, "..", "..");
		return { cmd: [runtime, "src/cli.ts", MNEMOPI_EMBED_WORKER_ARG], cwd: packageDir };
	}

	return {
		cmd: [runtime, path.basename(entry), MNEMOPI_EMBED_WORKER_ARG],
		cwd: path.dirname(entry),
	};
}
|
|
77
|
+
|
|
78
|
+
/**
 * Raw pieces of a spawned embeddings worker, before they are wrapped into a
 * `MnemopiEmbedWorkerHandle`.
 */
interface SpawnedSubprocess {
	/** The Bun child process; stdin, stdout and stderr are all ignored. */
	proc: Subprocess<"ignore", "ignore", "ignore">;
	/** Listeners invoked for every IPC message received from the child. */
	inbound: Set<(message: MnemopiEmbedWorkerOutbound) => void>;
	/** Listeners invoked when the child exits unexpectedly. */
	errors: Set<(error: Error) => void>;
	/**
	 * Flipped to `true` right before the deliberate SIGKILL so `onExit` can
	 * distinguish the expected hard-kill from a crash (SIGSEGV from a native
	 * fault, OOM SIGKILL, operator `kill -9`). Only the latter surfaces as a
	 * worker error so callers don't await forever.
	 */
	intentionalExit: { value: boolean };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Launch the mnemopi embeddings worker as a child process with an IPC
 * channel. Exported for tests and the smoke probe; production callers go
 * through {@link spawnMnemopiEmbedWorker}.
 *
 * IPC messages are fanned out to the `inbound` listeners. An exit is reported
 * to the `errors` listeners unless the exit code is 0, or there is no exit
 * code (killed by signal) and `intentionalExit` was set beforehand.
 *
 * @returns The process plus the listener sets and the intentional-exit flag.
 */
export function createMnemopiEmbedSubprocess(): SpawnedSubprocess {
	const messageListeners = new Set<(message: MnemopiEmbedWorkerOutbound) => void>();
	const errorListeners = new Set<(error: Error) => void>();
	const exitFlag = { value: false };
	const { cmd, cwd } = mnemopiEmbedWorkerSpawnCmd();

	const child = Bun.spawn({
		cmd,
		cwd,
		env: mnemopiEmbedWorkerEnv(),
		stdin: "ignore",
		stdout: "ignore",
		stderr: "ignore",
		serialization: "advanced",
		windowsHide: true,
		ipc(message) {
			messageListeners.forEach(listener => {
				listener(message as MnemopiEmbedWorkerOutbound);
			});
		},
		onExit(_proc, exitCode, signalCode) {
			const cleanExit = exitCode === 0;
			const expectedKill = exitCode === null && exitFlag.value;
			if (cleanExit || expectedKill) return;

			const reason = exitCode === null ? `signal ${signalCode ?? "unknown"}` : `code ${exitCode}`;
			const failure = new Error(`mnemopi embed subprocess exited with ${reason}`);
			errorListeners.forEach(listener => {
				listener(failure);
			});
		},
	});

	// An idle worker must not keep the parent's event loop alive; the agent
	// dispose path calls `terminate()` explicitly. Bun's test runner starves
	// IPC for unref'd subprocesses, so the reference is kept under tests.
	if (!isBunTestRuntime()) child.unref();

	return { proc: child, inbound: messageListeners, errors: errorListeners, intentionalExit: exitFlag };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Adapt a spawned subprocess to the {@link MnemopiEmbedWorkerHandle} contract.
 *
 * @param spawned - The process plus the listener sets and intentional-exit
 *   flag it was created with; the handle shares (does not copy) those sets.
 * @returns A handle whose subscriptions add to / remove from the shared sets.
 */
function wrapSubprocess({ proc, inbound, errors, intentionalExit }: SpawnedSubprocess): MnemopiEmbedWorkerHandle {
	return {
		send(message) {
			try {
				proc.send(message);
			} catch (error) {
				const failure = error instanceof Error ? error : new Error(String(error));
				logger.debug("mnemopi-embed: send to subprocess failed", {
					error: failure.message,
				});
				// A message that never reached the child can never be answered.
				// Report it as a worker failure so whoever is waiting on the
				// reply is released instead of awaiting forever. Stay quiet
				// once the handle has been terminated on purpose.
				if (intentionalExit.value) return;
				// Iterate a snapshot: listeners commonly unsubscribe themselves
				// while handling the failure.
				for (const handler of [...errors]) handler(failure);
			}
		},
		onMessage(handler) {
			inbound.add(handler);
			return () => inbound.delete(handler);
		},
		onError(handler) {
			errors.add(handler);
			return () => errors.delete(handler);
		},
		async terminate() {
			// SIGKILL: the point of subprocess isolation is that the parent
			// never runs `onnxruntime-node`'s NAPI finalizer (it crashes Bun
			// on Windows). Hard-kill instead; the OS reclaims the model
			// memory. Flip the intentional-exit flag *before* killing so
			// `onExit` can tell this apart from a native crash.
			intentionalExit.value = true;
			try {
				proc.kill("SIGKILL");
			} catch {
				// Already gone.
			}
		},
	};
}
|
|
161
|
+
|
|
162
|
+
/**
 * In-process stand-in used when the real worker could not be spawned.
 *
 * It answers every `ping` with a `pong` and every other request with an
 * `error` reply carrying the spawn failure's message. Replies are delivered
 * on a microtask, never synchronously from `send`.
 *
 * @param error - The spawn failure; its message (or string form) is echoed
 *   in every `error` reply.
 * @returns A handle whose `onError` never fires and whose `terminate` drops
 *   all message listeners.
 */
function spawnInlineUnavailableWorker(error: unknown): MnemopiEmbedWorkerHandle {
	const subscribers = new Set<(message: MnemopiEmbedWorkerOutbound) => void>();
	const reason = error instanceof Error ? error.message : String(error);

	const broadcast = (reply: MnemopiEmbedWorkerOutbound): void => {
		subscribers.forEach(subscriber => {
			subscriber(reply);
		});
	};

	return {
		send(message) {
			queueMicrotask(() => {
				const reply: MnemopiEmbedWorkerOutbound =
					message.type === "ping"
						? { type: "pong", id: message.id }
						: { type: "error", id: message.id, error: reason };
				broadcast(reply);
			});
		},
		onMessage(handler) {
			subscribers.add(handler);
			return () => subscribers.delete(handler);
		},
		onError() {
			return () => {};
		},
		async terminate() {
			subscribers.clear();
		},
	};
}
|
|
190
|
+
|
|
191
|
+
/**
 * Default worker factory for {@link MnemopiEmbedClient}: spawn the real
 * subprocess, or — if spawning throws — log a warning and return the inline
 * stand-in that answers every request with the spawn error.
 */
function spawnMnemopiEmbedWorker(): MnemopiEmbedWorkerHandle {
	try {
		const subprocess = createMnemopiEmbedSubprocess();
		return wrapSubprocess(subprocess);
	} catch (cause) {
		const detail = cause instanceof Error ? cause.message : String(cause);
		logger.warn("mnemopi embed worker spawn failed; local embeddings disabled", { error: detail });
		return spawnInlineUnavailableWorker(cause);
	}
}
|
|
201
|
+
|
|
202
|
+
/**
 * Replay a `log` message from the worker through the parent's logger at the
 * level the worker asked for. Any level other than `debug` or `warn` is
 * logged as an error.
 */
function logWorkerMessage(message: Extract<MnemopiEmbedWorkerOutbound, { type: "log" }>): void {
	const { level, msg, meta } = message;
	switch (level) {
		case "debug":
			logger.debug(msg, meta);
			break;
		case "warn":
			logger.warn(msg, meta);
			break;
		default:
			logger.error(msg, meta);
	}
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Per-model wrapper produced by {@link MnemopiEmbedClient.initialize}.
|
|
210
|
+
* `embed()` round-trips one batch of texts through the worker subprocess and
|
|
211
|
+
* yields the resulting vectors in a single asynchronous batch — fastembed's
|
|
212
|
+
* own iterator was emitting batches that we collect on the child side anyway,
|
|
213
|
+
* and serializing per-batch over IPC would not improve throughput.
|
|
214
|
+
*/
|
|
215
|
+
export interface MnemopiSubprocessEmbeddingModel {
|
|
216
|
+
embed(texts: string[], batchSize?: number): AsyncIterable<number[][]>;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/**
 * Parent-side client for the embeddings worker.
 *
 * Owns at most one worker handle, created lazily on first use through the
 * injected factory. Every request gets a fresh string id and an entry in the
 * pending map; worker replies are matched back by id. Pending requests are
 * never rejected: `init` settles with `false` and `embed` settles with an
 * `Error` value that `#embed` rethrows.
 */
export class MnemopiEmbedClient {
	#worker: MnemopiEmbedWorkerHandle | null = null;
	#unsubscribeMessage: (() => void) | null = null;
	#unsubscribeError: (() => void) | null = null;
	#pending = new Map<string, PendingRequest>();
	#nextRequestId = 0;
	#spawnWorker: () => MnemopiEmbedWorkerHandle;

	/**
	 * @param spawnWorker - Factory for the worker handle; defaults to the real
	 *   subprocess spawner and is replaceable so tests can supply a fake.
	 */
	constructor(spawnWorker: () => MnemopiEmbedWorkerHandle = spawnMnemopiEmbedWorker) {
		this.#spawnWorker = spawnWorker;
	}

	/**
	 * Load the named fastembed model inside the subprocess. Resolves to a
	 * thin wrapper whose `embed()` round-trips through the same worker, or
	 * `null` when the worker cannot init the model (missing peer, native
	 * load failure, etc.). Multiple calls with the same model reuse the
	 * single in-flight worker; calling with a different model loads it on
	 * the child without restarting the process.
	 *
	 * @param model - Identifier of the model to load.
	 * @param cacheDir - Model cache directory forwarded to the worker.
	 * @returns The embedding wrapper, or `null` on any failure (never throws).
	 */
	async initialize(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
	): Promise<MnemopiSubprocessEmbeddingModel | null> {
		try {
			const worker = this.#ensureWorker();
			const id = String(++this.#nextRequestId);
			const { promise, resolve } = Promise.withResolvers<boolean>();
			this.#pending.set(id, { kind: "init", model, resolve });
			try {
				worker.send({ type: "init", id, model, cacheDir });
				const ok = await promise;
				if (!ok) return null;
			} finally {
				this.#pending.delete(id);
			}
		} catch (error) {
			logger.debug("mnemopi-embed: init failed", {
				model,
				error: error instanceof Error ? error.message : String(error),
			});
			return null;
		}
		return { embed: (texts, batchSize) => this.#streamEmbed(model, cacheDir, texts, batchSize) };
	}

	/**
	 * Drop the current worker: detach both subscriptions, fail every request
	 * still pending, then terminate the worker. Safe to call with no worker;
	 * the next request spawns a fresh one.
	 */
	async terminate(): Promise<void> {
		const worker = this.#worker;
		this.#worker = null;
		this.#unsubscribeMessage?.();
		this.#unsubscribeMessage = null;
		this.#unsubscribeError?.();
		this.#unsubscribeError = null;
		this.#failAllPending(new Error("mnemopi embed worker terminated"));
		try {
			await worker?.terminate();
		} catch {
			// Already gone.
		}
	}

	/**
	 * Send one embed request and wait for its reply.
	 *
	 * @throws The `Error` the request was settled with (worker error reply,
	 *   worker failure, or termination while pending).
	 */
	async #embed(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
		texts: string[],
		batchSize: number | undefined,
	): Promise<number[][]> {
		const worker = this.#ensureWorker();
		const id = String(++this.#nextRequestId);
		const { promise, resolve } = Promise.withResolvers<number[][] | Error>();
		this.#pending.set(id, { kind: "embed", model, resolve });
		try {
			// Carry the (model, cacheDir) the wrapper was bound to in every
			// embed message: dispose + respawn between two embeds on the same
			// `LocalEmbeddingModel` handle would otherwise hit a fresh
			// worker's "embed before init" guard. Worker `ensureLoaded` is
			// idempotent so steady-state embeds pay no extra cost.
			worker.send({ type: "embed", id, model, cacheDir, texts, batchSize });
			const result = await promise;
			if (result instanceof Error) throw result;
			return result;
		} finally {
			this.#pending.delete(id);
		}
	}

	/** Async-iterable facade over `#embed` that yields the whole result once. */
	async *#streamEmbed(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
		texts: string[],
		batchSize: number | undefined,
	): AsyncIterable<number[][]> {
		const vectors = await this.#embed(model, cacheDir, texts, batchSize);
		// Mnemopi's `collectMatrix` re-batches via async iteration anyway; yield
		// a single batch carrying the full result so the caller's drain loop
		// behaves identically to the in-process fastembed iterator (one yield
		// per `embed()` call) without paying extra IPC round-trips.
		yield vectors;
	}

	/** Return the live worker, spawning and subscribing to one if none exists. */
	#ensureWorker(): MnemopiEmbedWorkerHandle {
		if (this.#worker) return this.#worker;
		const worker = this.#spawnWorker();
		this.#worker = worker;
		this.#unsubscribeMessage = worker.onMessage(message => this.#handleMessage(message));
		this.#unsubscribeError = worker.onError(error => this.#handleWorkerError(error));
		return worker;
	}

	/** Route one worker message: forward logs, ignore pongs, settle the matching request. */
	#handleMessage(message: MnemopiEmbedWorkerOutbound): void {
		if (message.type === "log") {
			logWorkerMessage(message);
			return;
		}
		if (message.type === "pong") return;

		const pending = this.#pending.get(message.id);
		if (!pending) return;
		this.#pending.delete(message.id);

		if (message.type === "ready" && pending.kind === "init") {
			pending.resolve(true);
			return;
		}
		if (message.type === "vectors" && pending.kind === "embed") {
			pending.resolve(message.vectors);
			return;
		}

		// Either an explicit error reply, or a reply whose type does not fit
		// the request it answers. The entry is already removed from the map,
		// so it must be settled here — otherwise the caller waits forever.
		const reason =
			message.type === "error"
				? message.error
				: `unexpected "${message.type}" reply to ${pending.kind} request`;
		logger.debug("mnemopi-embed: worker returned error", { error: reason });
		this.#failPending(pending, new Error(reason));
	}

	/** Worker died or became unreachable: fail everything pending and drop the worker. */
	#handleWorkerError(error: Error): void {
		logger.warn("mnemopi-embed: worker error", { error: error.message });
		this.#failAllPending(error);
		void this.terminate();
	}

	/** Settle one request as failed, in the shape its kind expects. */
	#failPending(pending: PendingRequest, error: Error): void {
		if (pending.kind === "init") pending.resolve(false);
		else pending.resolve(error);
	}

	/** Settle every pending request as failed and empty the map. */
	#failAllPending(error: Error): void {
		for (const pending of this.#pending.values()) this.#failPending(pending, error);
		this.#pending.clear();
	}
}
|
|
365
|
+
|
|
366
|
+
export const mnemopiEmbedClient = new MnemopiEmbedClient();
|
|
367
|
+
|
|
368
|
+
/** Terminate the shared `mnemopiEmbedClient` and resolve once it has finished. */
export async function shutdownMnemopiEmbedClient(): Promise<void> {
	const shared = mnemopiEmbedClient;
	await shared.terminate();
}
|
|
371
|
+
|
|
372
|
+
/**
 * Probe that the embeddings worker can be spawned and answers a ping.
 *
 * Spawns a dedicated subprocess, sends one `ping`, and waits for a `pong`.
 * `log` messages are ignored; any other message, a worker error, or the
 * timeout fails the probe. The timer, both subscriptions and the subprocess
 * are always cleaned up before the function settles.
 *
 * @param options.timeoutMs - How long to wait for the pong; defaults to
 *   `SMOKE_TEST_TIMEOUT_MS`.
 * @throws If the worker does not pong in time, replies with something else,
 *   or reports an error.
 */
export async function smokeTestMnemopiEmbedWorker({
	timeoutMs = SMOKE_TEST_TIMEOUT_MS,
}: {
	timeoutMs?: number;
} = {}): Promise<void> {
	const worker = wrapSubprocess(createMnemopiEmbedSubprocess());
	const { promise: ponged, resolve: succeed, reject: fail } = Promise.withResolvers<void>();

	const deadline = setTimeout(() => {
		fail(new Error(`mnemopi embed worker did not pong within ${timeoutMs}ms`));
	}, timeoutMs);

	const stopMessages = worker.onMessage(reply => {
		switch (reply.type) {
			case "pong":
				succeed();
				break;
			case "log":
				break;
			default:
				fail(new Error(`mnemopi embed worker: expected pong, got ${JSON.stringify(reply)}`));
		}
	});
	const stopErrors = worker.onError(fail);

	try {
		worker.send({ type: "ping", id: "smoke" } satisfies MnemopiEmbedWorkerInbound);
		await ponged;
	} finally {
		clearTimeout(deadline);
		stopMessages();
		stopErrors();
		await worker.terminate();
	}
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wire types between the parent (`MnemopiEmbedClient`) and the local
|
|
3
|
+
* embeddings subprocess. The parent owns the subprocess lifecycle (graceful
|
|
4
|
+
* work, hard `SIGKILL` on shutdown); the protocol carries no explicit close
|
|
5
|
+
* handshake — once the parent decides to terminate, it signals the OS to reap
|
|
6
|
+
* the child so `onnxruntime-node`'s NAPI finalizer never runs in the main
|
|
7
|
+
* agent address space (it crashes Bun on Windows shutdown — issue #3031, the
|
|
8
|
+
* mnemopi sibling of the tiny-model fix from #1606/#1607). See
|
|
9
|
+
* `embed-client.ts` for the spawn/kill glue.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/** Identifier of the fastembed model the worker should load (e.g. `fast-bge-base-en-v1.5`). */
|
|
13
|
+
export type MnemopiEmbedModelId = string;
|
|
14
|
+
|
|
15
|
+
/**
 * Requests sent from the parent to the worker. Every request carries an `id`
 * that the worker echoes in its reply.
 */
export type MnemopiEmbedWorkerInbound =
	| { type: "ping"; id: string }
	| { type: "init"; id: string; model: MnemopiEmbedModelId; cacheDir?: string }
	// `embed` always carries the same `model` / `cacheDir` the wrapper was
	// initialized with so a fresh subprocess (after the parent SIGKILLed the
	// previous one but mnemopi still holds the cached `LocalEmbeddingModel`)
	// can lazily reload the model on demand instead of returning
	// "embed before init".
	| { type: "embed"; id: string; model: MnemopiEmbedModelId; cacheDir?: string; texts: string[]; batchSize?: number };
|
|
24
|
+
|
|
25
|
+
/**
 * Messages sent from the worker to the parent. `pong`, `ready`, `vectors`
 * and `error` answer the request with the same `id`; `log` carries no `id`
 * and is not tied to any request.
 */
export type MnemopiEmbedWorkerOutbound =
	| { type: "pong"; id: string }
	| { type: "ready"; id: string }
	| { type: "vectors"; id: string; vectors: number[][] }
	| { type: "error"; id: string; error: string }
	| { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> };
|
|
31
|
+
|
|
32
|
+
/**
 * Worker-side view of the channel to the parent: the worker sends outbound
 * messages and receives inbound requests.
 */
export interface MnemopiEmbedTransport {
	/** Send one message to the parent. */
	send(message: MnemopiEmbedWorkerOutbound): void;
	/**
	 * Register the handler invoked for each request from the parent.
	 * NOTE(review): the returned function is presumably an unsubscribe —
	 * confirm against the transport implementation.
	 */
	onMessage(handler: (message: MnemopiEmbedWorkerInbound) => void): () => void;
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mnemopi local-embeddings worker. Loaded inside the dedicated subprocess
|
|
3
|
+
* spawned by `embed-client.ts` (re-entered through the agent CLI's hidden
|
|
4
|
+
* `__omp_worker_mnemopi_embed` selector). The whole point of this module is
|
|
5
|
+
* that `loadFastembed()` — and therefore `onnxruntime-node`'s NAPI
|
|
6
|
+
* constructor + finalizer — only ever runs in this child address space. The
|
|
7
|
+
* parent `SIGKILL`s us on shutdown so the destructor that crashes Bun on
|
|
8
|
+
* Windows shutdown (issue #3031, mnemopi sibling of #1606/#1607) never runs
|
|
9
|
+
* in either process.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { StandardEmbeddingModel } from "@oh-my-pi/pi-mnemopi/core";
|
|
13
|
+
import { loadFastembed } from "@oh-my-pi/pi-mnemopi/core/fastembed-runtime";
|
|
14
|
+
import type { MnemopiEmbedModelId, MnemopiEmbedTransport, MnemopiEmbedWorkerInbound } from "./embed-protocol";
|
|
15
|
+
|
|
16
|
+
/** A model instance together with the (model, cacheDir) pair it was loaded for. */
interface LoadedModel {
	/** Identifier the instance was initialized with. */
	model: MnemopiEmbedModelId;
	/** Cache directory passed at initialization, if any. */
	cacheDir: string | undefined;
	/** The loaded embedder; `embed` yields vectors in batches, synchronously or asynchronously. */
	instance: {
		embed(texts: string[], batchSize?: number): AsyncIterable<number[][]> | Iterable<number[][]>;
	};
}
|
|
23
|
+
|
|
24
|
+
let loaded: Promise<LoadedModel> | null = null;
|
|
25
|
+
let loadedKey = "";
|
|
26
|
+
|
|
27
|
+
/**
 * Load fastembed and initialize one model instance with download progress
 * output disabled.
 *
 * @param model - Model identifier received from the parent.
 * @param cacheDir - Directory handed to fastembed as its model cache.
 * @returns The instance bundled with the arguments it was created from.
 */
async function loadModel(model: MnemopiEmbedModelId, cacheDir: string | undefined): Promise<LoadedModel> {
	const fastembed = await loadFastembed();
	// `model` is a plain string on the wire (resolved by mnemopi's
	// `fastembedModelName`). The cast selects the non-CUSTOM overload so
	// TypeScript takes the standard-model branch — the parent only ever
	// passes pre-vetted fast-* identifiers.
	const standardModel = model as StandardEmbeddingModel;
	const instance = await fastembed.FlagEmbedding.init({
		model: standardModel,
		cacheDir,
		showDownloadProgress: false,
	});
	return { model, cacheDir, instance };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Return the load promise for (model, cacheDir), starting a load only when
 * the cached one is for a different pair or absent.
 *
 * Only the most recent pair is remembered. A load that fails clears the cache
 * — provided it is still the cached attempt — so a retry with the same pair
 * starts over instead of replaying the failure.
 *
 * @returns A promise for the loaded model; rejects with the load error.
 */
function ensureLoaded(model: MnemopiEmbedModelId, cacheDir: string | undefined): Promise<LoadedModel> {
	const cacheKey = `${model}\u0000${cacheDir ?? ""}`;
	const cached = loaded;
	if (cached !== null && loadedKey === cacheKey) return cached;

	const attempt: Promise<LoadedModel> = loadModel(model, cacheDir).then(undefined, reason => {
		// Forget the failed attempt unless a newer load has already replaced it.
		if (loaded === attempt) {
			loaded = null;
			loadedKey = "";
		}
		throw reason;
	});

	loaded = attempt;
	loadedKey = cacheKey;
	return attempt;
}
|
|
57
|
+
/**
 * Serve one `embed` request: make sure the requested model is loaded, embed
 * a copy of the texts, flatten all batches into one list and reply with
 * `vectors`. Any failure along the way is answered with an `error` reply
 * carrying the failure's message; nothing is thrown to the caller.
 */
async function handleEmbed(
	transport: MnemopiEmbedTransport,
	message: Extract<MnemopiEmbedWorkerInbound, { type: "embed" }>,
): Promise<void> {
	try {
		// Every `embed` names the model + cacheDir its wrapper was bound to.
		// `ensureLoaded` returns the cached load for an unchanged pair, and
		// transparently loads again in a fresh subprocess when the parent
		// SIGKILLed the previous one while mnemopi kept its cached
		// `LocalEmbeddingModel` wrapper.
		const { instance } = await ensureLoaded(message.model, message.cacheDir);
		const rows: number[][] = [];
		const stream = instance.embed(Array.from(message.texts), message.batchSize);
		for await (const chunk of stream) {
			for (const vector of chunk) rows.push(vector);
		}
		transport.send({ type: "vectors", id: message.id, vectors: rows });
	} catch (cause) {
		const detail = cause instanceof Error ? cause.message : String(cause);
		transport.send({ type: "error", id: message.id, error: detail });
	}
}
|
|
82
|
+
|
|
83
|
+
/**
 * Serve one `init` request: load (or reuse) the requested model and reply
 * `ready`; on failure reply `error` with the failure's message. Nothing is
 * thrown to the caller.
 */
async function handleInit(
	transport: MnemopiEmbedTransport,
	message: Extract<MnemopiEmbedWorkerInbound, { type: "init" }>,
): Promise<void> {
	const { id, model, cacheDir } = message;
	try {
		await ensureLoaded(model, cacheDir);
		transport.send({ type: "ready", id });
	} catch (cause) {
		const detail = cause instanceof Error ? cause.message : String(cause);
		transport.send({ type: "error", id, error: detail });
	}
}
|
|
98
|
+
|
|
99
|
+
/**
 * Attach the worker's request dispatcher to a transport.
 *
 * `ping` is answered immediately with `pong`. `init` and `embed` are handed
 * to their async handlers without being awaited; those handlers send their
 * own replies. Any other message type is ignored.
 */
export function startMnemopiEmbedWorker(transport: MnemopiEmbedTransport): void {
	transport.onMessage(request => {
		if (request.type === "ping") {
			transport.send({ type: "pong", id: request.id });
			return;
		}
		if (request.type === "init") {
			void handleInit(transport, request);
			return;
		}
		if (request.type === "embed") {
			void handleEmbed(transport, request);
		}
	});
}
|
package/src/mnemopi/state.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
|
|
|
3
3
|
import type * as MnemopiNs from "@oh-my-pi/pi-mnemopi";
|
|
4
4
|
import type { Mnemopi, RecallResult } from "@oh-my-pi/pi-mnemopi";
|
|
5
5
|
import type * as MnemopiCoreNs from "@oh-my-pi/pi-mnemopi/core";
|
|
6
|
+
import type { LocalModelInitializer } from "@oh-my-pi/pi-mnemopi/core";
|
|
6
7
|
import { logger } from "@oh-my-pi/pi-utils";
|
|
7
8
|
import {
|
|
8
9
|
composeRecallQuery,
|
|
@@ -13,16 +14,42 @@ import {
|
|
|
13
14
|
import { extractMessages } from "../hindsight/transcript";
|
|
14
15
|
import type { AgentSession, AgentSessionEvent } from "../session/agent-session";
|
|
15
16
|
import type { MnemopiBackendConfig, MnemopiScoping } from "./config";
|
|
17
|
+
import { mnemopiEmbedClient } from "./embed-client";
|
|
16
18
|
|
|
17
19
|
// The mnemopi package pulls the embeddings stack; keep it off the CLI startup
|
|
18
20
|
// module graph by loading it lazily at the async boundaries that need it.
|
|
19
21
|
let mnemopiMod: typeof MnemopiNs | undefined;
|
|
20
22
|
let mnemopiCoreMod: typeof MnemopiCoreNs | undefined;
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
// `setLocalModelInitializer` writes a single module-level slot shared by
|
|
25
|
+
// both the root and `/core` re-exports, so install at most once across both
|
|
26
|
+
// loaders. Either entry point is enough to wire up the override.
|
|
27
|
+
let localModelInitializerInstalled = false;
|
|
28
|
+
|
|
29
|
+
/**
 * Install the subprocess-backed local-model initializer through the given
 * setter. Only the first call has any effect; later calls return at once.
 *
 * The installed initializer asks `mnemopiEmbedClient` to initialize the
 * model and rejects when no handle comes back.
 *
 * @param setInitializer - The package's `setLocalModelInitializer`.
 */
function installLocalModelInitializer(setInitializer: (initializer: LocalModelInitializer) => void): void {
	if (localModelInitializerInstalled) return;
	localModelInitializerInstalled = true;

	const viaSubprocess: LocalModelInitializer = async ({ model, cacheDir }) => {
		const handle = await mnemopiEmbedClient.initialize(model, cacheDir);
		if (!handle) throw new Error("mnemopi embed subprocess unavailable");
		return handle;
	};
	setInitializer(viaSubprocess);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Lazily import `@oh-my-pi/pi-mnemopi`, memoizing the module, and route
 * fastembed loads through the dedicated embeddings subprocess.
 *
 * The initializer override is installed right after the first import —
 * before any consumer gets the chance to call `embed()` — so
 * `onnxruntime-node`'s NAPI constructor + finalizer never run inside the
 * agent's address space (issue #3031). Test seams that swap the initializer
 * with `setLocalModelInitializerForTests` still win because both go through
 * the same module-level slot.
 *
 * @returns The imported module namespace.
 */
export async function loadMnemopi(): Promise<typeof MnemopiNs> {
	if (mnemopiMod) return mnemopiMod;
	mnemopiMod = await import("@oh-my-pi/pi-mnemopi");
	installLocalModelInitializer(mnemopiMod.setLocalModelInitializer);
	return mnemopiMod;
}
|
|
@@ -31,6 +58,7 @@ export async function loadMnemopi(): Promise<typeof MnemopiNs> {
|
|
|
31
58
|
export async function loadMnemopiCore(): Promise<typeof MnemopiCoreNs> {
	// Fast path: the `/core` namespace was already imported by an earlier call.
	if (mnemopiCoreMod) return mnemopiCoreMod;

	const loadedCore = await import("@oh-my-pi/pi-mnemopi/core");
	mnemopiCoreMod = loadedCore;
	// Install the local-model initializer override via the `/core` re-export;
	// this does nothing when the root loader has already installed it.
	installLocalModelInitializer(loadedCore.setLocalModelInitializer);
	return loadedCore;
}
|
|
@@ -4,9 +4,9 @@ import { formatNumber } from "@oh-my-pi/pi-utils";
|
|
|
4
4
|
import { theme } from "../../modes/theme/theme";
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
* Minimum
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Minimum prefix the previous turn must have READ back from cache before a
|
|
8
|
+
* collapse on the current turn counts as an invalidation. Filters out tiny
|
|
9
|
+
* contexts and providers below the cacheable-prefix floor, where a zero
|
|
10
10
|
* `cacheRead` is expected rather than a reset.
|
|
11
11
|
*/
|
|
12
12
|
// Compared against `prev.cacheRead` in `detectCacheInvalidation`: a previous
// turn below this value is not treated as having had a warm cache.
// NOTE(review): presumably a token count — confirm the unit of `Usage.cacheRead`.
const MIN_CACHE_FOOTPRINT = 2048;
|
|
@@ -18,25 +18,41 @@ export interface CacheInvalidation {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
/**
|
|
21
|
-
* Decide whether `current` turn lost
|
|
21
|
+
* Decide whether `current` turn lost a *working* prompt cache that `prev` was
|
|
22
|
+
* reusing.
|
|
22
23
|
*
|
|
23
24
|
* The provider reports a warm prefix as `cacheRead`; a model/thinking/tool/
|
|
24
25
|
* system-prompt change (or a history rewrite) breaks the prefix, so the next
|
|
25
|
-
* request reads nothing from cache and re-pays for the whole prompt. We
|
|
26
|
-
*
|
|
26
|
+
* request reads nothing from cache and re-pays for the whole prompt. We flag
|
|
27
|
+
* only the transition where a demonstrably warm cache goes cold: the previous
|
|
28
|
+
* turn must have actually READ a meaningful prefix back, and this turn's
|
|
27
29
|
* `cacheRead` collapsed to zero while it still reprocessed a non-trivial prompt.
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
30
|
+
*
|
|
31
|
+
* Requiring a prior warm read is deliberate. A turn that merely WROTE the prefix
|
|
32
|
+
* (`cacheRead` 0) has not proven the cache is live — that is the session's first
|
|
33
|
+
* request, or a re-write after expiry — so a following cold turn there is
|
|
34
|
+
* expected, not an invalidation the user caused (e.g. a long-running first tool
|
|
35
|
+
* call outliving the provider's 5-minute cache TTL surfaced a spurious "cache
|
|
36
|
+
* miss" right under the opening message). It also collapses a run of consecutive
|
|
37
|
+
* cold turns to the single marker at the moment the cache actually broke, instead
|
|
38
|
+
* of repeating the banner on every turn while it re-warms.
|
|
39
|
+
*
|
|
40
|
+
* Returns `undefined` (no marker) for the first turn, turns whose predecessor
|
|
41
|
+
* never read a warm prefix, tiny contexts, turns that reused any cache, and —
|
|
42
|
+
* crucially — turns on providers with *implicit* best-effort caching. Only an
|
|
43
|
+
* explicit, prefix-controlled cache (Anthropic / Bedrock `cache_control`)
|
|
44
|
+
* re-creates the prefix on a cold turn (`cacheWrite > 0`); implicit caches
|
|
45
|
+
* (Google / OpenAI / Fireworks) report `cacheWrite: 0` and drop `cacheRead` to
|
|
46
|
+
* zero intermittently as routine propagation noise that self-heals the next
|
|
47
|
+
* turn, so flagging it would be a false positive.
|
|
35
48
|
*/
|
|
36
49
|
export function detectCacheInvalidation(prev: Usage | undefined, current: Usage): CacheInvalidation | undefined {
|
|
37
50
|
if (!prev) return undefined;
|
|
38
|
-
|
|
39
|
-
|
|
51
|
+
// Only flag a warm→cold transition: the previous turn must have actually read
|
|
52
|
+
// a meaningful prefix from cache. A write-only predecessor (first request, or
|
|
53
|
+
// a re-write after expiry) has not proven the cache is live, so a cold turn
|
|
54
|
+
// behind it is expected — not an invalidation worth surfacing.
|
|
55
|
+
if (prev.cacheRead < MIN_CACHE_FOOTPRINT) return undefined;
|
|
40
56
|
// Any cache reuse this turn means the prefix survived (at least partly).
|
|
41
57
|
if (current.cacheRead > 0) return undefined;
|
|
42
58
|
// Only an explicit, prefix-controlled cache re-creates the prefix on a cold
|