@oh-my-pi/pi-coding-agent 16.1.2 → 16.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/CHANGELOG.md +44 -1
  2. package/dist/cli.js +2990 -2991
  3. package/dist/types/config/model-resolver.d.ts +3 -3
  4. package/dist/types/mnemopi/embed-client.d.ts +70 -0
  5. package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
  6. package/dist/types/mnemopi/embed-worker.d.ts +12 -0
  7. package/dist/types/mnemopi/state.d.ts +9 -1
  8. package/dist/types/modes/components/cache-invalidation-marker.d.ts +23 -10
  9. package/dist/types/modes/components/status-line/component.d.ts +2 -3
  10. package/dist/types/sdk.d.ts +12 -0
  11. package/dist/types/session/agent-session.d.ts +2 -0
  12. package/dist/types/session/agent-storage.d.ts +2 -0
  13. package/dist/types/session/auth-broker-config.d.ts +3 -2
  14. package/dist/types/session/history-storage.d.ts +1 -1
  15. package/dist/types/session/tool-choice-queue.d.ts +2 -0
  16. package/dist/types/tools/image-gen.d.ts +2 -2
  17. package/dist/types/tools/index.d.ts +2 -0
  18. package/dist/types/tui/hyperlink.d.ts +3 -2
  19. package/dist/types/utils/image-loading.d.ts +1 -1
  20. package/dist/types/utils/ipc.d.ts +22 -0
  21. package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
  22. package/package.json +12 -12
  23. package/src/cli/bench-cli.ts +33 -2
  24. package/src/cli/dry-balance-cli.ts +4 -2
  25. package/src/cli.ts +8 -0
  26. package/src/commands/token.ts +52 -33
  27. package/src/config/append-only-context-mode.ts +45 -0
  28. package/src/config/model-discovery.ts +3 -0
  29. package/src/config/model-registry.ts +21 -3
  30. package/src/config/model-resolver.ts +31 -8
  31. package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
  32. package/src/extensibility/plugins/manager.ts +82 -22
  33. package/src/lsp/client.ts +24 -0
  34. package/src/mnemopi/backend.ts +49 -3
  35. package/src/mnemopi/embed-client.ts +401 -0
  36. package/src/mnemopi/embed-protocol.ts +35 -0
  37. package/src/mnemopi/embed-worker.ts +113 -0
  38. package/src/mnemopi/state.ts +29 -1
  39. package/src/modes/components/cache-invalidation-marker.ts +31 -15
  40. package/src/modes/components/custom-editor.test.ts +4 -3
  41. package/src/modes/components/custom-editor.ts +1 -1
  42. package/src/modes/components/model-selector.ts +2 -2
  43. package/src/modes/components/status-line/component.ts +64 -18
  44. package/src/modes/components/welcome.ts +1 -1
  45. package/src/modes/controllers/event-controller.ts +8 -0
  46. package/src/modes/controllers/selector-controller.ts +2 -2
  47. package/src/modes/theme/theme.ts +69 -0
  48. package/src/sdk.ts +37 -0
  49. package/src/session/agent-session.ts +13 -0
  50. package/src/session/agent-storage.ts +14 -0
  51. package/src/session/auth-broker-config.ts +2 -1
  52. package/src/session/history-storage.ts +13 -1
  53. package/src/session/tool-choice-queue.ts +6 -0
  54. package/src/stt/asr-client.ts +2 -7
  55. package/src/tiny/title-client.ts +2 -7
  56. package/src/tools/image-gen.ts +4 -8
  57. package/src/tools/index.ts +2 -0
  58. package/src/tools/render-utils.ts +4 -1
  59. package/src/tools/resolve.ts +1 -0
  60. package/src/tts/tts-client.ts +2 -7
  61. package/src/tui/hyperlink.ts +6 -3
  62. package/src/utils/image-loading.ts +12 -2
  63. package/src/utils/ipc.ts +38 -0
  64. package/src/web/search/providers/perplexity-auth.ts +133 -0
  65. package/src/web/search/providers/perplexity.ts +2 -125
@@ -0,0 +1,401 @@
1
+ import * as path from "node:path";
2
+ import { $env, isBunTestRuntime, isCompiledBinary, logger, workerHostEntry } from "@oh-my-pi/pi-utils";
3
+ import type { Subprocess } from "bun";
4
+ import type { MnemopiEmbedModelId, MnemopiEmbedWorkerInbound, MnemopiEmbedWorkerOutbound } from "./embed-protocol";
5
+
6
+ /**
7
+ * Abstraction over the mnemopi embeddings subprocess. The runtime
8
+ * implementation is a Bun child process so `onnxruntime-node`'s NAPI
9
+ * constructor + finalizer never run inside the main agent address space —
10
+ * those destructors segfault Bun on Windows when mnemopi's local embedding
11
+ * provider loads fastembed in the main process (issue #3031; the mnemopi
12
+ * sibling of the tiny-model fix from #1606 / #1607).
13
+ */
14
+ export interface MnemopiEmbedWorkerHandle {
15
+ send(message: MnemopiEmbedWorkerInbound): void;
16
+ onMessage(handler: (message: MnemopiEmbedWorkerOutbound) => void): () => void;
17
+ onError(handler: (error: Error) => void): () => void;
18
+ terminate(): Promise<void>;
19
+ }
20
+
21
+ type PendingRequest =
22
+ | { kind: "init"; model: MnemopiEmbedModelId; resolve: (ok: boolean) => void }
23
+ | { kind: "embed"; model: MnemopiEmbedModelId; resolve: (vectors: number[][] | Error) => void };
24
+
25
+ // Cold-starting the worker from a compiled binary (decompress + module graph load)
26
+ // is slow on contended CI runners; the probe only proves the worker spawns and
27
+ // ponges, so a generous bound removes flakes without weakening the check.
28
+ const SMOKE_TEST_TIMEOUT_MS = 30_000;
29
+
30
+ /**
31
+ * Hidden subcommand on the main CLI that boots the mnemopi embeddings worker
32
+ * in the spawned subprocess. Kept in sync with the dispatch in `cli.ts`.
33
+ */
34
+ export const MNEMOPI_EMBED_WORKER_ARG = "__omp_worker_mnemopi_embed";
35
+
36
/**
 * Build the environment handed to the embeddings subprocess.
 *
 * The child gets the parent's environment unchanged: fastembed honours
 * `HF_HUB_*`, `HTTPS_PROXY`, etc., and `loadFastembed()` reads the same
 * `OMP_*` runtime-install knobs the parent uses. Only string-valued entries
 * are copied, because `process.env` carries `undefined` slots that
 * `Bun.spawn` rejects.
 */
function mnemopiEmbedWorkerEnv(): Record<string, string> {
	const source = $env as Record<string, string | undefined>;
	const env: Record<string, string> = {};
	for (const name in source) {
		const entry = source[name];
		// Skip unset slots; everything else is forwarded verbatim.
		if (typeof entry !== "string") continue;
		env[name] = entry;
	}
	return env;
}
52
+
53
+ interface MnemopiEmbedWorkerSpawnCommand {
54
+ cmd: string[];
55
+ cwd?: string;
56
+ }
57
+
58
/**
 * Work out how to relaunch the agent CLI in mnemopi-embed-worker mode.
 *
 * - Compiled binary: the binary itself is the entry point.
 * - Declared worker-host entry: re-enter it by basename from its own
 *   directory (cwd-relative for reliable Bun IPC).
 * - Otherwise (bun test, SDK embedding): fall back to this package's own
 *   `src/cli.ts`, run from the package root.
 */
function mnemopiEmbedWorkerSpawnCmd(): MnemopiEmbedWorkerSpawnCommand {
	const runtime = process.execPath;
	if (isCompiledBinary()) {
		return { cmd: [runtime, MNEMOPI_EMBED_WORKER_ARG] };
	}

	const hostEntry = workerHostEntry();
	if (!hostEntry) {
		const packageRoot = path.resolve(import.meta.dir, "..", "..");
		return { cmd: [runtime, "src/cli.ts", MNEMOPI_EMBED_WORKER_ARG], cwd: packageRoot };
	}

	return {
		cmd: [runtime, path.basename(hostEntry), MNEMOPI_EMBED_WORKER_ARG],
		cwd: path.dirname(hostEntry),
	};
}
77
+
78
+ interface SpawnedSubprocess {
79
+ proc: Subprocess<"ignore", "ignore", "ignore">;
80
+ inbound: Set<(message: MnemopiEmbedWorkerOutbound) => void>;
81
+ errors: Set<(error: Error) => void>;
82
+ /**
83
+ * Flipped to `true` right before the deliberate SIGKILL so `onExit` can
84
+ * distinguish the expected hard-kill from a crash (SIGSEGV from a native
85
+ * fault, OOM SIGKILL, operator `kill -9`). Only the latter surfaces as a
86
+ * worker error so callers don't await forever.
87
+ */
88
+ intentionalExit: { value: boolean };
89
+ }
90
+
91
/**
 * Launch the mnemopi embeddings worker as a child process and return the raw
 * pieces (process handle, listener sets, intentional-exit flag). Exported for
 * tests and the smoke probe; production callers go through
 * {@link spawnMnemopiEmbedWorker}.
 */
export function createMnemopiEmbedSubprocess(): SpawnedSubprocess {
	const { cmd, cwd } = mnemopiEmbedWorkerSpawnCmd();
	const inbound = new Set<(message: MnemopiEmbedWorkerOutbound) => void>();
	const errors = new Set<(error: Error) => void>();
	const intentionalExit = { value: false };

	const proc = Bun.spawn({
		cmd,
		cwd,
		env: mnemopiEmbedWorkerEnv(),
		stdin: "ignore",
		stdout: "ignore",
		stderr: "ignore",
		serialization: "advanced",
		windowsHide: true,
		// Fan every IPC message from the child out to the registered listeners.
		ipc(message) {
			for (const listener of inbound) listener(message as MnemopiEmbedWorkerOutbound);
		},
		onExit(_proc, exitCode, signalCode) {
			// A zero exit code and the deliberate kill (no exit code, flag
			// set) are not failures; anything else is reported.
			const cleanExit = exitCode === 0;
			const expectedKill = exitCode === null && intentionalExit.value;
			if (cleanExit || expectedKill) return;

			const reason = exitCode === null ? `signal ${signalCode ?? "unknown"}` : `code ${exitCode}`;
			const failure = new Error(`mnemopi embed subprocess exited with ${reason}`);
			for (const notify of errors) notify(failure);
		},
	});

	// An idle worker must not keep the parent event loop alive; the agent
	// dispose path calls `terminate()` explicitly. Bun's test runner starves
	// IPC for unref'd subprocesses, so the reference is kept under tests.
	if (!isBunTestRuntime()) proc.unref();

	return { proc, inbound, errors, intentionalExit };
}
126
+
127
/**
 * Adapt a spawned embeddings subprocess to the `MnemopiEmbedWorkerHandle`
 * interface.
 *
 * @param spawned - Process handle plus the listener sets and intentional-exit
 *   flag that the spawn-time `ipc` / `onExit` callbacks already reference.
 * @returns A handle whose `send` forwards over IPC, whose `onMessage` /
 *   `onError` register listeners and return an unsubscribe function, and
 *   whose `terminate` hard-kills the child.
 */
function wrapSubprocess({ proc, inbound, errors, intentionalExit }: SpawnedSubprocess): MnemopiEmbedWorkerHandle {
	return {
		send(message) {
			try {
				proc.send(message);
			} catch (error) {
				const reason = error instanceof Error ? error.message : String(error);
				logger.debug("mnemopi-embed: send to subprocess failed", { error: reason });
				// A message that never reached the child will never be
				// answered. Surface the failure through the error listeners
				// so whoever is awaiting the reply is released instead of
				// hanging on a request the worker never saw.
				const failure = new Error(`mnemopi embed subprocess send failed: ${reason}`);
				for (const handler of errors) handler(failure);
			}
		},
		onMessage(handler) {
			inbound.add(handler);
			return () => inbound.delete(handler);
		},
		onError(handler) {
			errors.add(handler);
			return () => errors.delete(handler);
		},
		async terminate() {
			// SIGKILL: the point of subprocess isolation is that the parent
			// never runs `onnxruntime-node`'s NAPI finalizer (it crashes Bun
			// on Windows). Hard-kill instead; the OS reclaims the model
			// memory. Flip the intentional-exit flag *before* killing so
			// `onExit` can tell this apart from a native crash.
			intentionalExit.value = true;
			try {
				proc.kill("SIGKILL");
			} catch {
				// Already gone.
			}
		},
	};
}
161
+
162
/**
 * In-process stand-in used when the real worker could not be spawned.
 *
 * It answers `ping` with `pong` and every other request with an `error`
 * carrying the spawn failure text. Replies are delivered on a microtask, so
 * `send` never calls listeners synchronously. It never reports worker errors.
 *
 * @param error - The spawn failure; its message (or string form) is echoed in
 *   every error reply.
 */
function spawnInlineUnavailableWorker(error: unknown): MnemopiEmbedWorkerHandle {
	const reason = error instanceof Error ? error.message : String(error);
	const subscribers = new Set<(message: MnemopiEmbedWorkerOutbound) => void>();

	function broadcast(reply: MnemopiEmbedWorkerOutbound): void {
		for (const subscriber of subscribers) subscriber(reply);
	}

	return {
		send(message) {
			queueMicrotask(() => {
				if (message.type !== "ping") {
					broadcast({ type: "error", id: message.id, error: reason });
					return;
				}
				broadcast({ type: "pong", id: message.id });
			});
		},
		onMessage(handler) {
			subscribers.add(handler);
			return () => subscribers.delete(handler);
		},
		onError() {
			// Nothing can fail asynchronously here; hand back a no-op unsubscribe.
			return () => {};
		},
		async terminate() {
			subscribers.clear();
		},
	};
}
190
+
191
/**
 * Default worker factory: spawn the real subprocess, or fall back to the
 * inline "unavailable" worker (after logging a warning) when spawning throws.
 */
function spawnMnemopiEmbedWorker(): MnemopiEmbedWorkerHandle {
	try {
		const subprocess = createMnemopiEmbedSubprocess();
		return wrapSubprocess(subprocess);
	} catch (cause) {
		const reason = cause instanceof Error ? cause.message : String(cause);
		logger.warn("mnemopi embed worker spawn failed; local embeddings disabled", { error: reason });
		return spawnInlineUnavailableWorker(cause);
	}
}
201
+
202
/** Re-emit a worker `log` message through the parent logger at the same level. */
function logWorkerMessage(message: Extract<MnemopiEmbedWorkerOutbound, { type: "log" }>): void {
	switch (message.level) {
		case "debug":
			logger.debug(message.msg, message.meta);
			break;
		case "warn":
			logger.warn(message.msg, message.meta);
			break;
		default:
			logger.error(message.msg, message.meta);
	}
}
207
+
208
+ /**
209
+ * Per-model wrapper produced by {@link MnemopiEmbedClient.initialize}.
210
+ * `embed()` round-trips one batch of texts through the worker subprocess and
211
+ * yields the resulting vectors in a single asynchronous batch — fastembed's
212
+ * own iterator was emitting batches that we collect on the child side anyway,
213
+ * and serializing per-batch over IPC would not improve throughput.
214
+ */
215
+ export interface MnemopiSubprocessEmbeddingModel {
216
+ embed(texts: string[], batchSize?: number): AsyncIterable<number[][]>;
217
+ }
218
+
219
/**
 * Parent-side client for the mnemopi embeddings worker.
 *
 * Lazily obtains one worker from the injected factory, tags every request
 * with a unique id, and settles the matching pending entry when the worker
 * replies. Failures are reported by value rather than by rejection: pending
 * `init` requests resolve to `false` and pending `embed` requests resolve to
 * an `Error`. Worker errors, termination, and malformed replies all settle
 * the affected requests that way.
 */
export class MnemopiEmbedClient {
	#worker: MnemopiEmbedWorkerHandle | null = null;
	#unsubscribeMessage: (() => void) | null = null;
	#unsubscribeError: (() => void) | null = null;
	#pending = new Map<string, PendingRequest>();
	#nextRequestId = 0;
	#spawnWorker: () => MnemopiEmbedWorkerHandle;

	/**
	 * @param spawnWorker - Factory used to create the worker on first use.
	 *   Defaults to the real subprocess spawner; tests can inject a fake.
	 */
	constructor(spawnWorker: () => MnemopiEmbedWorkerHandle = spawnMnemopiEmbedWorker) {
		this.#spawnWorker = spawnWorker;
	}

	/**
	 * Load the named fastembed model inside the subprocess. Resolves to a
	 * thin wrapper whose `embed()` round-trips through the same worker, or
	 * `null` when the worker cannot init the model (missing peer, native
	 * load failure, etc.). Multiple calls with the same model reuse the
	 * single in-flight worker; calling with a different model loads it on
	 * the child without restarting the process.
	 *
	 * @param model - Fastembed model identifier to load.
	 * @param cacheDir - Model cache directory forwarded to the worker, if any.
	 * @returns The embedding wrapper, or `null` on any init failure.
	 */
	async initialize(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
	): Promise<MnemopiSubprocessEmbeddingModel | null> {
		try {
			const worker = this.#ensureWorker();
			const id = String(++this.#nextRequestId);
			const { promise, resolve } = Promise.withResolvers<boolean>();
			this.#pending.set(id, { kind: "init", model, resolve });
			try {
				worker.send({ type: "init", id, model, cacheDir });
				const ok = await promise;
				if (!ok) return null;
			} finally {
				this.#pending.delete(id);
			}
		} catch (error) {
			logger.debug("mnemopi-embed: init failed", {
				model,
				error: error instanceof Error ? error.message : String(error),
			});
			return null;
		}
		return { embed: (texts, batchSize) => this.#streamEmbed(model, cacheDir, texts, batchSize) };
	}

	/**
	 * Drop the current worker, fail every outstanding request, and terminate
	 * the worker. A later `initialize()` / `embed()` spawns a fresh worker.
	 */
	async terminate(): Promise<void> {
		const worker = this.#worker;
		this.#worker = null;
		this.#unsubscribeMessage?.();
		this.#unsubscribeMessage = null;
		this.#unsubscribeError?.();
		this.#unsubscribeError = null;
		this.#failAllPending(new Error("mnemopi embed worker terminated"));
		try {
			await worker?.terminate();
		} catch {
			// Already gone.
		}
	}

	/**
	 * Send one `embed` request and wait for its vectors.
	 *
	 * @throws The `Error` the request was settled with (worker error reply,
	 *   worker failure, or termination).
	 */
	async #embed(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
		texts: string[],
		batchSize: number | undefined,
	): Promise<number[][]> {
		const worker = this.#ensureWorker();
		const id = String(++this.#nextRequestId);
		const { promise, resolve } = Promise.withResolvers<number[][] | Error>();
		this.#pending.set(id, { kind: "embed", model, resolve });
		try {
			// Carry the (model, cacheDir) the wrapper was bound to in every
			// embed message: dispose + respawn between two embeds on the same
			// `LocalEmbeddingModel` handle would otherwise hit a fresh
			// worker's "embed before init" guard. Worker `ensureLoaded` is
			// idempotent so steady-state embeds pay no extra cost.
			worker.send({ type: "embed", id, model, cacheDir, texts, batchSize });
			const result = await promise;
			if (result instanceof Error) throw result;
			return result;
		} finally {
			this.#pending.delete(id);
		}
	}

	/** Async-iterable view over {@link MnemopiEmbedClient.#embed}: yields the whole result once. */
	async *#streamEmbed(
		model: MnemopiEmbedModelId,
		cacheDir: string | undefined,
		texts: string[],
		batchSize: number | undefined,
	): AsyncIterable<number[][]> {
		const vectors = await this.#embed(model, cacheDir, texts, batchSize);
		// Mnemopi's `collectMatrix` re-batches via async iteration anyway; yield
		// a single batch carrying the full result so the caller's drain loop
		// behaves identically to the in-process fastembed iterator (one yield
		// per `embed()` call) without paying extra IPC round-trips.
		yield vectors;
	}

	/** Return the live worker, spawning and subscribing to it on first use. */
	#ensureWorker(): MnemopiEmbedWorkerHandle {
		if (this.#worker) return this.#worker;
		const worker = this.#spawnWorker();
		this.#worker = worker;
		this.#unsubscribeMessage = worker.onMessage(message => this.#handleMessage(message));
		this.#unsubscribeError = worker.onError(error => this.#handleWorkerError(error));
		return worker;
	}

	/** Route one worker message: forward logs, ignore pongs, settle the matching request. */
	#handleMessage(message: MnemopiEmbedWorkerOutbound): void {
		if (message.type === "log") {
			logWorkerMessage(message);
			return;
		}
		if (message.type === "pong") return;

		const pending = this.#pending.get(message.id);
		if (!pending) return;
		this.#pending.delete(message.id);

		if (message.type === "error") {
			logger.debug("mnemopi-embed: worker returned error", { error: message.error });
			this.#failPending(pending, new Error(message.error));
			return;
		}
		if (message.type === "ready" && pending.kind === "init") {
			pending.resolve(true);
			return;
		}
		if (message.type === "vectors" && pending.kind === "embed") {
			pending.resolve(message.vectors);
			return;
		}

		// The reply type does not match the request kind (`ready` for an
		// embed, `vectors` for an init). The entry is already removed from
		// the map, so it must be settled here or its caller waits forever.
		logger.warn("mnemopi-embed: worker reply does not match request", {
			id: message.id,
			expected: pending.kind,
			received: message.type,
		});
		this.#failPending(
			pending,
			new Error(`mnemopi embed worker sent "${message.type}" in reply to "${pending.kind}" request`),
		);
	}

	/** Worker-level failure: fail every outstanding request and tear the worker down. */
	#handleWorkerError(error: Error): void {
		logger.warn("mnemopi-embed: worker error", { error: error.message });
		this.#failAllPending(error);
		void this.terminate();
	}

	/** Settle one request as failed: `false` for init, the error itself for embed. */
	#failPending(pending: PendingRequest, error: Error): void {
		if (pending.kind === "init") pending.resolve(false);
		else pending.resolve(error);
	}

	/** Fail and forget every outstanding request. */
	#failAllPending(error: Error): void {
		for (const pending of this.#pending.values()) this.#failPending(pending, error);
		this.#pending.clear();
	}
}
365
+
366
+ export const mnemopiEmbedClient = new MnemopiEmbedClient();
367
+
368
/** Terminate the shared {@link mnemopiEmbedClient}; resolves once its `terminate()` has finished. */
export async function shutdownMnemopiEmbedClient(): Promise<void> {
	return mnemopiEmbedClient.terminate();
}
371
+
372
/**
 * Spawn a throwaway worker, send a single `ping`, and wait for the `pong`.
 *
 * @param options.timeoutMs - How long to wait for the pong before failing.
 * @throws If the worker reports an error, replies with anything other than
 *   `pong` / `log`, or stays silent past the timeout. The worker is
 *   terminated in every case.
 */
export async function smokeTestMnemopiEmbedWorker({
	timeoutMs = SMOKE_TEST_TIMEOUT_MS,
}: {
	timeoutMs?: number;
} = {}): Promise<void> {
	const worker = wrapSubprocess(createMnemopiEmbedSubprocess());
	const probe = Promise.withResolvers<void>();

	const timer = setTimeout(() => {
		probe.reject(new Error(`mnemopi embed worker did not pong within ${timeoutMs}ms`));
	}, timeoutMs);

	const stopMessages = worker.onMessage(reply => {
		switch (reply.type) {
			case "pong":
				probe.resolve();
				return;
			case "log":
				// Worker chatter is not a protocol violation.
				return;
			default:
				probe.reject(new Error(`mnemopi embed worker: expected pong, got ${JSON.stringify(reply)}`));
		}
	});
	const stopErrors = worker.onError(probe.reject);

	try {
		worker.send({ type: "ping", id: "smoke" } satisfies MnemopiEmbedWorkerInbound);
		await probe.promise;
	} finally {
		clearTimeout(timer);
		stopMessages();
		stopErrors();
		await worker.terminate();
	}
}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Wire types between the parent (`MnemopiEmbedClient`) and the local
3
+ * embeddings subprocess. The parent owns the subprocess lifecycle (graceful
4
+ * work, hard `SIGKILL` on shutdown); the protocol carries no explicit close
5
+ * handshake — once the parent decides to terminate, it signals the OS to reap
6
+ * the child so `onnxruntime-node`'s NAPI finalizer never runs in the main
7
+ * agent address space (it crashes Bun on Windows shutdown — issue #3031, the
8
+ * mnemopi sibling of the tiny-model fix from #1606/#1607). See
9
+ * `embed-client.ts` for the spawn/kill glue.
10
+ */
11
+
12
+ /** Identifier of the fastembed model the worker should load (e.g. `fast-bge-base-en-v1.5`). */
13
+ export type MnemopiEmbedModelId = string;
14
+
15
+ export type MnemopiEmbedWorkerInbound =
16
+ | { type: "ping"; id: string }
17
+ | { type: "init"; id: string; model: MnemopiEmbedModelId; cacheDir?: string }
18
+ // `embed` always carries the same `model` / `cacheDir` the wrapper was
19
+ // initialized with so a fresh subprocess (after the parent SIGKILLed the
20
+ // previous one but mnemopi still holds the cached `LocalEmbeddingModel`)
21
+ // can lazily reload the model on demand instead of returning
22
+ // "embed before init".
23
+ | { type: "embed"; id: string; model: MnemopiEmbedModelId; cacheDir?: string; texts: string[]; batchSize?: number };
24
+
25
+ export type MnemopiEmbedWorkerOutbound =
26
+ | { type: "pong"; id: string }
27
+ | { type: "ready"; id: string }
28
+ | { type: "vectors"; id: string; vectors: number[][] }
29
+ | { type: "error"; id: string; error: string }
30
+ | { type: "log"; level: "debug" | "warn" | "error"; msg: string; meta?: Record<string, unknown> };
31
+
32
+ export interface MnemopiEmbedTransport {
33
+ send(message: MnemopiEmbedWorkerOutbound): void;
34
+ onMessage(handler: (message: MnemopiEmbedWorkerInbound) => void): () => void;
35
+ }
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Mnemopi local-embeddings worker. Loaded inside the dedicated subprocess
3
+ * spawned by `embed-client.ts` (re-entered through the agent CLI's hidden
4
+ * `__omp_worker_mnemopi_embed` selector). The whole point of this module is
5
+ * that `loadFastembed()` — and therefore `onnxruntime-node`'s NAPI
6
+ * constructor + finalizer — only ever runs in this child address space. The
7
+ * parent `SIGKILL`s us on shutdown so the destructor that crashes Bun on
8
+ * Windows shutdown (issue #3031, mnemopi sibling of #1606/#1607) never runs
9
+ * in either process.
10
+ */
11
+
12
+ import type { StandardEmbeddingModel } from "@oh-my-pi/pi-mnemopi/core";
13
+ import { loadFastembed } from "@oh-my-pi/pi-mnemopi/core/fastembed-runtime";
14
+ import type { MnemopiEmbedModelId, MnemopiEmbedTransport, MnemopiEmbedWorkerInbound } from "./embed-protocol";
15
+
16
+ interface LoadedModel {
17
+ model: MnemopiEmbedModelId;
18
+ cacheDir: string | undefined;
19
+ instance: {
20
+ embed(texts: string[], batchSize?: number): AsyncIterable<number[][]> | Iterable<number[][]>;
21
+ };
22
+ }
23
+
24
+ let loaded: Promise<LoadedModel> | null = null;
25
+ let loadedKey = "";
26
+
27
/**
 * Load fastembed and initialise one embedding model.
 *
 * @param model - Model identifier received from the parent.
 * @param cacheDir - Model cache directory, or `undefined` to pass none.
 * @returns The model id and cache dir it was loaded with, plus the instance.
 */
async function loadModel(model: MnemopiEmbedModelId, cacheDir: string | undefined): Promise<LoadedModel> {
	const { FlagEmbedding } = await loadFastembed();
	// `model` is a plain string on the wire (resolved by mnemopi's
	// `fastembedModelName`). The cast selects the non-CUSTOM overload so
	// TypeScript takes the standard-model branch; the parent only ever
	// passes pre-vetted fast-* identifiers.
	const standardModel = model as StandardEmbeddingModel;
	const instance = await FlagEmbedding.init({
		model: standardModel,
		cacheDir,
		showDownloadProgress: false,
	});
	return { model, cacheDir, instance };
}
40
+
41
/**
 * Return the load promise for `(model, cacheDir)`, starting a load only when
 * the cached promise belongs to a different key (or nothing is cached).
 * A single slot is kept: requesting another key replaces the cached entry.
 */
function ensureLoaded(model: MnemopiEmbedModelId, cacheDir: string | undefined): Promise<LoadedModel> {
	const key = [model, cacheDir ?? ""].join("\u0000");
	if (loaded !== null && key === loadedKey) return loaded;

	const attempt: Promise<LoadedModel> = loadModel(model, cacheDir).catch(error => {
		// A failed load must not poison the cache: clear the slot so a retry
		// with the same key loads again — but only if the slot still holds
		// this attempt and was not replaced in the meantime.
		if (loaded === attempt) {
			loaded = null;
			loadedKey = "";
		}
		throw error;
	});

	loadedKey = key;
	loaded = attempt;
	return attempt;
}
57
/**
 * Serve one `embed` request: make sure the model is loaded, drain every batch
 * the model yields into one array, and reply with `vectors`. Any failure is
 * reported back as an `error` reply with the same id.
 */
async function handleEmbed(
	transport: MnemopiEmbedTransport,
	message: Extract<MnemopiEmbedWorkerInbound, { type: "embed" }>,
): Promise<void> {
	const { id, model, cacheDir, texts, batchSize } = message;
	try {
		// Every `embed` names the model + cacheDir its wrapper was bound to.
		// `ensureLoaded` is idempotent per key, so this costs nothing once
		// the model is in memory, and it transparently reloads when the
		// parent SIGKILLed the previous subprocess while mnemopi still holds
		// the cached `LocalEmbeddingModel` wrapper from before.
		const { instance } = await ensureLoaded(model, cacheDir);
		const vectors: number[][] = [];
		for await (const batch of instance.embed([...texts], batchSize)) {
			for (const row of batch) vectors.push(row);
		}
		transport.send({ type: "vectors", id, vectors });
	} catch (error) {
		const reason = error instanceof Error ? error.message : String(error);
		transport.send({ type: "error", id, error: reason });
	}
}
82
+
83
/**
 * Serve one `init` request: load (or reuse) the requested model and reply
 * `ready`, or reply `error` with the failure text.
 */
async function handleInit(
	transport: MnemopiEmbedTransport,
	message: Extract<MnemopiEmbedWorkerInbound, { type: "init" }>,
): Promise<void> {
	const { id, model, cacheDir } = message;
	try {
		await ensureLoaded(model, cacheDir);
		transport.send({ type: "ready", id });
	} catch (error) {
		const reason = error instanceof Error ? error.message : String(error);
		transport.send({ type: "error", id, error: reason });
	}
}
98
+
99
/**
 * Attach the worker's request dispatcher to `transport`.
 *
 * `ping` is answered synchronously with `pong`; `init` and `embed` are handed
 * to their async handlers without awaiting them (each handler reports its own
 * failures back over the transport).
 */
export function startMnemopiEmbedWorker(transport: MnemopiEmbedTransport): void {
	transport.onMessage(message => {
		if (message.type === "ping") {
			transport.send({ type: "pong", id: message.id });
		} else if (message.type === "init") {
			void handleInit(transport, message);
		} else if (message.type === "embed") {
			void handleEmbed(transport, message);
		}
	});
}
@@ -3,6 +3,7 @@ import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
3
3
  import type * as MnemopiNs from "@oh-my-pi/pi-mnemopi";
4
4
  import type { Mnemopi, RecallResult } from "@oh-my-pi/pi-mnemopi";
5
5
  import type * as MnemopiCoreNs from "@oh-my-pi/pi-mnemopi/core";
6
+ import type { LocalModelInitializer } from "@oh-my-pi/pi-mnemopi/core";
6
7
  import { logger } from "@oh-my-pi/pi-utils";
7
8
  import {
8
9
  composeRecallQuery,
@@ -13,16 +14,42 @@ import {
13
14
  import { extractMessages } from "../hindsight/transcript";
14
15
  import type { AgentSession, AgentSessionEvent } from "../session/agent-session";
15
16
  import type { MnemopiBackendConfig, MnemopiScoping } from "./config";
17
+ import { mnemopiEmbedClient } from "./embed-client";
16
18
 
17
19
  // The mnemopi package pulls the embeddings stack; keep it off the CLI startup
18
20
  // module graph by loading it lazily at the async boundaries that need it.
19
21
  let mnemopiMod: typeof MnemopiNs | undefined;
20
22
  let mnemopiCoreMod: typeof MnemopiCoreNs | undefined;
21
23
 
22
- /** Lazily load `@oh-my-pi/pi-mnemopi` (memoized). */
24
+ // `setLocalModelInitializer` writes a single module-level slot shared by
25
+ // both the root and `/core` re-exports, so install at most once across both
26
+ // loaders. Either entry point is enough to wire up the override.
27
+ let localModelInitializerInstalled = false;
28
+
29
/**
 * Install (at most once per process) the initializer that routes local model
 * loads through `mnemopiEmbedClient`.
 *
 * @param setInitializer - The package's `setLocalModelInitializer` export.
 */
function installLocalModelInitializer(setInitializer: (initializer: LocalModelInitializer) => void): void {
	if (localModelInitializerInstalled) return;
	localModelInitializerInstalled = true;
	setInitializer(async ({ model, cacheDir }) => {
		const handle = await mnemopiEmbedClient.initialize(model, cacheDir);
		// `initialize` reports failure as `null`; turn it into a rejection.
		if (!handle) throw new Error("mnemopi embed subprocess unavailable");
		return handle;
	});
}
39
+
40
+ /**
41
+ * Lazily load `@oh-my-pi/pi-mnemopi` (memoized) and route fastembed loads
42
+ * through the dedicated embeddings subprocess. The override is installed once
43
+ * — before any consumer gets the chance to call `embed()` — so
44
+ * `onnxruntime-node`'s NAPI constructor + finalizer never run inside the
45
+ * agent's address space (issue #3031). Test seams that swap the initializer
46
+ * with `setLocalModelInitializerForTests` still win because both go through
47
+ * the same module-level slot.
48
+ */
23
49
  export async function loadMnemopi(): Promise<typeof MnemopiNs> {
24
50
  if (!mnemopiMod) {
25
51
  mnemopiMod = await import("@oh-my-pi/pi-mnemopi");
52
+ installLocalModelInitializer(mnemopiMod.setLocalModelInitializer);
26
53
  }
27
54
  return mnemopiMod;
28
55
  }
@@ -31,6 +58,7 @@ export async function loadMnemopi(): Promise<typeof MnemopiNs> {
31
58
  export async function loadMnemopiCore(): Promise<typeof MnemopiCoreNs> {
32
59
  if (!mnemopiCoreMod) {
33
60
  mnemopiCoreMod = await import("@oh-my-pi/pi-mnemopi/core");
61
+ installLocalModelInitializer(mnemopiCoreMod.setLocalModelInitializer);
34
62
  }
35
63
  return mnemopiCoreMod;
36
64
  }
@@ -4,9 +4,9 @@ import { formatNumber } from "@oh-my-pi/pi-utils";
4
4
  import { theme } from "../../modes/theme/theme";
5
5
 
6
6
  /**
7
- * Minimum cached prefix (read + write) the previous turn must have established
8
- * before a collapse on the current turn counts as an invalidation. Filters out
9
- * tiny contexts and providers below the cacheable-prefix floor, where a zero
7
+ * Minimum prefix the previous turn must have READ back from cache before a
8
+ * collapse on the current turn counts as an invalidation. Filters out tiny
9
+ * contexts and providers below the cacheable-prefix floor, where a zero
10
10
  * `cacheRead` is expected rather than a reset.
11
11
  */
12
12
  const MIN_CACHE_FOOTPRINT = 2048;
@@ -18,25 +18,41 @@ export interface CacheInvalidation {
18
18
  }
19
19
 
20
20
  /**
21
- * Decide whether `current` turn lost the prompt cache that `prev` established.
21
+ * Decide whether `current` turn lost a *working* prompt cache that `prev` was
22
+ * reusing.
22
23
  *
23
24
  * The provider reports a warm prefix as `cacheRead`; a model/thinking/tool/
24
25
  * system-prompt change (or a history rewrite) breaks the prefix, so the next
25
- * request reads nothing from cache and re-pays for the whole prompt. We detect
26
- * that as: the previous turn cached a meaningful prefix, yet this turn's
26
+ * request reads nothing from cache and re-pays for the whole prompt. We flag
27
+ * only the transition where a demonstrably warm cache goes cold: the previous
28
+ * turn must have actually READ a meaningful prefix back, and this turn's
27
29
  * `cacheRead` collapsed to zero while it still reprocessed a non-trivial prompt.
28
- * Returns `undefined` (no marker) for the first turn, tiny contexts, turns
29
- * that reused any cache, and crucially turns on providers with *implicit*
30
- * best-effort caching. Only an explicit, prefix-controlled cache (Anthropic /
31
- * Bedrock `cache_control`) re-creates the prefix on a cold turn (`cacheWrite >
32
- * 0`); implicit caches (Google / OpenAI / Fireworks) report `cacheWrite: 0` and
33
- * drop `cacheRead` to zero intermittently as routine propagation noise that
34
- * self-heals the next turn, so flagging it would be a false positive.
30
+ *
31
+ * Requiring a prior warm read is deliberate. A turn that merely WROTE the prefix
32
+ * (`cacheRead` 0) has not proven the cache is live — that is the session's first
33
+ * request, or a re-write after expiry so a following cold turn there is
34
+ * expected, not an invalidation the user caused (e.g. a long-running first tool
35
+ * call outliving the provider's 5-minute cache TTL surfaced a spurious "cache
36
+ * miss" right under the opening message). It also collapses a run of consecutive
37
+ * cold turns to the single marker at the moment the cache actually broke, instead
38
+ * of repeating the banner on every turn while it re-warms.
39
+ *
40
+ * Returns `undefined` (no marker) for the first turn, turns whose predecessor
41
+ * never read a warm prefix, tiny contexts, turns that reused any cache, and —
42
+ * crucially — turns on providers with *implicit* best-effort caching. Only an
43
+ * explicit, prefix-controlled cache (Anthropic / Bedrock `cache_control`)
44
+ * re-creates the prefix on a cold turn (`cacheWrite > 0`); implicit caches
45
+ * (Google / OpenAI / Fireworks) report `cacheWrite: 0` and drop `cacheRead` to
46
+ * zero intermittently as routine propagation noise that self-heals the next
47
+ * turn, so flagging it would be a false positive.
35
48
  */
36
49
  export function detectCacheInvalidation(prev: Usage | undefined, current: Usage): CacheInvalidation | undefined {
37
50
  if (!prev) return undefined;
38
- const prevFootprint = prev.cacheRead + prev.cacheWrite;
39
- if (prevFootprint < MIN_CACHE_FOOTPRINT) return undefined;
51
+ // Only flag a warm→cold transition: the previous turn must have actually read
52
+ // a meaningful prefix from cache. A write-only predecessor (first request, or
53
+ // a re-write after expiry) has not proven the cache is live, so a cold turn
54
+ // behind it is expected — not an invalidation worth surfacing.
55
+ if (prev.cacheRead < MIN_CACHE_FOOTPRINT) return undefined;
40
56
  // Any cache reuse this turn means the prefix survived (at least partly).
41
57
  if (current.cacheRead > 0) return undefined;
42
58
  // Only an explicit, prefix-controlled cache re-creates the prefix on a cold