@oh-my-pi/pi-mnemopi 16.1.2 → 16.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.1.3] - 2026-06-19
6
+
7
+ ### Added
8
+
9
+ - Exposed `setLocalModelInitializer` (and the `LocalEmbeddingModel`, `LocalModelInitializer`, `LocalModelInitOptions`, `StandardEmbeddingModel` types) so hosts can route fastembed loads through a dedicated subprocess and keep `onnxruntime-node`'s NAPI constructor + finalizer out of their own address space. Like the existing `setLocalModelInitializerForTests` seam, calling it clears the cached model promise and the query cache, so subsequent embeds run through the new initializer immediately; the agent CLI uses it to crash-proof Windows when `memory.backend: mnemopi` is enabled ([#3031](https://github.com/can1357/oh-my-pi/issues/3031)).
10
+
11
+ ### Fixed
12
+
13
+ - Fixed background fact extraction skipping runtime-configured remote LLM endpoints when `MNEMOPI_LLM_BASE_URL` was unset, so `remember(..., { extract: true })` now stores remote-distilled facts from `mnemopi.llm` config instead of falling back to regex heuristics. ([#3041](https://github.com/can1357/oh-my-pi/issues/3041))
14
+ - Fixed local fastembed startup on macOS ARM64 by letting `fastembed@2.1.0` install its matching `onnxruntime-node@1.21.0` native runtime instead of forcing `1.26.0`, and by repairing missing tokenizer sidecars from the upstream Hugging Face model cache when a stale fastembed archive lacks them. ([#3054](https://github.com/can1357/oh-my-pi/issues/3054))
15
+
5
16
  ## [16.0.6] - 2026-06-18
6
17
 
7
18
  ### Fixed
@@ -8,17 +8,17 @@ export interface EmbeddingProvider {
8
8
  embed(texts: readonly string[]): EmbeddingOutput | Promise<EmbeddingOutput>;
9
9
  available?(): boolean | Promise<boolean>;
10
10
  }
11
- type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
12
- interface LocalEmbeddingModel {
11
+ export type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
12
+ export interface LocalEmbeddingModel {
13
13
  embed(texts: string[], batchSize?: number): EmbeddingOutput;
14
14
  queryEmbed?(query: string): Promise<number[]>;
15
15
  }
16
- type LocalModelInitOptions = {
16
+ export type LocalModelInitOptions = {
17
17
  model: StandardEmbeddingModel;
18
18
  cacheDir?: string;
19
19
  showDownloadProgress?: boolean;
20
20
  };
21
- type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
21
+ export type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
22
22
  export declare function embeddingsDisabled(): boolean;
23
23
  /**
24
24
  * Resolve the embedding model name for the currently active runtime scope.
@@ -34,6 +34,15 @@ export declare function embeddingDimFor(modelName: string): number;
34
34
  export declare function setEmbeddingProviderForTests(provider: EmbeddingProvider | null | undefined): void;
35
35
  export declare const setEmbeddingProvider: typeof setEmbeddingProviderForTests;
36
36
  export declare function setLocalModelInitializerForTests(initializer: LocalModelInitializer | null | undefined): void;
37
+ /**
38
+ * Override the function used to construct the local fastembed model the next
39
+ * time `embed()` is called. Lets a host (e.g. the agent CLI) keep
40
+ * `onnxruntime-node` out of its own address space by routing every fastembed
41
+ * load + inference through a dedicated subprocess. Same wipe semantics as the
42
+ * `*ForTests` form: clears the cached model promise and the query cache so
43
+ * subsequent embeds run through the new initializer immediately.
44
+ */
45
+ export declare const setLocalModelInitializer: typeof setLocalModelInitializerForTests;
37
46
  export declare function resetEmbeddingProviderForTests(): void;
38
47
  export declare const resetEmbeddingStateForTests: typeof resetEmbeddingProviderForTests;
39
48
  export declare function available(): Promise<boolean>;
@@ -0,0 +1,2 @@
1
+ /** Download missing config/tokenizer sidecars into a fastembed model cache directory. */
2
+ export declare function ensureFastembedModelSidecars(model: string, cacheDir?: string): Promise<boolean>;
@@ -1,5 +1,6 @@
1
1
  export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
2
2
  export * from "./banks";
3
3
  export * from "./beam/index";
4
+ export { type LocalEmbeddingModel, type LocalModelInitializer, type LocalModelInitOptions, type StandardEmbeddingModel, setLocalModelInitializer, } from "./embeddings";
4
5
  export * from "./memory";
5
6
  export { addMemory, forget, get, getBank, getContext, getDefaultInstance, getStats, Mnemopi, query, recall, recallEnhanced, remember, resetDefaultInstanceForTests, resetMemoryForTests, resetModuleStateForTests, saveMemory, scratchpadClear, scratchpadRead, scratchpadWrite, search, setBank, sleep, sleepAllSessions, storeMemory, update, } from "./memory";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-mnemopi",
4
- "version": "16.1.2",
4
+ "version": "16.1.4",
5
5
  "description": "Local SQLite memory engine for Oh My Pi agents",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -39,14 +39,14 @@
39
39
  "fmt": "biome format --write ."
40
40
  },
41
41
  "dependencies": {
42
- "@oh-my-pi/pi-ai": "16.1.2",
43
- "@oh-my-pi/pi-catalog": "16.1.2",
44
- "@oh-my-pi/pi-utils": "16.1.2",
42
+ "@oh-my-pi/pi-ai": "16.1.4",
43
+ "@oh-my-pi/pi-catalog": "16.1.4",
44
+ "@oh-my-pi/pi-utils": "16.1.4",
45
45
  "lru-cache": "11.5.1"
46
46
  },
47
47
  "peerDependencies": {
48
48
  "fastembed": "2.1.0",
49
- "onnxruntime-node": "1.26.0"
49
+ "onnxruntime-node": "1.21.0"
50
50
  },
51
51
  "peerDependenciesMeta": {
52
52
  "fastembed": {
@@ -11,6 +11,7 @@ import {
11
11
  } from "@oh-my-pi/pi-utils";
12
12
  import type { EmbeddingModel } from "fastembed";
13
13
  import { LRUCache } from "lru-cache/raw";
14
+ import { ensureFastembedModelSidecars } from "./fastembed-model-cache";
14
15
  import { loadFastembed } from "./fastembed-runtime";
15
16
  import {
16
17
  type EmbeddingOutput,
@@ -30,19 +31,19 @@ export interface EmbeddingProvider {
30
31
  available?(): boolean | Promise<boolean>;
31
32
  }
32
33
 
33
- type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
34
+ export type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
34
35
 
35
- interface LocalEmbeddingModel {
36
+ export interface LocalEmbeddingModel {
36
37
  embed(texts: string[], batchSize?: number): EmbeddingOutput;
37
38
  queryEmbed?(query: string): Promise<number[]>;
38
39
  }
39
40
 
40
- type LocalModelInitOptions = {
41
+ export type LocalModelInitOptions = {
41
42
  model: StandardEmbeddingModel;
42
43
  cacheDir?: string;
43
44
  showDownloadProgress?: boolean;
44
45
  };
45
- type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
46
+ export type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
46
47
 
47
48
  const QUERY_CACHE_MAX = 512;
48
49
 
@@ -62,7 +63,20 @@ let nextProviderId = 1;
62
63
 
63
64
  async function defaultLocalModelInitializer(options: LocalModelInitOptions): Promise<LocalEmbeddingModel> { // Default initializer: loads fastembed and builds the model via FlagEmbedding.init, retrying once after a sidecar repair.
64
65
  const { FlagEmbedding } = await loadFastembed();
65
- return FlagEmbedding.init(options);
66
+ try {
67
+ return await FlagEmbedding.init(options);
68
+ } catch (error) {
69
+ const message = error instanceof Error ? error.message : ""; // non-Error throwables yield "" and therefore never match the pattern below
70
+ if (
71
+ !/(?:Config file not found at .*config|Tokenizer file not found at .*tokenizer|Tokens map file not found at .*special_tokens_map)/u.test(
72
+ message,
73
+ )
74
+ ) {
75
+ throw error; // not a missing config/tokenizer/tokens-map file error: rethrow unchanged
76
+ }
77
+ if (!(await ensureFastembedModelSidecars(options.model, options.cacheDir))) throw error; // helper returned false, so nothing was repaired: rethrow the original error
78
+ return FlagEmbedding.init(options); // single retry after the sidecar helper reported success
79
+ }
66
80
  }
67
81
 
68
82
  function activeEmbeddingOptions() {
@@ -324,6 +338,16 @@ export function setLocalModelInitializerForTests(initializer: LocalModelInitiali
324
338
  queryCache.clear();
325
339
  }
326
340
 
341
+ /**
342
+ * Override the function used to construct the local fastembed model the next
343
+ * time `embed()` is called. Lets a host (e.g. the agent CLI) keep
344
+ * `onnxruntime-node` out of its own address space by routing every fastembed
345
+ * load + inference through a dedicated subprocess. Same wipe semantics as the
346
+ * `*ForTests` form: clears the cached model promise and the query cache so
347
+ * subsequent embeds run through the new initializer immediately.
348
+ */
349
+ export const setLocalModelInitializer = setLocalModelInitializerForTests;
350
+
327
351
  export function resetEmbeddingProviderForTests(): void {
328
352
  providerOverride = null;
329
353
  localModelPromise = null;
@@ -35,10 +35,6 @@ function hostLlmEnabled(): boolean {
35
35
  return envBool("MNEMOPI_HOST_LLM_ENABLED", false);
36
36
  }
37
37
 
38
- function llmBaseUrl(): string {
39
- return env("MNEMOPI_LLM_BASE_URL").replace(/\/+$/, "");
40
- }
41
-
42
38
  function llmMaxTokens(): number {
43
39
  return envInt("MNEMOPI_LLM_MAX_TOKENS", 2048);
44
40
  }
@@ -301,23 +297,21 @@ export async function extractFacts(text: string | null | undefined, options: Rem
301
297
  return [];
302
298
  }
303
299
 
304
- if (llmEnabled() && llmBaseUrl() !== "") {
305
- diag.recordAttempt("remote");
306
- try {
307
- const raw = await callRemoteLlm(prompt, 0, options);
308
- if (raw !== null) {
309
- const facts = parseFacts(cleanOutput(raw));
310
- if (facts.length > 0) {
311
- diag.recordSuccess("remote", facts.length);
312
- diag.recordCall({ succeeded: true });
313
- return facts;
314
- }
300
+ diag.recordAttempt("remote");
301
+ try {
302
+ const raw = await callRemoteLlm(prompt, 0, options);
303
+ if (raw !== null) {
304
+ const facts = parseFacts(cleanOutput(raw));
305
+ if (facts.length > 0) {
306
+ diag.recordSuccess("remote", facts.length);
307
+ diag.recordCall({ succeeded: true });
308
+ return facts;
315
309
  }
316
- diag.recordNoOutput("remote");
317
- } catch (exc) {
318
- diag.recordFailure("remote", exc, "remote_call_raised");
319
- console.warn(`extractFacts: remote LLM raised: ${safeForLog(exc)}`);
320
310
  }
311
+ diag.recordNoOutput("remote");
312
+ } catch (exc) {
313
+ diag.recordFailure("remote", exc, "remote_call_raised");
314
+ console.warn(`extractFacts: remote LLM raised: ${safeForLog(exc)}`);
321
315
  }
322
316
 
323
317
  return localFallback(prompt, text, diag);
@@ -0,0 +1,39 @@
1
+ import * as path from "node:path";
2
+
3
+ const FASTEMBED_MODEL_SIDECARS = [
4
+ "config.json",
5
+ "tokenizer.json",
6
+ "tokenizer_config.json",
7
+ "special_tokens_map.json",
8
+ ] as const;
9
+
10
+ const FASTEMBED_HF_REPOS: Record<string, string> = {
11
+ "fast-all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
12
+ "fast-bge-base-en": "BAAI/bge-base-en",
13
+ "fast-bge-base-en-v1.5": "BAAI/bge-base-en-v1.5",
14
+ "fast-bge-small-en": "BAAI/bge-small-en",
15
+ "fast-bge-small-en-v1.5": "BAAI/bge-small-en-v1.5",
16
+ "fast-bge-small-zh-v1.5": "BAAI/bge-small-zh-v1.5",
17
+ "fast-multilingual-e5-large": "intfloat/multilingual-e5-large",
18
+ };
19
+
20
+ /** Download missing config/tokenizer sidecars into a fastembed model cache directory. Resolves `false` when `model` has no entry in `FASTEMBED_HF_REPOS`, `true` once every listed sidecar exists under `<cacheDir>/<model>`; throws if a download response is not OK. */
21
+ export async function ensureFastembedModelSidecars(model: string, cacheDir = "local_cache"): Promise<boolean> {
22
+ const repo = FASTEMBED_HF_REPOS[model]; // Hugging Face repo id the sidecars are fetched from
23
+ if (repo === undefined) return false; // unknown model: there is no upstream repo to repair from
24
+
25
+ const modelDir = path.join(cacheDir, model);
26
+ for (const fileName of FASTEMBED_MODEL_SIDECARS) {
27
+ const target = path.join(modelDir, fileName);
28
+ if (await Bun.file(target).exists()) continue; // keep sidecars that are already on disk
29
+
30
+ const response = await fetch(`https://huggingface.co/${repo}/resolve/main/${fileName}`);
31
+ if (!response.ok) {
32
+ throw new Error(
33
+ `Failed to download ${model} ${fileName} from ${repo}: ${response.status} ${response.statusText}`,
34
+ );
35
+ }
36
+ await Bun.write(target, await response.arrayBuffer()); // store the downloaded bytes at the sidecar path
37
+ }
38
+ return true;
39
+ }
@@ -22,27 +22,27 @@ export interface FastembedRuntimeInstallPlan {
22
22
  }
23
23
 
24
24
  /**
25
- * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
26
- * assets across platforms), never bundled and never installed eagerly. When
27
- * the direct import cannot resolve — bundled `dist/cli.js`, compiled binary,
28
- * a consumer that skipped the optional peers, or a native loader failure from
29
- * fastembed's nested ORT the pinned pair is `bun install`ed into a
30
- * per-version runtime cache on first use and loaded from there (#2389, #2920).
25
+ * `fastembed` is an optional peer (~270MB of native assets across platforms),
26
+ * never bundled and never installed eagerly. When the direct import cannot
27
+ * resolve — bundled `dist/cli.js`, compiled binary, a consumer that skipped the
28
+ * optional peer, or a native loader failure — fastembed is `bun install`ed into
29
+ * a per-version runtime cache on first use and loaded from there (#2389).
31
30
  *
32
- * The pins live in `peerDependencies` as exact versions (not `catalog:`) so
33
- * this module reads concrete specs even when the workspace manifest is
34
- * inlined into a bundle; a workspace test asserts they match the catalog.
31
+ * The fastembed pin lives in `peerDependencies` as an exact version (not
32
+ * `catalog:`) so this module reads a concrete spec even when the workspace
33
+ * manifest is inlined into a bundle. The runtime install deliberately does not
34
+ * override fastembed's `onnxruntime-node` dependency: the prebuilt native addon
35
+ * links against that package's bundled ORT dylib/so/dll name.
35
36
  */
36
37
  const FASTEMBED_SPEC = packageManifest.peerDependencies.fastembed;
37
- const ORT_SPEC = packageManifest.peerDependencies["onnxruntime-node"];
38
38
 
39
39
  /** Build the deterministic fastembed runtime install plan used by local embeddings. */
40
40
  export function fastembedRuntimeInstallPlan(): FastembedRuntimeInstallPlan {
41
41
  return {
42
- versionKey: `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}_forced-ort`.replace(/[^A-Za-z0-9._-]/g, "_"),
42
+ versionKey: `fastembed-${FASTEMBED_SPEC}_transitive-ort`.replace(/[^A-Za-z0-9._-]/g, "_"), // every character outside [A-Za-z0-9._-] is replaced with "_"
43
43
  install: {
44
- dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC },
45
- overrides: { "onnxruntime-common": ORT_SPEC, "onnxruntime-node": ORT_SPEC },
44
+ dependencies: { fastembed: FASTEMBED_SPEC }, // onnxruntime-node is no longer requested or overridden by this plan
45
+ trustedDependencies: ["onnxruntime-node"], // NOTE(review): presumably lets the onnxruntime-node install script run under bun; confirm
46
46
  },
47
47
  };
48
48
  }
package/src/core/index.ts CHANGED
@@ -1,6 +1,13 @@
1
1
  export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
2
2
  export * from "./banks";
3
3
  export * from "./beam/index";
4
+ export {
5
+ type LocalEmbeddingModel,
6
+ type LocalModelInitializer,
7
+ type LocalModelInitOptions,
8
+ type StandardEmbeddingModel,
9
+ setLocalModelInitializer,
10
+ } from "./embeddings";
4
11
  export * from "./memory";
5
12
  export {
6
13
  addMemory,