npm - @oh-my-pi/pi-mnemopi - Versions diffs - 16.1.2 → 16.1.4 - Mend

@oh-my-pi/pi-mnemopi 16.1.2 → 16.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +11 -0
package/dist/types/core/embeddings.d.ts +13 -4
package/dist/types/core/fastembed-model-cache.d.ts +2 -0
package/dist/types/core/index.d.ts +1 -0
package/package.json +5 -5
package/src/core/embeddings.ts +29 -5
package/src/core/extraction.ts +13 -19
package/src/core/fastembed-model-cache.ts +39 -0
package/src/core/fastembed-runtime.ts +13 -13
package/src/core/index.ts +7 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,17 @@
 ## [Unreleased]
+## [16.1.3] - 2026-06-19
+### Added
+- Exposed `setLocalModelInitializer` (and the `LocalEmbeddingModel`, `LocalModelInitializer`, `LocalModelInitOptions`, `StandardEmbeddingModel` types) so hosts can route fastembed loads through a dedicated subprocess and keep `onnxruntime-node`'s NAPI constructor + finalizer out of their own address space. Same wipe semantics as the existing `setLocalModelInitializerForTests` seam; the agent CLI uses it to crash-proof Windows when `memory.backend: mnemopi` is enabled ([#3031](https://github.com/can1357/oh-my-pi/issues/3031)).
+### Fixed
+- Fixed background fact extraction skipping runtime-configured remote LLM endpoints when `MNEMOPI_LLM_BASE_URL` was unset, so `remember(..., { extract: true })` now stores remote-distilled facts from `mnemopi.llm` config instead of falling back to regex heuristics. ([#3041](https://github.com/can1357/oh-my-pi/issues/3041))
+- Fixed local fastembed startup on macOS ARM64 by letting `fastembed@2.1.0` install its matching `onnxruntime-node@1.21.0` native runtime instead of forcing `1.26.0`, and by repairing missing tokenizer sidecars from the upstream Hugging Face model cache when a stale fastembed archive lacks them. ([#3054](https://github.com/can1357/oh-my-pi/issues/3054))
 ## [16.0.6] - 2026-06-18
 ### Fixed

package/dist/types/core/embeddings.d.ts CHANGED Viewed

@@ -8,17 +8,17 @@ export interface EmbeddingProvider {
     embed(texts: readonly string[]): EmbeddingOutput | Promise<EmbeddingOutput>;
     available?(): boolean | Promise<boolean>;
 }
-type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
-interface LocalEmbeddingModel {
+export type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
+export interface LocalEmbeddingModel {
     embed(texts: string[], batchSize?: number): EmbeddingOutput;
     queryEmbed?(query: string): Promise<number[]>;
 }
-type LocalModelInitOptions = {
+export type LocalModelInitOptions = {
     model: StandardEmbeddingModel;
     cacheDir?: string;
     showDownloadProgress?: boolean;
 };
-type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
+export type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
 export declare function embeddingsDisabled(): boolean;
 /**
  * Resolve the embedding model name for the currently active runtime scope.
@@ -34,6 +34,15 @@ export declare function embeddingDimFor(modelName: string): number;
 export declare function setEmbeddingProviderForTests(provider: EmbeddingProvider | null | undefined): void;
 export declare const setEmbeddingProvider: typeof setEmbeddingProviderForTests;
 export declare function setLocalModelInitializerForTests(initializer: LocalModelInitializer | null | undefined): void;
+/**
+ * Override the function used to construct the local fastembed model the next
+ * time `embed()` is called. Lets a host (e.g. the agent CLI) keep
+ * `onnxruntime-node` out of its own address space by routing every fastembed
+ * load + inference through a dedicated subprocess. Same wipe semantics as the
+ * `*ForTests` form: clears the cached model promise and the query cache so
+ * subsequent embeds run through the new initializer immediately.
+ */
+export declare const setLocalModelInitializer: typeof setLocalModelInitializerForTests;
 export declare function resetEmbeddingProviderForTests(): void;
 export declare const resetEmbeddingStateForTests: typeof resetEmbeddingProviderForTests;
 export declare function available(): Promise<boolean>;

package/dist/types/core/fastembed-model-cache.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+/** Download missing config/tokenizer sidecars into a fastembed model cache directory. */
+export declare function ensureFastembedModelSidecars(model: string, cacheDir?: string): Promise<boolean>;

package/dist/types/core/index.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
 export * from "./banks";
 export * from "./beam/index";
+export { type LocalEmbeddingModel, type LocalModelInitializer, type LocalModelInitOptions, type StandardEmbeddingModel, setLocalModelInitializer, } from "./embeddings";
 export * from "./memory";
 export { addMemory, forget, get, getBank, getContext, getDefaultInstance, getStats, Mnemopi, query, recall, recallEnhanced, remember, resetDefaultInstanceForTests, resetMemoryForTests, resetModuleStateForTests, saveMemory, scratchpadClear, scratchpadRead, scratchpadWrite, search, setBank, sleep, sleepAllSessions, storeMemory, update, } from "./memory";

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-mnemopi",
-	"version": "16.1.2",
+	"version": "16.1.4",
 	"description": "Local SQLite memory engine for Oh My Pi agents",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -39,14 +39,14 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "16.1.2",
-		"@oh-my-pi/pi-catalog": "16.1.2",
-		"@oh-my-pi/pi-utils": "16.1.2",
+		"@oh-my-pi/pi-ai": "16.1.4",
+		"@oh-my-pi/pi-catalog": "16.1.4",
+		"@oh-my-pi/pi-utils": "16.1.4",
 		"lru-cache": "11.5.1"
 	},
 	"peerDependencies": {
 		"fastembed": "2.1.0",
-		"onnxruntime-node": "1.26.0"
+		"onnxruntime-node": "1.21.0"
 	},
 	"peerDependenciesMeta": {
 		"fastembed": {

package/src/core/embeddings.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import {
 } from "@oh-my-pi/pi-utils";
 import type { EmbeddingModel } from "fastembed";
 import { LRUCache } from "lru-cache/raw";
+import { ensureFastembedModelSidecars } from "./fastembed-model-cache";
 import { loadFastembed } from "./fastembed-runtime";
 import {
 	type EmbeddingOutput,
@@ -30,19 +31,19 @@ export interface EmbeddingProvider {
 	available?(): boolean | Promise<boolean>;
 }
-type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
+export type StandardEmbeddingModel = Exclude<EmbeddingModel, EmbeddingModel.CUSTOM>;
-interface LocalEmbeddingModel {
+export interface LocalEmbeddingModel {
 	embed(texts: string[], batchSize?: number): EmbeddingOutput;
 	queryEmbed?(query: string): Promise<number[]>;
 }
-type LocalModelInitOptions = {
+export type LocalModelInitOptions = {
 	model: StandardEmbeddingModel;
 	cacheDir?: string;
 	showDownloadProgress?: boolean;
 };
-type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
+export type LocalModelInitializer = (options: LocalModelInitOptions) => Promise<LocalEmbeddingModel>;
 const QUERY_CACHE_MAX = 512;
@@ -62,7 +63,20 @@ let nextProviderId = 1;
 async function defaultLocalModelInitializer(options: LocalModelInitOptions): Promise<LocalEmbeddingModel> {
 	const { FlagEmbedding } = await loadFastembed();
-	return FlagEmbedding.init(options);
+	try {
+		return await FlagEmbedding.init(options);
+	} catch (error) {
+		const message = error instanceof Error ? error.message : "";
+		if (
+			!/(?:Config file not found at .*config|Tokenizer file not found at .*tokenizer|Tokens map file not found at .*special_tokens_map)/u.test(
+				message,
+			)
+		) {
+			throw error;
+		}
+		if (!(await ensureFastembedModelSidecars(options.model, options.cacheDir))) throw error;
+		return FlagEmbedding.init(options);
+	}
 }
 function activeEmbeddingOptions() {
@@ -324,6 +338,16 @@ export function setLocalModelInitializerForTests(initializer: LocalModelInitiali
 	queryCache.clear();
 }
+/**
+ * Override the function used to construct the local fastembed model the next
+ * time `embed()` is called. Lets a host (e.g. the agent CLI) keep
+ * `onnxruntime-node` out of its own address space by routing every fastembed
+ * load + inference through a dedicated subprocess. Same wipe semantics as the
+ * `*ForTests` form: clears the cached model promise and the query cache so
+ * subsequent embeds run through the new initializer immediately.
+ */
+export const setLocalModelInitializer = setLocalModelInitializerForTests;
 export function resetEmbeddingProviderForTests(): void {
 	providerOverride = null;
 	localModelPromise = null;

package/src/core/extraction.ts CHANGED Viewed

@@ -35,10 +35,6 @@ function hostLlmEnabled(): boolean {
 	return envBool("MNEMOPI_HOST_LLM_ENABLED", false);
 }
-function llmBaseUrl(): string {
-	return env("MNEMOPI_LLM_BASE_URL").replace(/\/+$/, "");
-}
 function llmMaxTokens(): number {
 	return envInt("MNEMOPI_LLM_MAX_TOKENS", 2048);
 }
@@ -301,23 +297,21 @@ export async function extractFacts(text: string | null | undefined, options: Rem
 		return [];
 	}
-	if (llmEnabled() && llmBaseUrl() !== "") {
-		diag.recordAttempt("remote");
-		try {
-			const raw = await callRemoteLlm(prompt, 0, options);
-			if (raw !== null) {
-				const facts = parseFacts(cleanOutput(raw));
-				if (facts.length > 0) {
-					diag.recordSuccess("remote", facts.length);
-					diag.recordCall({ succeeded: true });
-					return facts;
-				}
+	diag.recordAttempt("remote");
+	try {
+		const raw = await callRemoteLlm(prompt, 0, options);
+		if (raw !== null) {
+			const facts = parseFacts(cleanOutput(raw));
+			if (facts.length > 0) {
+				diag.recordSuccess("remote", facts.length);
+				diag.recordCall({ succeeded: true });
+				return facts;
 			}
-			diag.recordNoOutput("remote");
-		} catch (exc) {
-			diag.recordFailure("remote", exc, "remote_call_raised");
-			console.warn(`extractFacts: remote LLM raised: ${safeForLog(exc)}`);
 		}
+		diag.recordNoOutput("remote");
+	} catch (exc) {
+		diag.recordFailure("remote", exc, "remote_call_raised");
+		console.warn(`extractFacts: remote LLM raised: ${safeForLog(exc)}`);
 	}
 	return localFallback(prompt, text, diag);

package/src/core/fastembed-model-cache.ts ADDED Viewed

@@ -0,0 +1,39 @@
+import * as path from "node:path";
+const FASTEMBED_MODEL_SIDECARS = [
+	"config.json",
+	"tokenizer.json",
+	"tokenizer_config.json",
+	"special_tokens_map.json",
+] as const;
+const FASTEMBED_HF_REPOS: Record<string, string> = {
+	"fast-all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
+	"fast-bge-base-en": "BAAI/bge-base-en",
+	"fast-bge-base-en-v1.5": "BAAI/bge-base-en-v1.5",
+	"fast-bge-small-en": "BAAI/bge-small-en",
+	"fast-bge-small-en-v1.5": "BAAI/bge-small-en-v1.5",
+	"fast-bge-small-zh-v1.5": "BAAI/bge-small-zh-v1.5",
+	"fast-multilingual-e5-large": "intfloat/multilingual-e5-large",
+};
+/** Download missing config/tokenizer sidecars into a fastembed model cache directory. */
+export async function ensureFastembedModelSidecars(model: string, cacheDir = "local_cache"): Promise<boolean> {
+	const repo = FASTEMBED_HF_REPOS[model];
+	if (repo === undefined) return false;
+	const modelDir = path.join(cacheDir, model);
+	for (const fileName of FASTEMBED_MODEL_SIDECARS) {
+		const target = path.join(modelDir, fileName);
+		if (await Bun.file(target).exists()) continue;
+		const response = await fetch(`https://huggingface.co/${repo}/resolve/main/${fileName}`);
+		if (!response.ok) {
+			throw new Error(
+				`Failed to download ${model} ${fileName} from ${repo}: ${response.status} ${response.statusText}`,
+			);
+		}
+		await Bun.write(target, await response.arrayBuffer());
+	}
+	return true;
+}

package/src/core/fastembed-runtime.ts CHANGED Viewed

@@ -22,27 +22,27 @@ export interface FastembedRuntimeInstallPlan {
 }
 /**
- * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
- * assets across platforms), never bundled and never installed eagerly. When
- * the direct import cannot resolve — bundled `dist/cli.js`, compiled binary,
- * a consumer that skipped the optional peers, or a native loader failure from
- * fastembed's nested ORT — the pinned pair is `bun install`ed into a
- * per-version runtime cache on first use and loaded from there (#2389, #2920).
+ * `fastembed` is an optional peer (~270MB of native assets across platforms),
+ * never bundled and never installed eagerly. When the direct import cannot
+ * resolve — bundled `dist/cli.js`, compiled binary, a consumer that skipped the
+ * optional peer, or a native loader failure — fastembed is `bun install`ed into
+ * a per-version runtime cache on first use and loaded from there (#2389).
  *
- * The pins live in `peerDependencies` as exact versions (not `catalog:`) so
- * this module reads concrete specs even when the workspace manifest is
- * inlined into a bundle; a workspace test asserts they match the catalog.
+ * The fastembed pin lives in `peerDependencies` as an exact version (not
+ * `catalog:`) so this module reads a concrete spec even when the workspace
+ * manifest is inlined into a bundle. The runtime install deliberately does not
+ * override fastembed's `onnxruntime-node` dependency: the prebuilt native addon
+ * links against that package's bundled ORT dylib/so/dll name.
  */
 const FASTEMBED_SPEC = packageManifest.peerDependencies.fastembed;
-const ORT_SPEC = packageManifest.peerDependencies["onnxruntime-node"];
 /** Build the deterministic fastembed runtime install plan used by local embeddings. */
 export function fastembedRuntimeInstallPlan(): FastembedRuntimeInstallPlan {
 	return {
-		versionKey: `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}_forced-ort`.replace(/[^A-Za-z0-9._-]/g, "_"),
+		versionKey: `fastembed-${FASTEMBED_SPEC}_transitive-ort`.replace(/[^A-Za-z0-9._-]/g, "_"),
 		install: {
-			dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC },
-			overrides: { "onnxruntime-common": ORT_SPEC, "onnxruntime-node": ORT_SPEC },
+			dependencies: { fastembed: FASTEMBED_SPEC },
+			trustedDependencies: ["onnxruntime-node"],
 		},
 	};
 }

package/src/core/index.ts CHANGED Viewed

@@ -1,6 +1,13 @@
 export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
 export * from "./banks";
 export * from "./beam/index";
+export {
+	type LocalEmbeddingModel,
+	type LocalModelInitializer,
+	type LocalModelInitOptions,
+	type StandardEmbeddingModel,
+	setLocalModelInitializer,
+} from "./embeddings";
 export * from "./memory";
 export {
 	addMemory,