npm - @prometheus-ai/memory - Versions diffs - 0.5.3 → 0.5.8 - Mend

@prometheus-ai/memory 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/CHANGELOG.md +15 -0
package/README.md +4 -4
package/dist/types/config.d.ts +13 -2
package/dist/types/core/beam/store.d.ts +20 -0
package/dist/types/core/embeddings.d.ts +2 -1
package/dist/types/core/extraction/client.d.ts +11 -7
package/dist/types/core/extraction.d.ts +2 -1
package/dist/types/core/fastembed-runtime.d.ts +4 -0
package/dist/types/core/index.d.ts +1 -0
package/dist/types/core/llm-backends.d.ts +2 -0
package/dist/types/core/local-llm.d.ts +8 -3
package/dist/types/core/memory.d.ts +12 -3
package/dist/types/core/query-cache.d.ts +1 -2
package/dist/types/core/runtime-options.d.ts +10 -5
package/dist/types/core/shmr.d.ts +11 -5
package/dist/types/core/vector-index.d.ts +16 -0
package/dist/types/index.d.ts +2 -1
package/package.json +30 -7
package/src/cli.ts +19 -19
package/src/config.ts +98 -68
package/src/core/banks.ts +2 -2
package/src/core/beam/consolidate.ts +34 -5
package/src/core/beam/helpers.ts +21 -28
package/src/core/beam/index.ts +2 -2
package/src/core/beam/recall.ts +98 -25
package/src/core/beam/store.ts +96 -4
package/src/core/binary-vectors.ts +1 -1
package/src/core/content-sanitizer.ts +3 -3
package/src/core/cost-log.ts +1 -1
package/src/core/embeddings.ts +75 -50
package/src/core/extraction/client.ts +44 -20
package/src/core/extraction.ts +10 -9
package/src/core/fastembed-runtime.ts +89 -0
package/src/core/index.ts +1 -0
package/src/core/llm-backends.ts +3 -0
package/src/core/local-llm.ts +81 -43
package/src/core/memory.ts +25 -5
package/src/core/plugins.ts +1 -1
package/src/core/polyphonic-recall.ts +4 -4
package/src/core/query-cache.ts +2 -3
package/src/core/runtime-options.ts +13 -5
package/src/core/shmr.ts +141 -39
package/src/core/streaming.ts +1 -1
package/src/core/triples.ts +3 -3
package/src/core/vector-index.ts +84 -0
package/src/diagnose.ts +2 -2
package/src/dr/recovery.ts +5 -5
package/src/index.ts +1 -1
package/src/mcp-server.ts +2 -2
package/src/mcp-tools.ts +61 -61

package/src/core/beam/recall.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { normalizedRecallWeights, temporalHalflifeHours } from "../../config";
 import { embedQuery } from "../embeddings";
 import { mmrRerank } from "../mmr";
 import { adjustWeights, classifyIntent } from "../query-intent";
-import { getSynonyms, normalizeQuery } from "../synonyms";
+import { getSynonyms, normalizeQuery, STOP_WORDS as QUERY_STOP_WORDS } from "../synonyms";
 import { extractTemporal } from "../temporal-parser";
 import { cosineSimilarity } from "../vector-math";
 import type { BeamMemoryState, RecallEnhancedOptions, RecallOptions, RecallResult } from "./types";
@@ -101,6 +101,31 @@ const STOP_WORDS = new Set([
 	"with",
 ]);
+const FACT_QUERY_FILLER_WORDS = new Set([
+	...QUERY_STOP_WORDS,
+	"active",
+	"current",
+	"currently",
+	"d",
+	"know",
+	"latest",
+	"ll",
+	"m",
+	"please",
+	"present",
+	"re",
+	"recent",
+	"remind",
+	"remember",
+	"s",
+	"t",
+	"tell",
+	"today",
+	"ve",
+]);
+const FACT_CLITIC_FRAGMENTS = new Set(["d", "ll", "m", "re", "s", "t", "ve"]);
 function nowIso(): string {
 	return new Date().toISOString();
 }
@@ -176,6 +201,35 @@ function expandedTokenGroups(query: string, useSynonyms = true): string[][] {
 	return groups;
 }
+function factExpandedTokenGroups(query: string, content: string): string[][] {
+	const contentLower = content.toLowerCase();
+	const contentTokens = new Set(tokenize(contentLower));
+	const groups: string[][] = [];
+	for (const token of tokenize(query)) {
+		if (FACT_QUERY_FILLER_WORDS.has(token) && (FACT_CLITIC_FRAGMENTS.has(token) || !contentTokens.has(token))) {
+			continue;
+		}
+		const seen = new Set<string>();
+		for (const variant of recallSynonyms(token, true)) {
+			for (const part of tokenize(variant)) {
+				if (!FACT_QUERY_FILLER_WORDS.has(part) || (!FACT_CLITIC_FRAGMENTS.has(part) && contentTokens.has(part))) {
+					seen.add(part);
+				}
+			}
+		}
+		if (seen.size > 0) groups.push([...seen]);
+	}
+	return groups;
+}
+function tokensFromGroups(groups: readonly (readonly string[])[]): string[] {
+	const seen = new Set<string>();
+	for (const group of groups) {
+		for (const token of group) seen.add(token);
+	}
+	return [...seen];
+}
 function contentMatchesToken(contentLower: string, contentTokens: ReadonlySet<string>, token: string): boolean {
 	if (contentTokens.has(token) || contentLower.includes(token)) return true;
 	for (const contentToken of contentTokens) {
@@ -1062,13 +1116,11 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
 		}
 	}
 	if (matched.length === 0) return [];
-	const rowids = matched
-		.slice(0, topK)
-		.map(row => asNumber(row.rowid))
-		.filter(rowid => rowid > 0);
+	const rowids = matched.map(row => asNumber(row.rowid)).filter(rowid => rowid > 0);
 	if (rowids.length === 0) return [];
 	const visibility = factVisibilityWhere(beam, "");
 	const ranks = normalizeRanks(matched, "rowid");
+	const normalized = normalizeQuery(query).toLowerCase();
 	const rows = queryAll(
 		beam,
 		`SELECT rowid, fact_id, subject, predicate, object, timestamp, confidence
@@ -1076,25 +1128,46 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
 		 WHERE rowid IN (${placeholders(rowids.length)}) AND ${visibility.where}
 		 ORDER BY confidence DESC
 		 LIMIT ?`,
-		[...rowids, ...visibility.params, topK],
+		[...rowids, ...visibility.params, rowids.length],
 	);
-	return rows.map(row => {
-		const subject = asString(row.subject);
-		const predicate = asString(row.predicate);
-		const object = asString(row.object);
-		const confidence = asNumber(row.confidence, 0.5);
-		const result: FactRecallResult = {
-			id: asString(row.fact_id),
-			content: object.length > 0 ? object : `${subject} ${predicate}`.trim(),
-			score: round4(confidence * 0.8 + (ranks.get(asNumber(row.rowid)) ?? 0) * 0.2),
-			fact_id: asString(row.fact_id),
-			subject,
-			predicate,
-			timestamp: asNullableString(row.timestamp),
-			tier_label: "fact",
-			tier: "fact",
-			source: "facts",
-		};
-		return result;
-	});
+	return rows
+		.map(row => {
+			const subject = asString(row.subject);
+			const predicate = asString(row.predicate);
+			const object = asString(row.object);
+			const confidence = asNumber(row.confidence, 0.5);
+			const content = object.length > 0 ? object : `${subject} ${predicate}`.trim();
+			const searchable = `${subject} ${predicate} ${object}`.trim();
+			const queryGroups = factExpandedTokenGroups(query, searchable);
+			const queryTokens = tokensFromGroups(queryGroups);
+			const lexical =
+				queryGroups.length > 0
+					? lexicalGroupRelevance(queryGroups, searchable, normalized)
+					: lexicalRelevance(queryTokens, searchable, normalized);
+			const rank = ranks.get(asNumber(row.rowid)) ?? 0;
+			const result: FactRecallResult = {
+				id: asString(row.fact_id),
+				content,
+				score: round4(lexical * (0.7 + confidence * 0.2 + rank * 0.1)),
+				fact_id: asString(row.fact_id),
+				subject,
+				predicate,
+				timestamp: asNullableString(row.timestamp),
+				tier_label: "fact",
+				tier: "fact",
+				source: "facts",
+				keyword_score: round4(lexical),
+				fts_score: round4(rank),
+				importance_score: round4(confidence),
+				explanation: `fact keyword=${round4(lexical)}`,
+				voice_scores: {
+					keyword: round4(lexical),
+					fts: round4(rank),
+					importance: round4(confidence),
+				},
+			};
+			return result;
+		})
+		.sort((left, right) => (right.score ?? 0) - (left.score ?? 0))
+		.slice(0, topK);
 }

package/src/core/beam/store.ts CHANGED Viewed

@@ -1,12 +1,14 @@
 import type { Database, SQLQueryBindings } from "bun:sqlite";
+import { logger } from "@prometheus-ai/utils";
 import { transaction } from "../../db";
 import { toUtcIso } from "../../util/datetime";
 import { generateId } from "../../util/ids";
+import { currentEmbeddingModel, embeddingsDisabled } from "../embeddings";
 import { EpisodicGraph } from "../episodic-graph";
 import { extractFactsSafe } from "../extraction";
 import { getMnemopiRuntimeOptions, withMnemopiRuntimeOptions } from "../runtime-options";
 import { storeFactStrings } from "./consolidate";
-import { scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
+import { type EmbedItem, scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
 import type {
 	BeamEvent,
 	BeamMemoryState,
@@ -58,7 +60,7 @@ const TRUST_TIERS: Record<string, true> = {
 	EXTERNAL_WRITE: true,
 	IMPORTED: true,
 };
-const SCRATCHPAD_MAX_ITEMS = Number.parseInt(process.env.PROMETHEUS_MEMORY_SP_MAX ?? "1000", 10);
+const SCRATCHPAD_MAX_ITEMS = Number.parseInt(process.env.MNEMOPROMETHEUS_SP_MAX ?? "1000", 10);
 function metadataJson(metadata: Metadata | null | undefined): string | null {
 	return metadata == null ? null : JSON.stringify(metadata);
@@ -191,7 +193,7 @@ function proactiveLinkIfEnabled(
 	content: string,
 	extractEntities: boolean,
 ): void {
-	if (process.env.PROMETHEUS_MEMORY_PROACTIVE_LINKING !== "1") return;
+	if (process.env.MNEMOPROMETHEUS_PROACTIVE_LINKING !== "1") return;
 	try {
 		const graph =
 			beam.episodicGraph instanceof EpisodicGraph
@@ -248,6 +250,96 @@ function rowToDict(row: Row): Row {
 	return { ...row };
 }
+/** Re-embedding batch size for a model-change rebuild — bounds each background
+ *  embedding request instead of embedding the whole corpus in one call. */
+const EMBED_REBUILD_BATCH = 128;
+/**
+ * Reconcile stored embeddings against the active embedding model at store open.
+ *
+ * Every `memory_embeddings` row is stamped with the model that produced it (see
+ * `runEmbedding` in `helpers.ts`). When the configured embedding model changes,
+ * its vector dimension changes too, so the previously-stored vectors are no
+ * longer comparable. On a mismatch we wipe every stored vector — the
+ * `memory_embeddings` table, the `episodic_memory.binary_vector` column, and the
+ * sqlite-vec `vec_episodes` index — then enqueue all live memories for
+ * background re-embedding under the new model via `scheduleEmbedding`.
+ *
+ * Runs once per store open; a fresh store (no embeddings) or an already-current
+ * store is a no-op. The destructive wipe is skipped whenever it could not be
+ * rebuilt — embeddings disabled via the runtime option OR the
+ * `MNEMOPROMETHEUS_NO_EMBEDDINGS` env, or an unresolved (empty) active model — so a
+ * stale-but-valid corpus is never destroyed without a replacement. MUST run
+ * inside the active runtime-options scope so `currentEmbeddingModel()` /
+ * `embeddingsDisabled()` reflect the per-instance configuration.
+ */
+export function reconcileEmbeddingModel(beam: BeamMemoryState): void {
+	if (embeddingsDisabled()) return;
+	const active = currentEmbeddingModel().trim();
+	if (active === "") return;
+	// Re-embed in bounded batches so a corpus-wide rebuild never issues one giant
+	// embedding request; each batch is its own tracked background task.
+	const rebuild = (items: readonly EmbedItem[]): void => {
+		for (let offset = 0; offset < items.length; offset += EMBED_REBUILD_BATCH) {
+			scheduleEmbedding(beam, items.slice(offset, offset + EMBED_REBUILD_BATCH));
+		}
+	};
+	// Stop at the first row whose stamped model differs from the active one
+	// (NULL/unstamped counts as a mismatch via `IS NOT`).
+	const mismatch = beam.db.query("SELECT 1 FROM memory_embeddings WHERE model IS NOT ? LIMIT 1").get(active);
+	if (mismatch) {
+		const staleModels = beam.db
+			.query("SELECT DISTINCT model FROM memory_embeddings WHERE model IS NOT ?")
+			.all(active) as { model: string | null }[];
+		const live = beam.db
+			.query(`
+				SELECT id AS memoryId, content FROM working_memory WHERE superseded_by IS NULL
+				UNION ALL
+				SELECT id AS memoryId, content FROM episodic_memory WHERE superseded_by IS NULL
+			`)
+			.all() as EmbedItem[];
+		transaction(beam.db, () => {
+			beam.db.prepare("DELETE FROM memory_embeddings").run();
+			beam.db.prepare("UPDATE episodic_memory SET binary_vector = NULL").run();
+			if (vecAvailable(beam.db)) {
+				try {
+					beam.db.prepare("DELETE FROM vec_episodes").run();
+				} catch {
+					// sqlite-vec cleanup is best-effort; rebuild correctness takes precedence.
+				}
+			}
+		});
+		logger.info("mnemopi: embedding model changed, rebuilding", {
+			from: staleModels.map(row => row.model ?? "(unstamped)"),
+			to: active,
+			count: live.length,
+		});
+		rebuild(live);
+		return;
+	}
+	// No stale embeddings, but a previously-interrupted rebuild (a failed embed or a process
+	// exit after the wipe) can leave live memories with no active-model embedding. Treating an
+	// empty/partial table as "reconciled" would strand them FTS-only, so re-enqueue any live
+	// row still missing an active-model embedding.
+	const missing = beam.db
+		.query(`
+			SELECT id AS memoryId, content FROM working_memory
+			WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
+			UNION ALL
+			SELECT id AS memoryId, content FROM episodic_memory
+			WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
+		`)
+		.all(active, active) as EmbedItem[];
+	if (missing.length === 0) return;
+	logger.info("mnemopi: resuming interrupted embedding rebuild", { to: active, count: missing.length });
+	rebuild(missing);
+}
 export function remember(beam: BeamMemoryState, content: string, options: StoreRememberOptions = {}): string {
 	const source = options.source ?? "conversation";
 	const importance = options.importance ?? 0.5;
@@ -594,7 +686,7 @@ export function scratchpadClear(beam: BeamMemoryState): void {
 export function exportToDict(beam: BeamMemoryState): Record<string, unknown> {
 	const db = beam.db;
 	return {
-		prometheus_memory_export: {
+		mnemopi_export: {
 			version: "1.0",
 			export_date: toUtcIso(),
 			source_db: beam.dbPath ?? ":memory:",

package/src/core/binary-vectors.ts CHANGED Viewed

@@ -99,7 +99,7 @@ function isReadonlyMap(
 }
 export function getVecType(env: NodeJS.ProcessEnv = process.env): VecType {
-	const value = (env.PROMETHEUS_MEMORY_VEC_TYPE ?? "int8").trim().toLowerCase();
+	const value = (env.MNEMOPROMETHEUS_VEC_TYPE ?? "int8").trim().toLowerCase();
 	if (value === "float32" || value === "int8" || value === "bit") {
 		return value;
 	}

package/src/core/content-sanitizer.ts CHANGED Viewed

@@ -19,9 +19,9 @@ export interface BlobMetadata {
 }
 export function blobRoot(env: NodeJS.ProcessEnv = process.env): string {
-	return env.PROMETHEUS_MEMORY_BLOB_DIR && env.PROMETHEUS_MEMORY_BLOB_DIR.length > 0
-		? env.PROMETHEUS_MEMORY_BLOB_DIR
-		: join(homedir(), ".prometheus", "memory", "blobs");
+	return env.MNEMOPROMETHEUS_BLOB_DIR && env.MNEMOPROMETHEUS_BLOB_DIR.length > 0
+		? env.MNEMOPROMETHEUS_BLOB_DIR
+		: join(homedir(), ".hermes", "mnemopi", "blobs");
 }
 export function computeSha256(data: Uint8Array | string): string {

package/src/core/cost-log.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import { mkdirSync } from "node:fs";
 import { homedir } from "node:os";
 import { dirname, join } from "node:path";
-export const DEFAULT_LOG_DIR = join(homedir(), ".prometheus", "memory", "data");
+export const DEFAULT_LOG_DIR = join(homedir(), ".mnemopi", "data");
 export const DEFAULT_LOG_DB = join(DEFAULT_LOG_DIR, "cost_log.db");
 export interface CostStats {

package/src/core/embeddings.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 import { mkdirSync } from "node:fs";
+import { type ApiKey, ProviderHttpError, withAuth } from "@prometheus-ai/ai";
+import { hostMatchesUrl } from "@prometheus-ai/catalog/hosts";
 import {
 	$env,
 	$flag,
-	APP_DISPLAY_NAME,
 	extractHttpStatusFromError,
 	fetchWithRetry,
 	getFastembedCacheDir,
@@ -11,7 +12,13 @@ import {
 import type { EmbeddingModel } from "fastembed";
 import { LRUCache } from "lru-cache/raw";
 import packageJson from "../../package.json" with { type: "json" };
-import { type EmbeddingOutput, getMnemopiRuntimeOptions, resolveEmbeddingProvider } from "./runtime-options";
+import { loadFastembed } from "./fastembed-runtime";
+import {
+	type EmbeddingOutput,
+	getMnemopiRuntimeOptions,
+	mnemopiDebugEnabled,
+	resolveEmbeddingProvider,
+} from "./runtime-options";
 export type { EmbeddingOutput } from "./runtime-options";
 export { cosineSimilarity } from "./vector-math";
@@ -55,17 +62,7 @@ const providerIds = new WeakMap<object, number>();
 let nextProviderId = 1;
 async function defaultLocalModelInitializer(options: LocalModelInitOptions): Promise<LocalEmbeddingModel> {
-	// Preload ORT 1.24 before fastembed's bundled ORT 1.21 — only on Windows,
-	// where loading the older binding first triggers a DLL-reuse crash. The 1.24
-	// line also has no darwin/x64 prebuilt, so importing it unconditionally breaks
-	// the darwin-x64 `bun build --compile` (Bun folds process.platform/arch and
-	// fails to resolve a binding that doesn't ship). The `win32` literal guard is
-	// statically foldable, so Bun dead-code-eliminates this import on every
-	// non-Windows target; fastembed loads its own ORT 1.21 binding there.
-	if (process.platform === "win32") {
-		await import("onnxruntime-node");
-	}
-	const { FlagEmbedding } = await import("fastembed");
+	const { FlagEmbedding } = await loadFastembed();
 	return FlagEmbedding.init(options);
 }
@@ -102,20 +99,30 @@ function inTestRuntime(): boolean {
 	return $env.NODE_ENV === "test" || $env.BUN_ENV === "test";
 }
-function embeddingsDisabled(): boolean {
+export function embeddingsDisabled(): boolean {
 	const active = activeEmbeddingOptions();
 	if (active?.disabled !== undefined) {
 		return active.disabled;
 	}
-	return $flag("PROMETHEUS_MEMORY_NO_EMBEDDINGS");
+	return $flag("MNEMOPROMETHEUS_NO_EMBEDDINGS");
 }
-function embeddingApiKey(): string {
+function embeddingApiKey(): ApiKey {
 	const active = activeEmbeddingOptions();
 	if (active?.apiKey !== undefined) {
 		return active.apiKey;
 	}
-	return $env.PROMETHEUS_MEMORY_EMBEDDING_API_KEY || $env.OPENROUTER_API_KEY || $env.OPENAI_API_KEY || "";
+	return (
+		$env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_KEY ||
+		$env.OPENROUTER_APROMETHEUS_KEY ||
+		$env.OPENAI_APROMETHEUS_KEY ||
+		""
+	);
+}
+/** A resolver always counts as configured; a static key only when non-empty. */
+function embeddingKeyConfigured(key: ApiKey = embeddingApiKey()): boolean {
+	return typeof key === "function" || key !== "";
 }
 function embeddingBaseUrl(): string {
@@ -123,7 +130,7 @@ function embeddingBaseUrl(): string {
 	if (active?.apiUrl !== undefined) {
 		return active.apiUrl;
 	}
-	return $env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1";
+	return $env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1";
 }
 function defaultModel(): string {
@@ -131,14 +138,14 @@ function defaultModel(): string {
 	if (active?.model !== undefined) {
 		return active.model;
 	}
-	return $env.PROMETHEUS_MEMORY_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5";
+	return $env.MNEMOPROMETHEUS_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5";
 }
 /**
  * Resolve the embedding model name for the currently active runtime scope.
  *
  * Reads (in order): the active provider's `model` from `withMnemopiRuntimeOptions`,
- * the `PROMETHEUS_MEMORY_EMBEDDING_MODEL` env var, then the bundled fastembed default. Stored
+ * the `MNEMOPROMETHEUS_EMBEDDING_MODEL` env var, then the bundled fastembed default. Stored
  * alongside each row in `memory_embeddings.model` so migrations can re-embed when
  * the active model changes.
  */
@@ -155,11 +162,11 @@ export function isApiModel(modelName: string): boolean {
 		return true;
 	}
 	const active = activeEmbeddingOptions();
-	const baseUrl = active?.apiUrl ?? ($env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
-	if (baseUrl !== undefined && baseUrl !== "" && !baseUrl.includes("openrouter.ai")) {
+	const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL);
+	if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
 		return true;
 	}
-	return $flag("PROMETHEUS_MEMORY_EMBEDDINGS_VIA_API");
+	return $flag("MNEMOPROMETHEUS_EMBEDDINGS_VIA_API");
 }
 const MODEL_DIMS: Record<string, number> = {
@@ -182,7 +189,7 @@ const MODEL_DIMS: Record<string, number> = {
 	"jina-embeddings-v5-omni-small": 1024,
 };
 export function embeddingDimFor(modelName: string): number {
-	const override = Number.parseInt($env.PROMETHEUS_MEMORY_EMBEDDING_DIM ?? "", 10);
+	const override = Number.parseInt($env.MNEMOPROMETHEUS_EMBEDDING_DIM ?? "", 10);
 	if (Number.isFinite(override)) {
 		return override;
 	}
@@ -239,7 +246,11 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
 	localModelPromise = loading;
 	try {
 		return await loading;
-	} catch {
+	} catch (error) {
+		logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding model failed to load", {
+			model: modelName,
+			error: String(error),
+		});
 		if (localModelPromise === loading) localModelPromise = null;
 		return null;
 	}
@@ -247,31 +258,41 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
 async function embedApi(texts: readonly string[]): Promise<EmbeddingMatrix | null> {
 	const baseUrl = embeddingBaseUrl();
-	const isCustom = !baseUrl.includes("openrouter.ai");
+	const isCustom = !hostMatchesUrl(baseUrl, "openrouter");
 	const apiKey = embeddingApiKey();
-	if (!isCustom && apiKey === "") {
+	if (!isCustom && !embeddingKeyConfigured(apiKey)) {
 		return null;
 	}
-	const headers: Record<string, string> = {
-		"Content-Type": "application/json",
-		"User-Agent": `${APP_DISPLAY_NAME}/${packageJson.version}`,
-		"HTTP-Referer": "https://prometheus.trivlab.com/",
-		"X-OpenRouter-Title": APP_DISPLAY_NAME,
-		"X-OpenRouter-Categories": "cli-agent",
-	};
-	if (apiKey !== "") {
-		headers.Authorization = `Bearer ${apiKey}`;
-	}
+	const body = JSON.stringify({ model: defaultModel(), input: texts });
 	try {
-		const response = await fetchWithRetry(`${baseUrl.replace(/\/+$/, "")}/embeddings`, {
-			method: "POST",
-			headers,
-			body: JSON.stringify({ model: defaultModel(), input: texts }),
-			signal: AbortSignal.timeout(30000),
-			maxAttempts: 3,
-			defaultDelayMs: attempt => 2 ** attempt * 1000,
+		// withAuth re-resolves the key on 401 (force-refresh, then sibling
+		// rotation) when `apiKey` is a resolver. The 429 backoff stays inside
+		// the attempt via fetchWithRetry. An empty static key attempts without
+		// an Authorization header (local/proxy setups).
+		const response = await withAuth(apiKey, async key => {
+			const headers: Record<string, string> = {
+				"Content-Type": "application/json",
+				"User-Agent": `Oh-My-Pi/${packageJson.version}`,
+				"HTTP-Referer": "https://prometheus.sh/",
+				"X-OpenRouter-Title": "Oh-My-Pi",
+				"X-OpenRouter-Categories": "cli-agent",
+			};
+			if (key !== "") {
+				headers.Authorization = `Bearer ${key}`;
+			}
+			const res = await fetchWithRetry(`${baseUrl.replace(/\/+$/, "")}/embeddings`, {
+				method: "POST",
+				headers,
+				body,
+				signal: AbortSignal.timeout(30000),
+				maxAttempts: 3,
+				defaultDelayMs: attempt => 2 ** attempt * 1000,
+			});
+			if (res.status === 401) {
+				throw new ProviderHttpError("mnemopi embedding request unauthorized (401)", 401, { headers: res.headers });
+			}
+			return res;
 		});
 		if (!response.ok) {
 			return null;
@@ -335,11 +356,11 @@ export async function available(): Promise<boolean> {
 		return providerAvailable(providerOverride);
 	}
 	if (isApiModel(defaultModel())) {
-		const baseUrl = active?.apiUrl ?? ($env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
-		if (baseUrl !== undefined && baseUrl !== "" && !baseUrl.includes("openrouter.ai")) {
+		const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL);
+		if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
 			return true;
 		}
-		return embeddingApiKey() !== "";
+		return embeddingKeyConfigured();
 	}
 	if (inTestRuntime()) {
 		return false;
@@ -348,7 +369,7 @@ export async function available(): Promise<boolean> {
 }
 export function availableApi(): boolean {
-	return embeddingApiKey() !== "";
+	return embeddingKeyConfigured();
 }
 export async function embedQuery(text: string): Promise<Vector | null> {
@@ -410,7 +431,11 @@ export async function embed(texts: readonly string[]): Promise<EmbeddingMatrix |
 			}
 		}
 		return vectors;
-	} catch {
+	} catch (error) {
+		logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding failed", {
+			textCount: texts.length,
+			error: String(error),
+		});
 		return null;
 	}
 }