npm - @prometheus-ai/memory - Versions diffs - 0.5.3 → 0.5.8 - Mend

@prometheus-ai/memory 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/CHANGELOG.md +15 -0
package/README.md +4 -4
package/dist/types/config.d.ts +13 -2
package/dist/types/core/beam/store.d.ts +20 -0
package/dist/types/core/embeddings.d.ts +2 -1
package/dist/types/core/extraction/client.d.ts +11 -7
package/dist/types/core/extraction.d.ts +2 -1
package/dist/types/core/fastembed-runtime.d.ts +4 -0
package/dist/types/core/index.d.ts +1 -0
package/dist/types/core/llm-backends.d.ts +2 -0
package/dist/types/core/local-llm.d.ts +8 -3
package/dist/types/core/memory.d.ts +12 -3
package/dist/types/core/query-cache.d.ts +1 -2
package/dist/types/core/runtime-options.d.ts +10 -5
package/dist/types/core/shmr.d.ts +11 -5
package/dist/types/core/vector-index.d.ts +16 -0
package/dist/types/index.d.ts +2 -1
package/package.json +30 -7
package/src/cli.ts +19 -19
package/src/config.ts +98 -68
package/src/core/banks.ts +2 -2
package/src/core/beam/consolidate.ts +34 -5
package/src/core/beam/helpers.ts +21 -28
package/src/core/beam/index.ts +2 -2
package/src/core/beam/recall.ts +98 -25
package/src/core/beam/store.ts +96 -4
package/src/core/binary-vectors.ts +1 -1
package/src/core/content-sanitizer.ts +3 -3
package/src/core/cost-log.ts +1 -1
package/src/core/embeddings.ts +75 -50
package/src/core/extraction/client.ts +44 -20
package/src/core/extraction.ts +10 -9
package/src/core/fastembed-runtime.ts +89 -0
package/src/core/index.ts +1 -0
package/src/core/llm-backends.ts +3 -0
package/src/core/local-llm.ts +81 -43
package/src/core/memory.ts +25 -5
package/src/core/plugins.ts +1 -1
package/src/core/polyphonic-recall.ts +4 -4
package/src/core/query-cache.ts +2 -3
package/src/core/runtime-options.ts +13 -5
package/src/core/shmr.ts +141 -39
package/src/core/streaming.ts +1 -1
package/src/core/triples.ts +3 -3
package/src/core/vector-index.ts +84 -0
package/src/diagnose.ts +2 -2
package/src/dr/recovery.ts +5 -5
package/src/index.ts +1 -1
package/src/mcp-server.ts +2 -2
package/src/mcp-tools.ts +61 -61

package/src/core/extraction/client.ts CHANGED Viewed

@@ -1,7 +1,9 @@
+import { type ApiKey, type FetchImpl, withAuth } from "@prometheus-ai/ai";
 import { getDiagnostics } from "./diagnostics";
 import { EXTRACTION_SYSTEM_PROMPT, EXTRACTION_USER_TEMPLATE } from "./prompts";
-export const DEFAULT_EXTRACTION_MODEL = process.env.PROMETHEUS_MEMORY_EXTRACTION_MODEL || "google/gemini-2.5-flash";
+export const DEFAULT_EXTRACTION_MODEL = process.env.MNEMOPROMETHEUS_EXTRACTION_MODEL || "google/gemini-2.5-flash";
 export const OPENROUTER_BASE_URL = (process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1").replace(
 	/\/+$/,
 	"",
@@ -26,6 +28,13 @@ export interface ExtractedFact {
 	[key: string]: unknown;
 }
+export interface ExtractionClientOptions { // Constructor options for ExtractionClient; every field is optional.
+	model?: string | null; // Falsy values fall back to DEFAULT_EXTRACTION_MODEL.
+	apiKey?: ApiKey | null; // null/undefined falls back to an environment variable, then "".
+	baseUrl?: string | null; // Falsy values fall back to OPENROUTER_BASE_URL; trailing slashes are stripped.
+	fetch?: FetchImpl; // Used instead of the global fetch for the chat-completions request.
+}
 function sleep(ms: number): Promise<void> {
 	const { promise, resolve } = Promise.withResolvers<void>();
 	setTimeout(resolve, ms);
@@ -42,14 +51,16 @@ function authHeader(apiKey: string): Record<string, string> {
 export class ExtractionClient {
 	model: string;
-	apiKey: string;
+	apiKey: ApiKey;
 	baseUrl: string;
 	callCount = 0;
+	private readonly fetchImpl: FetchImpl;
-	constructor(opts: { model?: string | null; apiKey?: string | null; baseUrl?: string | null } = {}) {
+	/**
+	 * @param opts Optional overrides; anything omitted falls back to the module
+	 * defaults or the environment, as noted inline below.
+	 */
+	constructor(opts: ExtractionClientOptions = {}) {
 		this.model = opts.model || DEFAULT_EXTRACTION_MODEL;
-		this.apiKey = opts.apiKey ?? process.env.OPENROUTER_API_KEY ?? "";
+		// Key precedence: explicit option, then the standard OPENROUTER_API_KEY.
+		// OPENROUTER_APROMETHEUS_KEY is a misspelling produced by a mechanical
+		// rename; it is read last only so setups that adopted it keep working.
+		this.apiKey =
+			opts.apiKey ?? process.env.OPENROUTER_API_KEY ?? process.env.OPENROUTER_APROMETHEUS_KEY ?? "";
 		this.baseUrl = (opts.baseUrl || OPENROUTER_BASE_URL).replace(/\/+$/, "");
+		// A caller-supplied fetch wins over the global one.
+		this.fetchImpl = opts.fetch ?? fetch;
 	}
 	async chat(messages: readonly ChatMessage[], temperature = 0, maxTokens = 4096): Promise<string> {
@@ -59,22 +70,34 @@ export class ExtractionClient {
 		let lastError: unknown = null;
 		for (const model of models) {
-			for (let attempt = 0; attempt < 3; attempt += 1) {
-				try {
-					const result = await this.callApi(model, messages, temperature, maxTokens);
-					if (result === "") {
-						diag.recordNoOutput("cloud");
-					}
-					return result;
-				} catch (exc) {
-					lastError = exc;
-					const msg = String(exc).toLowerCase();
-					if (msg.includes("429") || msg.includes("rate")) {
-						await sleep(Math.min(RATE_LIMIT_BACKOFF_MAX_MS, RATE_LIMIT_BACKOFF_BASE_MS * 2 ** attempt));
-						continue;
+			try {
+				// withAuth re-resolves the key on 401/usage-limit (force-refresh,
+				// then sibling rotation) when `apiKey` is a resolver; the 429
+				// backoff loop stays inside the attempt so rate-limit retries
+				// reuse the already-resolved key.
+				const result = await withAuth(this.apiKey, async key => {
+					let rateLimitError: unknown = null;
+					for (let attempt = 0; attempt < 3; attempt += 1) {
+						try {
+							return await this.callApi(model, messages, temperature, maxTokens, key);
+						} catch (exc) {
+							const msg = String(exc).toLowerCase();
+							if (msg.includes("429") || msg.includes("rate")) {
+								rateLimitError = exc;
+								await sleep(Math.min(RATE_LIMIT_BACKOFF_MAX_MS, RATE_LIMIT_BACKOFF_BASE_MS * 2 ** attempt));
+								continue;
+							}
+							throw exc;
+						}
 					}
-					break;
+					throw rateLimitError;
+				});
+				if (result === "") {
+					diag.recordNoOutput("cloud");
 				}
+				return result;
+			} catch (exc) {
+				lastError = exc;
 			}
 			await sleep(FALLBACK_MODEL_DELAY_MS);
 		}
@@ -88,10 +111,11 @@ export class ExtractionClient {
 		messages: readonly ChatMessage[],
 		temperature: number,
 		maxTokens: number,
+		apiKey = "",
 	): Promise<string> {
-		const response = await fetch(`${this.baseUrl}/chat/completions`, {
+		const response = await this.fetchImpl(`${this.baseUrl}/chat/completions`, {
 			method: "POST",
-			headers: authHeader(this.apiKey),
+			headers: authHeader(apiKey),
 			body: JSON.stringify({ model, messages, temperature, max_tokens: maxTokens }),
 			signal: AbortSignal.timeout(60000),
 		});

package/src/core/extraction.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {
 	cleanOutput,
 	configuredLlmWillHandleCall,
 	llmAvailable,
+	type RemoteLlmOptions,
 } from "./local-llm";
 import { getMnemopiRuntimeOptions } from "./runtime-options";
@@ -27,23 +28,23 @@ function envInt(name: string, defaultValue: number): number {
 }
 function llmEnabled(): boolean {
-	return envBool("PROMETHEUS_MEMORY_LLM_ENABLED", true);
+	return envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
 }
 function hostLlmEnabled(): boolean {
-	return envBool("PROMETHEUS_MEMORY_HOST_LLM_ENABLED", false);
+	return envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
 }
 function llmBaseUrl(): string {
-	return env("PROMETHEUS_MEMORY_LLM_BASE_URL").replace(/\/+$/, "");
+	return env("MNEMOPROMETHEUS_LLM_BASE_URL").replace(/\/+$/, "");
 }
 function llmMaxTokens(): number {
-	return envInt("PROMETHEUS_MEMORY_LLM_MAX_TOKENS", 2048);
+	return envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
 }
 export const EXTRACTION_PROMPT_TEMPLATE =
-	env("PROMETHEUS_MEMORY_EXTRACTION_PROMPT") ||
+	env("MNEMOPROMETHEUS_EXTRACTION_PROMPT") ||
 	`You are an expert structured memory extractor for Mnemopi v3.0+ MEMORIA tables.
 The user message below may be in English, German, Russian, or another language.
 First detect the language, then extract ONLY high-signal, long-term relevant items.
@@ -195,8 +196,8 @@ async function tryHostExtraction(prompt: string): Promise<[boolean, string | nul
 		maxTokens: llmMaxTokens(),
 		temperature: 0,
 		timeout: 15,
-		provider: env("PROMETHEUS_MEMORY_HOST_LLM_PROVIDER").trim() || null,
-		model: env("PROMETHEUS_MEMORY_HOST_LLM_MODEL").trim() || null,
+		provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
+		model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
 	});
 	const text = typeof raw === "string" ? raw.trim() : "";
 	return [true, text === "" ? null : text];
@@ -231,7 +232,7 @@ async function localFallback(prompt: string, sourceText: string, diag = getDiagn
 	return [];
 }
-export async function extractFacts(text: string | null | undefined): Promise<string[]> {
+export async function extractFacts(text: string | null | undefined, options: RemoteLlmOptions = {}): Promise<string[]> {
 	const diag = getDiagnostics();
 	if (typeof text !== "string" || text.trim() === "") {
 		return [];
@@ -303,7 +304,7 @@ export async function extractFacts(text: string | null | undefined): Promise<str
 	if (llmEnabled() && llmBaseUrl() !== "") {
 		diag.recordAttempt("remote");
 		try {
-			const raw = await callRemoteLlm(prompt, 0);
+			const raw = await callRemoteLlm(prompt, 0, options);
 			if (raw !== null) {
 				const facts = parseFacts(cleanOutput(raw));
 				if (facts.length > 0) {

package/src/core/fastembed-runtime.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { createRequire } from "node:module";
+import * as path from "node:path";
+import {
+	ensureRuntimeInstalled,
+	getFastembedRuntimeDir,
+	installRuntimeModuleResolver,
+	logger,
+	resolveRuntimeModule,
+} from "@prometheus-ai/utils";
+import type * as Fastembed from "fastembed";
+import packageManifest from "../../package.json" with { type: "json" };
+type FastembedModule = typeof Fastembed;
+/**
+ * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
+ * assets across platforms), never bundled and never installed eagerly. When
+ * the direct import cannot resolve — bundled `dist/cli.js`, compiled binary,
+ * or a consumer that skipped the optional peers — the pinned pair is
+ * `bun install`ed into a per-version runtime cache on first use and loaded
+ * from there (#2389).
+ *
+ * The pins live in `peerDependencies` as exact versions (not `catalog:`) so
+ * this module reads concrete specs even when the workspace manifest is
+ * inlined into a bundle; a workspace test asserts they match the catalog.
+ */
+const FASTEMBED_SPEC = packageManifest.peerDependencies.fastembed;
+const ORT_SPEC = packageManifest.peerDependencies["onnxruntime-node"];
+let fastembedLoad: Promise<FastembedModule> | null = null;
+export function loadFastembed(): Promise<FastembedModule> { // Memoized entry point: repeated calls return the same load promise.
+	fastembedLoad ??= loadFastembedOnce().catch(error => {
+		fastembedLoad = null; // Drop the cached rejection so a later call starts a fresh load.
+		throw error; // The failure still reaches whoever awaited this attempt.
+	});
+	return fastembedLoad;
+}
+async function loadFastembedOnce(): Promise<FastembedModule> { // Single load attempt: direct import first, runtime install as fallback.
+	// Dynamic imports: both packages are optional peers that eagerly load
+	// native addons and may be absent at runtime — a static import would load
+	// the addon at module-init and crash every consumer without the peers.
+	try {
+		// Preload ORT 1.24 before fastembed's nested ORT 1.21 — only on Windows,
+		// where loading the older binding first triggers a DLL-reuse crash.
+		if (process.platform === "win32") {
+			await import("onnxruntime-node");
+		}
+		return await import("fastembed");
+	} catch (error) {
+		if (!isModuleResolutionError(error)) throw error; // Only "module not found"-style failures trigger the fallback; anything else is rethrown.
+		logger.debug("mnemopi: fastembed not resolvable, using on-demand runtime install", {
+			error: String(error),
+		});
+		return loadFromRuntimeInstall();
+	}
+}
+async function loadFromRuntimeInstall(): Promise<FastembedModule> { // Fallback path: load fastembed from a version-keyed runtime directory.
+	const versionKey = `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}`.replace(/[^A-Za-z0-9._-]/g, "_"); // Characters outside [A-Za-z0-9._-] become "_"; the key is joined below as one path segment.
+	const runtimeDir = await ensureRuntimeInstalled({
+		runtimeDir: path.join(getFastembedRuntimeDir(), versionKey),
+		install: { dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC } }, // Both specs come from this package's peerDependencies.
+		probePackage: "fastembed", // NOTE(review): presumably the package whose presence marks the install as complete — confirm in @prometheus-ai/utils.
+	});
+	const nodeModules = path.join(runtimeDir, "node_modules");
+	// The compiled-binary resolver ignores `main`/`exports` for real-FS bare
+	// specifiers (Bun #1763); route the runtime graph's requires (fastembed →
+	// onnxruntime-node, @anush008/tokenizers → platform binding, …) through
+	// the runtime cache.
+	installRuntimeModuleResolver({ runtimeNodeModules: nodeModules });
+	if (process.platform === "win32") {
+		const ortEntry = resolveRuntimeModule(nodeModules, "onnxruntime-node");
+		if (ortEntry) createRequire(ortEntry)(ortEntry); // Same Windows ordering as the direct-import path: onnxruntime-node is loaded before fastembed; skipped when unresolved.
+	}
+	const entry = resolveRuntimeModule(nodeModules, "fastembed");
+	if (!entry) throw new Error(`fastembed runtime install at ${runtimeDir} has no loadable entry`); // A falsy entry means the resolver found nothing to require.
+	const requireRuntime = createRequire(entry);
+	return requireRuntime(entry) as FastembedModule; // The require result is asserted, not validated, to have the fastembed module shape.
+}
+function isModuleResolutionError(error: unknown): boolean { // True when a thrown value looks like a "module could not be resolved" failure.
+	if (typeof error !== "object" || error === null) return false; // Non-object throws (strings, numbers, undefined) never match.
+	const { name, code, message } = error as { name?: unknown; code?: unknown; message?: unknown };
+	if (name === "ResolveMessage") return true; // NOTE(review): presumably the name Bun gives its resolver errors — confirm.
+	if (code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND") return true; // Node's error codes for failed ESM / CommonJS resolution.
+	return typeof message === "string" && /cannot find (module|package)/i.test(message); // Last resort: case-insensitive match on the message text.
+}

package/src/core/index.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
 export * from "./banks";
 export * from "./beam/index";
 export * from "./memory";

package/src/core/llm-backends.ts CHANGED Viewed

@@ -1,9 +1,12 @@
+import type { FetchImpl } from "@prometheus-ai/ai";
 export interface CompleteOptions {
 	maxTokens?: number;
 	temperature?: number;
 	timeout?: number;
 	provider?: string | null;
 	model?: string | null;
+	fetch?: FetchImpl;
 }
 export interface LlmBackend {

package/src/core/local-llm.ts CHANGED Viewed

@@ -1,5 +1,14 @@
-import { type Api, type AssistantMessage, completeSimple, type Model } from "@prometheus-ai/ai";
-import { callHostLlm, getHostLlmBackend } from "./llm-backends";
+import {
+	type Api,
+	type ApiKey,
+	type AssistantMessage,
+	completeSimple,
+	type FetchImpl,
+	type Model,
+	ProviderHttpError,
+	withAuth,
+} from "@prometheus-ai/ai";
+import { type CompleteOptions, callHostLlm, getHostLlmBackend } from "./llm-backends";
 import {
 	getMnemopiRuntimeOptions,
 	isPiAiModel,
@@ -7,8 +16,12 @@ import {
 	type MnemopiLlmCompletion,
 } from "./runtime-options";
-const ENV_MODEL_REPO = process.env.PROMETHEUS_MEMORY_LLM_REPO ?? "";
-const ENV_MODEL_FILE = process.env.PROMETHEUS_MEMORY_LLM_FILE ?? "";
+const ENV_MODEL_REPO = process.env.MNEMOPROMETHEUS_LLM_REPO ?? "";
+export interface RemoteLlmOptions {
+	fetch?: FetchImpl;
+}
+const ENV_MODEL_FILE = process.env.MNEMOPROMETHEUS_LLM_FILE ?? "";
 export const DEFAULT_MODEL_REPO =
 	ENV_MODEL_REPO !== "" && ENV_MODEL_FILE !== "" ? ENV_MODEL_REPO : "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF";
 export const DEFAULT_MODEL_FILE =
@@ -59,7 +72,7 @@ function llmEnabled(): boolean {
 	if (activeCustomCompletion() !== undefined || activePiAiModel() !== undefined) {
 		return true;
 	}
-	return envBool("PROMETHEUS_MEMORY_LLM_ENABLED", true);
+	return envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
 }
 function llmMaxTokens(): number {
@@ -67,11 +80,11 @@ function llmMaxTokens(): number {
 	if (active?.maxTokens !== undefined) {
 		return active.maxTokens;
 	}
-	return envInt("PROMETHEUS_MEMORY_LLM_MAX_TOKENS", 2048);
+	return envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
 }
 function llmContextTokens(): number {
-	return envInt("PROMETHEUS_MEMORY_LLM_N_CTX", 2048);
+	return envInt("MNEMOPROMETHEUS_LLM_N_CTX", 2048);
 }
 function hostLlmEnabled(): boolean {
@@ -82,11 +95,11 @@ function hostLlmEnabled(): boolean {
 	if (active?.baseUrl !== undefined || (typeof active?.model === "string" && active.model !== "")) {
 		return false;
 	}
-	return envBool("PROMETHEUS_MEMORY_HOST_LLM_ENABLED", false);
+	return envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
 }
 function hostLlmContextTokens(): number {
-	return envInt("PROMETHEUS_MEMORY_HOST_LLM_N_CTX", 32000);
+	return envInt("MNEMOPROMETHEUS_HOST_LLM_N_CTX", 32000);
 }
 function llmBaseUrl(): string {
@@ -94,7 +107,7 @@ function llmBaseUrl(): string {
 	if (active?.baseUrl !== undefined) {
 		return stripTrailingSlash(active.baseUrl);
 	}
-	return stripTrailingSlash(env("PROMETHEUS_MEMORY_LLM_BASE_URL"));
+	return stripTrailingSlash(env("MNEMOPROMETHEUS_LLM_BASE_URL"));
 }
 function llmModelName(): string {
@@ -102,19 +115,19 @@ function llmModelName(): string {
 	if (typeof model === "string") {
 		return model;
 	}
-	return env("PROMETHEUS_MEMORY_LLM_MODEL") || "local";
+	return env("MNEMOPROMETHEUS_LLM_MODEL") || "local";
 }
-function llmApiKey(): string {
+/**
+ * Resolves the API key for the remote LLM endpoint.
+ *
+ * An `apiKey` set on the active runtime options wins. Otherwise the key is
+ * read from MNEMOPROMETHEUS_LLM_API_KEY, matching the other
+ * MNEMOPROMETHEUS_LLM_* settings in this module.
+ * MNEMOPROMETHEUS_LLM_APROMETHEUS_KEY is a misspelling produced by a
+ * mechanical rename; it is consulted last only so setups that adopted it
+ * keep working.
+ */
+function llmApiKey(): ApiKey {
 	const active = activeLlmOptions();
 	if (active?.apiKey !== undefined) {
 		return active.apiKey;
 	}
-	return env("PROMETHEUS_MEMORY_LLM_API_KEY");
+	return env("MNEMOPROMETHEUS_LLM_API_KEY") || env("MNEMOPROMETHEUS_LLM_APROMETHEUS_KEY");
 }
 function sleepPrompt(): string {
-	return env("PROMETHEUS_MEMORY_SLEEP_PROMPT").trim();
+	return env("MNEMOPROMETHEUS_SLEEP_PROMPT").trim();
 }
 function memoryLines(memories: readonly string[]): string {
@@ -228,8 +241,8 @@ async function tryHostLlm(prompt: string, maxTokens: number, temperature: number
 		maxTokens,
 		temperature,
 		timeout: 15,
-		provider: env("PROMETHEUS_MEMORY_HOST_LLM_PROVIDER").trim() || null,
-		model: env("PROMETHEUS_MEMORY_HOST_LLM_MODEL").trim() || null,
+		provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
+		model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
 	});
 	const text = typeof raw === "string" ? raw.trim() : "";
 	return [true, text === "" ? null : text];
@@ -309,30 +322,43 @@ export function llmAvailable(): boolean {
 	return llmEnabled() && llmBaseUrl() !== "";
 }
-export async function callRemoteLlm(prompt: string, temperature = 0.3): Promise<string | null> {
+export async function callRemoteLlm(
+	prompt: string,
+	temperature = 0.3,
+	options: RemoteLlmOptions = {},
+): Promise<string | null> {
 	const baseUrl = llmBaseUrl();
 	if (baseUrl === "") {
 		return null;
 	}
-	const headers: Record<string, string> = { "Content-Type": "application/json" };
-	const apiKey = llmApiKey();
-	if (apiKey !== "") {
-		headers.Authorization = `Bearer ${apiKey}`;
-	}
+	const body = JSON.stringify({
+		model: llmModelName(),
+		messages: [{ role: "user", content: prompt }],
+		max_tokens: llmMaxTokens(),
+		temperature,
+		stop: ["</s>", "<|user|>"],
+	});
+	const fetchImpl = options.fetch ?? fetch;
 	try {
-		const response = await fetch(`${baseUrl}/chat/completions`, {
-			method: "POST",
-			headers,
-			body: JSON.stringify({
-				model: llmModelName(),
-				messages: [{ role: "user", content: prompt }],
-				max_tokens: llmMaxTokens(),
-				temperature,
-				stop: ["</s>", "<|user|>"],
-			}),
-			signal: AbortSignal.timeout(60000),
+		// withAuth re-resolves the key on 401 (force-refresh, then sibling
+		// rotation) when the configured key is a resolver. An empty static key
+		// attempts without an Authorization header (local/proxy setups).
+		const response = await withAuth(llmApiKey(), async key => {
+			const headers: Record<string, string> = { "Content-Type": "application/json" };
+			if (key !== "") {
+				headers.Authorization = `Bearer ${key}`;
+			}
+			const res = await fetchImpl(`${baseUrl}/chat/completions`, {
+				method: "POST",
+				headers,
+				body,
+				signal: AbortSignal.timeout(60000),
+			});
+			if (res.status === 401) {
+				throw new ProviderHttpError("mnemopi remote LLM request unauthorized (401)", 401, { headers: res.headers });
+			}
+			return res;
 		});
 		if (!response.ok) {
 			return null;
@@ -355,7 +381,11 @@ export async function callLocalLlm(_prompt: string): Promise<string | null> {
 	return null;
 }
-async function summarizeChunk(memories: readonly string[], source = ""): Promise<string | null> {
+async function summarizeChunk(
+	memories: readonly string[],
+	source = "",
+	options: RemoteLlmOptions = {},
+): Promise<string | null> {
 	const hostPrompt = buildHostPrompt(memories, source);
 	const prompt = buildPrompt(memories, source);
 	if (configuredLlmWillHandleCall()) {
@@ -379,8 +409,8 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
 		return null;
 	}
-	if (llmEnabled() && llmBaseUrl() !== "" && !envBool("PROMETHEUS_MEMORY_FORCE_LOCAL", false)) {
-		const raw = await callRemoteLlm(prompt);
+	if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
+		const raw = await callRemoteLlm(prompt, 0.3, options);
 		if (raw !== null) {
 			const cleaned = cleanOutput(raw);
 			return cleaned === "" ? null : cleaned;
@@ -395,7 +425,11 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
 	return null;
 }
-export async function summarizeMemories(memories: readonly string[], source = ""): Promise<string | null> {
+export async function summarizeMemories(
+	memories: readonly string[],
+	source = "",
+	options: RemoteLlmOptions = {},
+): Promise<string | null> {
 	if (memories.length === 0) {
 		return null;
 	}
@@ -403,7 +437,7 @@ export async function summarizeMemories(memories: readonly string[], source = ""
 	const chunks = chunkMemoriesByBudget(memories, source);
 	const chunkSummaries: string[] = [];
 	for (const chunk of chunks) {
-		const summary = await summarizeChunk(chunk, source);
+		const summary = await summarizeChunk(chunk, source, options);
 		if (summary !== null) {
 			chunkSummaries.push(summary);
 		}
@@ -413,13 +447,17 @@ export async function summarizeMemories(memories: readonly string[], source = ""
 		return null;
 	}
 	if (chunkSummaries.length > 1) {
-		const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]`);
+		const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]`, options);
 		return final ?? chunkSummaries[0] ?? null;
 	}
 	return chunkSummaries[0] ?? null;
 }
-export async function complete(prompt: string, temperature = 0.3): Promise<string | null> {
+export async function complete(
+	prompt: string,
+	temperature = 0.3,
+	options: CompleteOptions = {},
+): Promise<string | null> {
 	if (configuredLlmWillHandleCall()) {
 		const raw = await callConfiguredCompletion(prompt, temperature, { maxTokens: llmMaxTokens() });
 		return raw === null ? null : cleanOutput(raw) || null;
@@ -428,8 +466,8 @@ export async function complete(prompt: string, temperature = 0.3): Promise<strin
 	if (attempted) {
 		return hostText;
 	}
-	if (llmEnabled() && llmBaseUrl() !== "" && !envBool("PROMETHEUS_MEMORY_FORCE_LOCAL", false)) {
-		const remote = await callRemoteLlm(prompt, temperature);
+	if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
+		const remote = await callRemoteLlm(prompt, temperature, options);
 		return remote === null ? null : cleanOutput(remote) || null;
 	}
 	return callLocalLlm(prompt);

package/src/core/memory.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { Database } from "bun:sqlite";
-import type { Api, Model } from "@prometheus-ai/ai";
+import type { Api, ApiKey, Model } from "@prometheus-ai/ai";
 import { dbPath as configuredDbPath } from "../config";
 import { closeQuietly } from "../db";
@@ -7,6 +7,7 @@ import type { MemoryInput, Metadata } from "../types";
 import { AnnotationStore } from "./annotations";
 import { BankManager } from "./banks";
 import { BeamMemory, initBeam } from "./beam/index";
+import { reconcileEmbeddingModel } from "./beam/store";
 import type { RecallEnhancedOptions, RecallOptions, RecallResult, SleepResult } from "./beam/types";
 import { EpisodicGraph } from "./episodic-graph";
 import {
@@ -35,13 +36,22 @@ export interface MnemopiOptions {
 	readonly noEmbeddings?: boolean;
 	readonly embeddingModel?: string;
 	readonly embeddingApiUrl?: string;
-	readonly embeddingApiKey?: string;
+	readonly embeddingApiKey?: ApiKey;
 	readonly embeddings?: false | MnemopiEmbeddingRuntimeOptions;
 	readonly llmEnabled?: boolean;
 	readonly llmBaseUrl?: string;
-	readonly llmApiKey?: string;
+	readonly llmApiKey?: ApiKey;
 	readonly llmModel?: string | Model<Api>;
 	readonly llm?: false | MnemopiLlmRuntimeOptions | Model<Api> | MnemopiLlmCompletion;
+	/** Escalate best-effort failure logs (embedding pipeline) from debug to warn. */
+	readonly debug?: boolean;
+	/**
+	 * When `false`, skip the embedding-model reconcile (wipe-and-rebuild) on open.
+	 * Read-only / ephemeral consumers (e.g. a stats snapshot) set this so an open
+	 * never triggers a destructive migration whose background rebuild the process
+	 * would exit before completing. Defaults to `true`.
+	 */
+	readonly reconcile?: boolean;
 }
 export interface RememberInput extends MemoryInput {
@@ -219,10 +229,11 @@ function resolveRuntimeOptions(options: MnemopiOptions): ResolvedMnemopiRuntimeO
 		}
 	}
-	if (embeddings === undefined && llm === undefined) {
+	const debug = options.debug ? true : undefined;
+	if (embeddings === undefined && llm === undefined && debug === undefined) {
 		return undefined;
 	}
-	return { embeddings, llm };
+	return { embeddings, llm, debug };
 }
 let defaultInstance: Mnemopi | null = null;
@@ -385,6 +396,15 @@ export class Mnemopi {
 		}
 		this.conn = this.beam.db;
 		this.db = this.beam.db;
+		// Wipe-and-rebuild stale embeddings when the configured model changed since
+		// the vectors were written. Runs inside the runtime scope so
+		// `currentEmbeddingModel()` reflects this instance's configured model.
+		// Skipped for read-only opens (`reconcile: false`) so an ephemeral stats
+		// reader never triggers a destructive migration whose async rebuild it would
+		// exit before completing — which would otherwise lose the embeddings.
+		if (options.reconcile !== false) {
+			this.#withRuntimeOptions(() => reconcileEmbeddingModel(this.beam));
+		}
 	}
 	close(): void {

package/src/core/plugins.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
-export const DEFAULT_PLUGIN_DIR = join(homedir(), ".prometheus", "memory", "plugins");
+export const DEFAULT_PLUGIN_DIR = join(homedir(), ".hermes", "mnemopi", "plugins");
 export type PluginConfig = Record<string, unknown>;
 export type MemoryDict = Record<string, unknown>;

package/src/core/polyphonic-recall.ts CHANGED Viewed

@@ -220,7 +220,7 @@ export class PolyphonicRecallEngine {
 	}
 	vectorVoice(queryEmbedding: readonly number[] | Float32Array | null): VoiceRecallResult[] {
-		if (envDisabled("PROMETHEUS_MEMORY_VOICE_VECTOR") || queryEmbedding === null) return [];
+		if (envDisabled("MNEMOPROMETHEUS_VOICE_VECTOR") || queryEmbedding === null) return [];
 		const queryUnit = normalizeVector(queryEmbedding);
 		if (queryUnit === null) return [];
 		const now = new Date().toISOString();
@@ -277,7 +277,7 @@ export class PolyphonicRecallEngine {
 		return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId)).slice(0, 20);
 	}
 	graphVoice(query: string): VoiceRecallResult[] {
-		if (envDisabled("PROMETHEUS_MEMORY_VOICE_GRAPH")) return [];
+		if (envDisabled("MNEMOPROMETHEUS_VOICE_GRAPH")) return [];
 		const results: VoiceRecallResult[] = [];
 		const seedIds = new Set<string>();
 		for (const entity of extractEntities(query)) {
@@ -323,7 +323,7 @@ export class PolyphonicRecallEngine {
 		return results;
 	}
 	factVoice(query: string): VoiceRecallResult[] {
-		if (envDisabled("PROMETHEUS_MEMORY_VOICE_FACT")) return [];
+		if (envDisabled("MNEMOPROMETHEUS_VOICE_FACT")) return [];
 		const byId = new Map<string, VoiceRecallResult>();
 		for (const word of queryWords(query)) {
 			const subject = word[0] === undefined ? word : word[0].toUpperCase() + word.slice(1);
@@ -351,7 +351,7 @@ export class PolyphonicRecallEngine {
 		return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId));
 	}
 	temporalVoice(query: string): VoiceRecallResult[] {
-		if (envDisabled("PROMETHEUS_MEMORY_VOICE_TEMPORAL") || !looksTemporal(query)) return [];
+		if (envDisabled("MNEMOPROMETHEUS_VOICE_TEMPORAL") || !looksTemporal(query)) return [];
 		const weekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
 		let rows: TemporalRow[] = [];
 		try {

package/src/core/query-cache.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { Database } from "bun:sqlite";
 import { mkdirSync } from "node:fs";
 import { dirname } from "node:path";
+import { type Env, enhancedRecallEnabled } from "../config";
 import { cosineSimilarity } from "./vector-math";
 export type QueryCacheResult = Record<string, unknown>;
@@ -39,10 +40,8 @@ interface CacheRow {
 	readonly results_json: string;
 }
-type Env = Readonly<Record<string, string | undefined>>;
 export function isEnhancedRecallEnabled(env: Env = process.env): boolean {
-	return env.PROMETHEUS_MEMORY_ENHANCED_RECALL === "1";
+	return enhancedRecallEnabled(env);
 }
 export function isQueryCacheEnabled(useCache = true, env: Env = process.env): boolean {