@prometheus-ai/memory 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +4 -4
  3. package/dist/types/config.d.ts +13 -2
  4. package/dist/types/core/beam/store.d.ts +20 -0
  5. package/dist/types/core/embeddings.d.ts +2 -1
  6. package/dist/types/core/extraction/client.d.ts +11 -7
  7. package/dist/types/core/extraction.d.ts +2 -1
  8. package/dist/types/core/fastembed-runtime.d.ts +4 -0
  9. package/dist/types/core/index.d.ts +1 -0
  10. package/dist/types/core/llm-backends.d.ts +2 -0
  11. package/dist/types/core/local-llm.d.ts +8 -3
  12. package/dist/types/core/memory.d.ts +12 -3
  13. package/dist/types/core/query-cache.d.ts +1 -2
  14. package/dist/types/core/runtime-options.d.ts +10 -5
  15. package/dist/types/core/shmr.d.ts +11 -5
  16. package/dist/types/core/vector-index.d.ts +16 -0
  17. package/dist/types/index.d.ts +2 -1
  18. package/package.json +30 -7
  19. package/src/cli.ts +19 -19
  20. package/src/config.ts +98 -68
  21. package/src/core/banks.ts +2 -2
  22. package/src/core/beam/consolidate.ts +34 -5
  23. package/src/core/beam/helpers.ts +21 -28
  24. package/src/core/beam/index.ts +2 -2
  25. package/src/core/beam/recall.ts +98 -25
  26. package/src/core/beam/store.ts +96 -4
  27. package/src/core/binary-vectors.ts +1 -1
  28. package/src/core/content-sanitizer.ts +3 -3
  29. package/src/core/cost-log.ts +1 -1
  30. package/src/core/embeddings.ts +75 -50
  31. package/src/core/extraction/client.ts +44 -20
  32. package/src/core/extraction.ts +10 -9
  33. package/src/core/fastembed-runtime.ts +89 -0
  34. package/src/core/index.ts +1 -0
  35. package/src/core/llm-backends.ts +3 -0
  36. package/src/core/local-llm.ts +81 -43
  37. package/src/core/memory.ts +25 -5
  38. package/src/core/plugins.ts +1 -1
  39. package/src/core/polyphonic-recall.ts +4 -4
  40. package/src/core/query-cache.ts +2 -3
  41. package/src/core/runtime-options.ts +13 -5
  42. package/src/core/shmr.ts +141 -39
  43. package/src/core/streaming.ts +1 -1
  44. package/src/core/triples.ts +3 -3
  45. package/src/core/vector-index.ts +84 -0
  46. package/src/diagnose.ts +2 -2
  47. package/src/dr/recovery.ts +5 -5
  48. package/src/index.ts +1 -1
  49. package/src/mcp-server.ts +2 -2
  50. package/src/mcp-tools.ts +61 -61
@@ -1,7 +1,9 @@
1
+ import { type ApiKey, type FetchImpl, withAuth } from "@prometheus-ai/ai";
2
+
1
3
  import { getDiagnostics } from "./diagnostics";
2
4
  import { EXTRACTION_SYSTEM_PROMPT, EXTRACTION_USER_TEMPLATE } from "./prompts";
3
5
 
4
- export const DEFAULT_EXTRACTION_MODEL = process.env.PROMETHEUS_MEMORY_EXTRACTION_MODEL || "google/gemini-2.5-flash";
6
+ export const DEFAULT_EXTRACTION_MODEL = process.env.MNEMOPROMETHEUS_EXTRACTION_MODEL || "google/gemini-2.5-flash";
5
7
  export const OPENROUTER_BASE_URL = (process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1").replace(
6
8
  /\/+$/,
7
9
  "",
@@ -26,6 +28,13 @@ export interface ExtractedFact {
26
28
  [key: string]: unknown;
27
29
  }
28
30
 
31
+ export interface ExtractionClientOptions {
32
+ model?: string | null;
33
+ apiKey?: ApiKey | null;
34
+ baseUrl?: string | null;
35
+ fetch?: FetchImpl;
36
+ }
37
+
29
38
  function sleep(ms: number): Promise<void> {
30
39
  const { promise, resolve } = Promise.withResolvers<void>();
31
40
  setTimeout(resolve, ms);
@@ -42,14 +51,16 @@ function authHeader(apiKey: string): Record<string, string> {
42
51
 
43
52
  export class ExtractionClient {
44
53
  model: string;
45
- apiKey: string;
54
+ apiKey: ApiKey;
46
55
  baseUrl: string;
47
56
  callCount = 0;
57
+ private readonly fetchImpl: FetchImpl;
48
58
 
49
- constructor(opts: { model?: string | null; apiKey?: string | null; baseUrl?: string | null } = {}) {
59
+ constructor(opts: ExtractionClientOptions = {}) {
50
60
  this.model = opts.model || DEFAULT_EXTRACTION_MODEL;
51
- this.apiKey = opts.apiKey ?? process.env.OPENROUTER_API_KEY ?? "";
61
+ this.apiKey = opts.apiKey ?? process.env.OPENROUTER_API_KEY ?? "";
52
62
  this.baseUrl = (opts.baseUrl || OPENROUTER_BASE_URL).replace(/\/+$/, "");
63
+ this.fetchImpl = opts.fetch ?? fetch;
53
64
  }
54
65
 
55
66
  async chat(messages: readonly ChatMessage[], temperature = 0, maxTokens = 4096): Promise<string> {
@@ -59,22 +70,34 @@ export class ExtractionClient {
59
70
  let lastError: unknown = null;
60
71
 
61
72
  for (const model of models) {
62
- for (let attempt = 0; attempt < 3; attempt += 1) {
63
- try {
64
- const result = await this.callApi(model, messages, temperature, maxTokens);
65
- if (result === "") {
66
- diag.recordNoOutput("cloud");
67
- }
68
- return result;
69
- } catch (exc) {
70
- lastError = exc;
71
- const msg = String(exc).toLowerCase();
72
- if (msg.includes("429") || msg.includes("rate")) {
73
- await sleep(Math.min(RATE_LIMIT_BACKOFF_MAX_MS, RATE_LIMIT_BACKOFF_BASE_MS * 2 ** attempt));
74
- continue;
73
+ try {
74
+ // withAuth re-resolves the key on 401/usage-limit (force-refresh,
75
+ // then sibling rotation) when `apiKey` is a resolver; the 429
76
+ // backoff loop stays inside the attempt so rate-limit retries
77
+ // reuse the already-resolved key.
78
+ const result = await withAuth(this.apiKey, async key => {
79
+ let rateLimitError: unknown = null;
80
+ for (let attempt = 0; attempt < 3; attempt += 1) {
81
+ try {
82
+ return await this.callApi(model, messages, temperature, maxTokens, key);
83
+ } catch (exc) {
84
+ const msg = String(exc).toLowerCase();
85
+ if (msg.includes("429") || msg.includes("rate")) {
86
+ rateLimitError = exc;
87
+ await sleep(Math.min(RATE_LIMIT_BACKOFF_MAX_MS, RATE_LIMIT_BACKOFF_BASE_MS * 2 ** attempt));
88
+ continue;
89
+ }
90
+ throw exc;
91
+ }
75
92
  }
76
- break;
93
+ throw rateLimitError;
94
+ });
95
+ if (result === "") {
96
+ diag.recordNoOutput("cloud");
77
97
  }
98
+ return result;
99
+ } catch (exc) {
100
+ lastError = exc;
78
101
  }
79
102
  await sleep(FALLBACK_MODEL_DELAY_MS);
80
103
  }
@@ -88,10 +111,11 @@ export class ExtractionClient {
88
111
  messages: readonly ChatMessage[],
89
112
  temperature: number,
90
113
  maxTokens: number,
114
+ apiKey = "",
91
115
  ): Promise<string> {
92
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
116
+ const response = await this.fetchImpl(`${this.baseUrl}/chat/completions`, {
93
117
  method: "POST",
94
- headers: authHeader(this.apiKey),
118
+ headers: authHeader(apiKey),
95
119
  body: JSON.stringify({ model, messages, temperature, max_tokens: maxTokens }),
96
120
  signal: AbortSignal.timeout(60000),
97
121
  });
@@ -7,6 +7,7 @@ import {
7
7
  cleanOutput,
8
8
  configuredLlmWillHandleCall,
9
9
  llmAvailable,
10
+ type RemoteLlmOptions,
10
11
  } from "./local-llm";
11
12
  import { getMnemopiRuntimeOptions } from "./runtime-options";
12
13
 
@@ -27,23 +28,23 @@ function envInt(name: string, defaultValue: number): number {
27
28
  }
28
29
 
29
30
  function llmEnabled(): boolean {
30
- return envBool("PROMETHEUS_MEMORY_LLM_ENABLED", true);
31
+ return envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
31
32
  }
32
33
 
33
34
  function hostLlmEnabled(): boolean {
34
- return envBool("PROMETHEUS_MEMORY_HOST_LLM_ENABLED", false);
35
+ return envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
35
36
  }
36
37
 
37
38
  function llmBaseUrl(): string {
38
- return env("PROMETHEUS_MEMORY_LLM_BASE_URL").replace(/\/+$/, "");
39
+ return env("MNEMOPROMETHEUS_LLM_BASE_URL").replace(/\/+$/, "");
39
40
  }
40
41
 
41
42
  function llmMaxTokens(): number {
42
- return envInt("PROMETHEUS_MEMORY_LLM_MAX_TOKENS", 2048);
43
+ return envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
43
44
  }
44
45
 
45
46
  export const EXTRACTION_PROMPT_TEMPLATE =
46
- env("PROMETHEUS_MEMORY_EXTRACTION_PROMPT") ||
47
+ env("MNEMOPROMETHEUS_EXTRACTION_PROMPT") ||
47
48
  `You are an expert structured memory extractor for Mnemopi v3.0+ MEMORIA tables.
48
49
  The user message below may be in English, German, Russian, or another language.
49
50
  First detect the language, then extract ONLY high-signal, long-term relevant items.
@@ -195,8 +196,8 @@ async function tryHostExtraction(prompt: string): Promise<[boolean, string | nul
195
196
  maxTokens: llmMaxTokens(),
196
197
  temperature: 0,
197
198
  timeout: 15,
198
- provider: env("PROMETHEUS_MEMORY_HOST_LLM_PROVIDER").trim() || null,
199
- model: env("PROMETHEUS_MEMORY_HOST_LLM_MODEL").trim() || null,
199
+ provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
200
+ model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
200
201
  });
201
202
  const text = typeof raw === "string" ? raw.trim() : "";
202
203
  return [true, text === "" ? null : text];
@@ -231,7 +232,7 @@ async function localFallback(prompt: string, sourceText: string, diag = getDiagn
231
232
  return [];
232
233
  }
233
234
 
234
- export async function extractFacts(text: string | null | undefined): Promise<string[]> {
235
+ export async function extractFacts(text: string | null | undefined, options: RemoteLlmOptions = {}): Promise<string[]> {
235
236
  const diag = getDiagnostics();
236
237
  if (typeof text !== "string" || text.trim() === "") {
237
238
  return [];
@@ -303,7 +304,7 @@ export async function extractFacts(text: string | null | undefined): Promise<str
303
304
  if (llmEnabled() && llmBaseUrl() !== "") {
304
305
  diag.recordAttempt("remote");
305
306
  try {
306
- const raw = await callRemoteLlm(prompt, 0);
307
+ const raw = await callRemoteLlm(prompt, 0, options);
307
308
  if (raw !== null) {
308
309
  const facts = parseFacts(cleanOutput(raw));
309
310
  if (facts.length > 0) {
@@ -0,0 +1,89 @@
1
+ import { createRequire } from "node:module";
2
+ import * as path from "node:path";
3
+ import {
4
+ ensureRuntimeInstalled,
5
+ getFastembedRuntimeDir,
6
+ installRuntimeModuleResolver,
7
+ logger,
8
+ resolveRuntimeModule,
9
+ } from "@prometheus-ai/utils";
10
+ import type * as Fastembed from "fastembed";
11
+ import packageManifest from "../../package.json" with { type: "json" };
12
+
13
+ type FastembedModule = typeof Fastembed;
14
+
15
+ /**
16
+ * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
17
+ * assets across platforms), never bundled and never installed eagerly. When
18
+ * the direct import cannot resolve — bundled `dist/cli.js`, compiled binary,
19
+ * or a consumer that skipped the optional peers — the pinned pair is
20
+ * `bun install`ed into a per-version runtime cache on first use and loaded
21
+ * from there (#2389).
22
+ *
23
+ * The pins live in `peerDependencies` as exact versions (not `catalog:`) so
24
+ * this module reads concrete specs even when the workspace manifest is
25
+ * inlined into a bundle; a workspace test asserts they match the catalog.
26
+ */
27
+ const FASTEMBED_SPEC = packageManifest.peerDependencies.fastembed;
28
+ const ORT_SPEC = packageManifest.peerDependencies["onnxruntime-node"];
29
+
30
+ let fastembedLoad: Promise<FastembedModule> | null = null;
31
+
32
+ export function loadFastembed(): Promise<FastembedModule> {
33
+ fastembedLoad ??= loadFastembedOnce().catch(error => {
34
+ fastembedLoad = null;
35
+ throw error;
36
+ });
37
+ return fastembedLoad;
38
+ }
39
+
40
+ async function loadFastembedOnce(): Promise<FastembedModule> {
41
+ // Dynamic imports: both packages are optional peers that eagerly load
42
+ // native addons and may be absent at runtime — a static import would load
43
+ // the addon at module-init and crash every consumer without the peers.
44
+ try {
45
+ // Preload ORT 1.24 before fastembed's nested ORT 1.21 — only on Windows,
46
+ // where loading the older binding first triggers a DLL-reuse crash.
47
+ if (process.platform === "win32") {
48
+ await import("onnxruntime-node");
49
+ }
50
+ return await import("fastembed");
51
+ } catch (error) {
52
+ if (!isModuleResolutionError(error)) throw error;
53
+ logger.debug("mnemopi: fastembed not resolvable, using on-demand runtime install", {
54
+ error: String(error),
55
+ });
56
+ return loadFromRuntimeInstall();
57
+ }
58
+ }
59
+
60
+ async function loadFromRuntimeInstall(): Promise<FastembedModule> {
61
+ const versionKey = `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}`.replace(/[^A-Za-z0-9._-]/g, "_");
62
+ const runtimeDir = await ensureRuntimeInstalled({
63
+ runtimeDir: path.join(getFastembedRuntimeDir(), versionKey),
64
+ install: { dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC } },
65
+ probePackage: "fastembed",
66
+ });
67
+ const nodeModules = path.join(runtimeDir, "node_modules");
68
+ // The compiled-binary resolver ignores `main`/`exports` for real-FS bare
69
+ // specifiers (Bun #1763); route the runtime graph's requires (fastembed →
70
+ // onnxruntime-node, @anush008/tokenizers → platform binding, …) through
71
+ // the runtime cache.
72
+ installRuntimeModuleResolver({ runtimeNodeModules: nodeModules });
73
+ if (process.platform === "win32") {
74
+ const ortEntry = resolveRuntimeModule(nodeModules, "onnxruntime-node");
75
+ if (ortEntry) createRequire(ortEntry)(ortEntry);
76
+ }
77
+ const entry = resolveRuntimeModule(nodeModules, "fastembed");
78
+ if (!entry) throw new Error(`fastembed runtime install at ${runtimeDir} has no loadable entry`);
79
+ const requireRuntime = createRequire(entry);
80
+ return requireRuntime(entry) as FastembedModule;
81
+ }
82
+
83
+ function isModuleResolutionError(error: unknown): boolean {
84
+ if (typeof error !== "object" || error === null) return false;
85
+ const { name, code, message } = error as { name?: unknown; code?: unknown; message?: unknown };
86
+ if (name === "ResolveMessage") return true;
87
+ if (code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND") return true;
88
+ return typeof message === "string" && /cannot find (module|package)/i.test(message);
89
+ }
package/src/core/index.ts CHANGED
@@ -1,3 +1,4 @@
1
+ export { configureRecallFeatures, type RecallFeatureFlags } from "../config";
1
2
  export * from "./banks";
2
3
  export * from "./beam/index";
3
4
  export * from "./memory";
@@ -1,9 +1,12 @@
1
+ import type { FetchImpl } from "@prometheus-ai/ai";
2
+
1
3
  export interface CompleteOptions {
2
4
  maxTokens?: number;
3
5
  temperature?: number;
4
6
  timeout?: number;
5
7
  provider?: string | null;
6
8
  model?: string | null;
9
+ fetch?: FetchImpl;
7
10
  }
8
11
 
9
12
  export interface LlmBackend {
@@ -1,5 +1,14 @@
1
- import { type Api, type AssistantMessage, completeSimple, type Model } from "@prometheus-ai/ai";
2
- import { callHostLlm, getHostLlmBackend } from "./llm-backends";
1
+ import {
2
+ type Api,
3
+ type ApiKey,
4
+ type AssistantMessage,
5
+ completeSimple,
6
+ type FetchImpl,
7
+ type Model,
8
+ ProviderHttpError,
9
+ withAuth,
10
+ } from "@prometheus-ai/ai";
11
+ import { type CompleteOptions, callHostLlm, getHostLlmBackend } from "./llm-backends";
3
12
  import {
4
13
  getMnemopiRuntimeOptions,
5
14
  isPiAiModel,
@@ -7,8 +16,12 @@ import {
7
16
  type MnemopiLlmCompletion,
8
17
  } from "./runtime-options";
9
18
 
10
- const ENV_MODEL_REPO = process.env.PROMETHEUS_MEMORY_LLM_REPO ?? "";
11
- const ENV_MODEL_FILE = process.env.PROMETHEUS_MEMORY_LLM_FILE ?? "";
19
+ const ENV_MODEL_REPO = process.env.MNEMOPROMETHEUS_LLM_REPO ?? "";
20
+ export interface RemoteLlmOptions {
21
+ fetch?: FetchImpl;
22
+ }
23
+
24
+ const ENV_MODEL_FILE = process.env.MNEMOPROMETHEUS_LLM_FILE ?? "";
12
25
  export const DEFAULT_MODEL_REPO =
13
26
  ENV_MODEL_REPO !== "" && ENV_MODEL_FILE !== "" ? ENV_MODEL_REPO : "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF";
14
27
  export const DEFAULT_MODEL_FILE =
@@ -59,7 +72,7 @@ function llmEnabled(): boolean {
59
72
  if (activeCustomCompletion() !== undefined || activePiAiModel() !== undefined) {
60
73
  return true;
61
74
  }
62
- return envBool("PROMETHEUS_MEMORY_LLM_ENABLED", true);
75
+ return envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
63
76
  }
64
77
 
65
78
  function llmMaxTokens(): number {
@@ -67,11 +80,11 @@ function llmMaxTokens(): number {
67
80
  if (active?.maxTokens !== undefined) {
68
81
  return active.maxTokens;
69
82
  }
70
- return envInt("PROMETHEUS_MEMORY_LLM_MAX_TOKENS", 2048);
83
+ return envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
71
84
  }
72
85
 
73
86
  function llmContextTokens(): number {
74
- return envInt("PROMETHEUS_MEMORY_LLM_N_CTX", 2048);
87
+ return envInt("MNEMOPROMETHEUS_LLM_N_CTX", 2048);
75
88
  }
76
89
 
77
90
  function hostLlmEnabled(): boolean {
@@ -82,11 +95,11 @@ function hostLlmEnabled(): boolean {
82
95
  if (active?.baseUrl !== undefined || (typeof active?.model === "string" && active.model !== "")) {
83
96
  return false;
84
97
  }
85
- return envBool("PROMETHEUS_MEMORY_HOST_LLM_ENABLED", false);
98
+ return envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
86
99
  }
87
100
 
88
101
  function hostLlmContextTokens(): number {
89
- return envInt("PROMETHEUS_MEMORY_HOST_LLM_N_CTX", 32000);
102
+ return envInt("MNEMOPROMETHEUS_HOST_LLM_N_CTX", 32000);
90
103
  }
91
104
 
92
105
  function llmBaseUrl(): string {
@@ -94,7 +107,7 @@ function llmBaseUrl(): string {
94
107
  if (active?.baseUrl !== undefined) {
95
108
  return stripTrailingSlash(active.baseUrl);
96
109
  }
97
- return stripTrailingSlash(env("PROMETHEUS_MEMORY_LLM_BASE_URL"));
110
+ return stripTrailingSlash(env("MNEMOPROMETHEUS_LLM_BASE_URL"));
98
111
  }
99
112
 
100
113
  function llmModelName(): string {
@@ -102,19 +115,19 @@ function llmModelName(): string {
102
115
  if (typeof model === "string") {
103
116
  return model;
104
117
  }
105
- return env("PROMETHEUS_MEMORY_LLM_MODEL") || "local";
118
+ return env("MNEMOPROMETHEUS_LLM_MODEL") || "local";
106
119
  }
107
120
 
108
- function llmApiKey(): string {
121
+ function llmApiKey(): ApiKey {
109
122
  const active = activeLlmOptions();
110
123
  if (active?.apiKey !== undefined) {
111
124
  return active.apiKey;
112
125
  }
113
- return env("PROMETHEUS_MEMORY_LLM_API_KEY");
126
+ return env("MNEMOPROMETHEUS_LLM_API_KEY");
114
127
  }
115
128
 
116
129
  function sleepPrompt(): string {
117
- return env("PROMETHEUS_MEMORY_SLEEP_PROMPT").trim();
130
+ return env("MNEMOPROMETHEUS_SLEEP_PROMPT").trim();
118
131
  }
119
132
 
120
133
  function memoryLines(memories: readonly string[]): string {
@@ -228,8 +241,8 @@ async function tryHostLlm(prompt: string, maxTokens: number, temperature: number
228
241
  maxTokens,
229
242
  temperature,
230
243
  timeout: 15,
231
- provider: env("PROMETHEUS_MEMORY_HOST_LLM_PROVIDER").trim() || null,
232
- model: env("PROMETHEUS_MEMORY_HOST_LLM_MODEL").trim() || null,
244
+ provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
245
+ model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
233
246
  });
234
247
  const text = typeof raw === "string" ? raw.trim() : "";
235
248
  return [true, text === "" ? null : text];
@@ -309,30 +322,43 @@ export function llmAvailable(): boolean {
309
322
  return llmEnabled() && llmBaseUrl() !== "";
310
323
  }
311
324
 
312
- export async function callRemoteLlm(prompt: string, temperature = 0.3): Promise<string | null> {
325
+ export async function callRemoteLlm(
326
+ prompt: string,
327
+ temperature = 0.3,
328
+ options: RemoteLlmOptions = {},
329
+ ): Promise<string | null> {
313
330
  const baseUrl = llmBaseUrl();
314
331
  if (baseUrl === "") {
315
332
  return null;
316
333
  }
317
334
 
318
- const headers: Record<string, string> = { "Content-Type": "application/json" };
319
- const apiKey = llmApiKey();
320
- if (apiKey !== "") {
321
- headers.Authorization = `Bearer ${apiKey}`;
322
- }
323
-
335
+ const body = JSON.stringify({
336
+ model: llmModelName(),
337
+ messages: [{ role: "user", content: prompt }],
338
+ max_tokens: llmMaxTokens(),
339
+ temperature,
340
+ stop: ["</s>", "<|user|>"],
341
+ });
342
+ const fetchImpl = options.fetch ?? fetch;
324
343
  try {
325
- const response = await fetch(`${baseUrl}/chat/completions`, {
326
- method: "POST",
327
- headers,
328
- body: JSON.stringify({
329
- model: llmModelName(),
330
- messages: [{ role: "user", content: prompt }],
331
- max_tokens: llmMaxTokens(),
332
- temperature,
333
- stop: ["</s>", "<|user|>"],
334
- }),
335
- signal: AbortSignal.timeout(60000),
344
+ // withAuth re-resolves the key on 401 (force-refresh, then sibling
345
+ // rotation) when the configured key is a resolver. An empty static key
346
+ // attempts without an Authorization header (local/proxy setups).
347
+ const response = await withAuth(llmApiKey(), async key => {
348
+ const headers: Record<string, string> = { "Content-Type": "application/json" };
349
+ if (key !== "") {
350
+ headers.Authorization = `Bearer ${key}`;
351
+ }
352
+ const res = await fetchImpl(`${baseUrl}/chat/completions`, {
353
+ method: "POST",
354
+ headers,
355
+ body,
356
+ signal: AbortSignal.timeout(60000),
357
+ });
358
+ if (res.status === 401) {
359
+ throw new ProviderHttpError("mnemopi remote LLM request unauthorized (401)", 401, { headers: res.headers });
360
+ }
361
+ return res;
336
362
  });
337
363
  if (!response.ok) {
338
364
  return null;
@@ -355,7 +381,11 @@ export async function callLocalLlm(_prompt: string): Promise<string | null> {
355
381
  return null;
356
382
  }
357
383
 
358
- async function summarizeChunk(memories: readonly string[], source = ""): Promise<string | null> {
384
+ async function summarizeChunk(
385
+ memories: readonly string[],
386
+ source = "",
387
+ options: RemoteLlmOptions = {},
388
+ ): Promise<string | null> {
359
389
  const hostPrompt = buildHostPrompt(memories, source);
360
390
  const prompt = buildPrompt(memories, source);
361
391
  if (configuredLlmWillHandleCall()) {
@@ -379,8 +409,8 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
379
409
  return null;
380
410
  }
381
411
 
382
- if (llmEnabled() && llmBaseUrl() !== "" && !envBool("PROMETHEUS_MEMORY_FORCE_LOCAL", false)) {
383
- const raw = await callRemoteLlm(prompt);
412
+ if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
413
+ const raw = await callRemoteLlm(prompt, 0.3, options);
384
414
  if (raw !== null) {
385
415
  const cleaned = cleanOutput(raw);
386
416
  return cleaned === "" ? null : cleaned;
@@ -395,7 +425,11 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
395
425
  return null;
396
426
  }
397
427
 
398
- export async function summarizeMemories(memories: readonly string[], source = ""): Promise<string | null> {
428
+ export async function summarizeMemories(
429
+ memories: readonly string[],
430
+ source = "",
431
+ options: RemoteLlmOptions = {},
432
+ ): Promise<string | null> {
399
433
  if (memories.length === 0) {
400
434
  return null;
401
435
  }
@@ -403,7 +437,7 @@ export async function summarizeMemories(memories: readonly string[], source = ""
403
437
  const chunks = chunkMemoriesByBudget(memories, source);
404
438
  const chunkSummaries: string[] = [];
405
439
  for (const chunk of chunks) {
406
- const summary = await summarizeChunk(chunk, source);
440
+ const summary = await summarizeChunk(chunk, source, options);
407
441
  if (summary !== null) {
408
442
  chunkSummaries.push(summary);
409
443
  }
@@ -413,13 +447,17 @@ export async function summarizeMemories(memories: readonly string[], source = ""
413
447
  return null;
414
448
  }
415
449
  if (chunkSummaries.length > 1) {
416
- const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]`);
450
+ const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]`, options);
417
451
  return final ?? chunkSummaries[0] ?? null;
418
452
  }
419
453
  return chunkSummaries[0] ?? null;
420
454
  }
421
455
 
422
- export async function complete(prompt: string, temperature = 0.3): Promise<string | null> {
456
+ export async function complete(
457
+ prompt: string,
458
+ temperature = 0.3,
459
+ options: CompleteOptions = {},
460
+ ): Promise<string | null> {
423
461
  if (configuredLlmWillHandleCall()) {
424
462
  const raw = await callConfiguredCompletion(prompt, temperature, { maxTokens: llmMaxTokens() });
425
463
  return raw === null ? null : cleanOutput(raw) || null;
@@ -428,8 +466,8 @@ export async function complete(prompt: string, temperature = 0.3): Promise<strin
428
466
  if (attempted) {
429
467
  return hostText;
430
468
  }
431
- if (llmEnabled() && llmBaseUrl() !== "" && !envBool("PROMETHEUS_MEMORY_FORCE_LOCAL", false)) {
432
- const remote = await callRemoteLlm(prompt, temperature);
469
+ if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
470
+ const remote = await callRemoteLlm(prompt, temperature, options);
433
471
  return remote === null ? null : cleanOutput(remote) || null;
434
472
  }
435
473
  return callLocalLlm(prompt);
@@ -1,5 +1,5 @@
1
1
  import type { Database } from "bun:sqlite";
2
- import type { Api, Model } from "@prometheus-ai/ai";
2
+ import type { Api, ApiKey, Model } from "@prometheus-ai/ai";
3
3
 
4
4
  import { dbPath as configuredDbPath } from "../config";
5
5
  import { closeQuietly } from "../db";
@@ -7,6 +7,7 @@ import type { MemoryInput, Metadata } from "../types";
7
7
  import { AnnotationStore } from "./annotations";
8
8
  import { BankManager } from "./banks";
9
9
  import { BeamMemory, initBeam } from "./beam/index";
10
+ import { reconcileEmbeddingModel } from "./beam/store";
10
11
  import type { RecallEnhancedOptions, RecallOptions, RecallResult, SleepResult } from "./beam/types";
11
12
  import { EpisodicGraph } from "./episodic-graph";
12
13
  import {
@@ -35,13 +36,22 @@ export interface MnemopiOptions {
35
36
  readonly noEmbeddings?: boolean;
36
37
  readonly embeddingModel?: string;
37
38
  readonly embeddingApiUrl?: string;
38
- readonly embeddingApiKey?: string;
39
+ readonly embeddingApiKey?: ApiKey;
39
40
  readonly embeddings?: false | MnemopiEmbeddingRuntimeOptions;
40
41
  readonly llmEnabled?: boolean;
41
42
  readonly llmBaseUrl?: string;
42
- readonly llmApiKey?: string;
43
+ readonly llmApiKey?: ApiKey;
43
44
  readonly llmModel?: string | Model<Api>;
44
45
  readonly llm?: false | MnemopiLlmRuntimeOptions | Model<Api> | MnemopiLlmCompletion;
46
+ /** Escalate best-effort failure logs (embedding pipeline) from debug to warn. */
47
+ readonly debug?: boolean;
48
+ /**
49
+ * When `false`, skip the embedding-model reconcile (wipe-and-rebuild) on open.
50
+ * Read-only / ephemeral consumers (e.g. a stats snapshot) set this so an open
51
+ * never triggers a destructive migration whose background rebuild the process
52
+ * would exit before completing. Defaults to `true`.
53
+ */
54
+ readonly reconcile?: boolean;
45
55
  }
46
56
 
47
57
  export interface RememberInput extends MemoryInput {
@@ -219,10 +229,11 @@ function resolveRuntimeOptions(options: MnemopiOptions): ResolvedMnemopiRuntimeO
219
229
  }
220
230
  }
221
231
 
222
- if (embeddings === undefined && llm === undefined) {
232
+ const debug = options.debug ? true : undefined;
233
+ if (embeddings === undefined && llm === undefined && debug === undefined) {
223
234
  return undefined;
224
235
  }
225
- return { embeddings, llm };
236
+ return { embeddings, llm, debug };
226
237
  }
227
238
 
228
239
  let defaultInstance: Mnemopi | null = null;
@@ -385,6 +396,15 @@ export class Mnemopi {
385
396
  }
386
397
  this.conn = this.beam.db;
387
398
  this.db = this.beam.db;
399
+ // Wipe-and-rebuild stale embeddings when the configured model changed since
400
+ // the vectors were written. Runs inside the runtime scope so
401
+ // `currentEmbeddingModel()` reflects this instance's configured model.
402
+ // Skipped for read-only opens (`reconcile: false`) so an ephemeral stats
403
+ // reader never triggers a destructive migration whose async rebuild it would
404
+ // exit before completing — which would otherwise lose the embeddings.
405
+ if (options.reconcile !== false) {
406
+ this.#withRuntimeOptions(() => reconcileEmbeddingModel(this.beam));
407
+ }
388
408
  }
389
409
 
390
410
  close(): void {
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
2
2
  import { homedir } from "node:os";
3
3
  import { join } from "node:path";
4
4
 
5
- export const DEFAULT_PLUGIN_DIR = join(homedir(), ".prometheus", "memory", "plugins");
5
+ export const DEFAULT_PLUGIN_DIR = join(homedir(), ".hermes", "mnemopi", "plugins");
6
6
 
7
7
  export type PluginConfig = Record<string, unknown>;
8
8
  export type MemoryDict = Record<string, unknown>;
@@ -220,7 +220,7 @@ export class PolyphonicRecallEngine {
220
220
  }
221
221
 
222
222
  vectorVoice(queryEmbedding: readonly number[] | Float32Array | null): VoiceRecallResult[] {
223
- if (envDisabled("PROMETHEUS_MEMORY_VOICE_VECTOR") || queryEmbedding === null) return [];
223
+ if (envDisabled("MNEMOPROMETHEUS_VOICE_VECTOR") || queryEmbedding === null) return [];
224
224
  const queryUnit = normalizeVector(queryEmbedding);
225
225
  if (queryUnit === null) return [];
226
226
  const now = new Date().toISOString();
@@ -277,7 +277,7 @@ export class PolyphonicRecallEngine {
277
277
  return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId)).slice(0, 20);
278
278
  }
279
279
  graphVoice(query: string): VoiceRecallResult[] {
280
- if (envDisabled("PROMETHEUS_MEMORY_VOICE_GRAPH")) return [];
280
+ if (envDisabled("MNEMOPROMETHEUS_VOICE_GRAPH")) return [];
281
281
  const results: VoiceRecallResult[] = [];
282
282
  const seedIds = new Set<string>();
283
283
  for (const entity of extractEntities(query)) {
@@ -323,7 +323,7 @@ export class PolyphonicRecallEngine {
323
323
  return results;
324
324
  }
325
325
  factVoice(query: string): VoiceRecallResult[] {
326
- if (envDisabled("PROMETHEUS_MEMORY_VOICE_FACT")) return [];
326
+ if (envDisabled("MNEMOPROMETHEUS_VOICE_FACT")) return [];
327
327
  const byId = new Map<string, VoiceRecallResult>();
328
328
  for (const word of queryWords(query)) {
329
329
  const subject = word[0] === undefined ? word : word[0].toUpperCase() + word.slice(1);
@@ -351,7 +351,7 @@ export class PolyphonicRecallEngine {
351
351
  return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId));
352
352
  }
353
353
  temporalVoice(query: string): VoiceRecallResult[] {
354
- if (envDisabled("PROMETHEUS_MEMORY_VOICE_TEMPORAL") || !looksTemporal(query)) return [];
354
+ if (envDisabled("MNEMOPROMETHEUS_VOICE_TEMPORAL") || !looksTemporal(query)) return [];
355
355
  const weekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
356
356
  let rows: TemporalRow[] = [];
357
357
  try {
@@ -1,6 +1,7 @@
1
1
  import { Database } from "bun:sqlite";
2
2
  import { mkdirSync } from "node:fs";
3
3
  import { dirname } from "node:path";
4
+ import { type Env, enhancedRecallEnabled } from "../config";
4
5
  import { cosineSimilarity } from "./vector-math";
5
6
 
6
7
  export type QueryCacheResult = Record<string, unknown>;
@@ -39,10 +40,8 @@ interface CacheRow {
39
40
  readonly results_json: string;
40
41
  }
41
42
 
42
- type Env = Readonly<Record<string, string | undefined>>;
43
-
44
43
  export function isEnhancedRecallEnabled(env: Env = process.env): boolean {
45
- return env.PROMETHEUS_MEMORY_ENHANCED_RECALL === "1";
44
+ return enhancedRecallEnabled(env);
46
45
  }
47
46
 
48
47
  export function isQueryCacheEnabled(useCache = true, env: Env = process.env): boolean {