@prometheus-ai/memory 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +4 -4
  3. package/dist/types/config.d.ts +13 -2
  4. package/dist/types/core/beam/store.d.ts +20 -0
  5. package/dist/types/core/embeddings.d.ts +2 -1
  6. package/dist/types/core/extraction/client.d.ts +11 -7
  7. package/dist/types/core/extraction.d.ts +2 -1
  8. package/dist/types/core/fastembed-runtime.d.ts +4 -0
  9. package/dist/types/core/index.d.ts +1 -0
  10. package/dist/types/core/llm-backends.d.ts +2 -0
  11. package/dist/types/core/local-llm.d.ts +8 -3
  12. package/dist/types/core/memory.d.ts +12 -3
  13. package/dist/types/core/query-cache.d.ts +1 -2
  14. package/dist/types/core/runtime-options.d.ts +10 -5
  15. package/dist/types/core/shmr.d.ts +11 -5
  16. package/dist/types/core/vector-index.d.ts +16 -0
  17. package/dist/types/index.d.ts +2 -1
  18. package/package.json +30 -7
  19. package/src/cli.ts +19 -19
  20. package/src/config.ts +98 -68
  21. package/src/core/banks.ts +2 -2
  22. package/src/core/beam/consolidate.ts +34 -5
  23. package/src/core/beam/helpers.ts +21 -28
  24. package/src/core/beam/index.ts +2 -2
  25. package/src/core/beam/recall.ts +98 -25
  26. package/src/core/beam/store.ts +96 -4
  27. package/src/core/binary-vectors.ts +1 -1
  28. package/src/core/content-sanitizer.ts +3 -3
  29. package/src/core/cost-log.ts +1 -1
  30. package/src/core/embeddings.ts +75 -50
  31. package/src/core/extraction/client.ts +44 -20
  32. package/src/core/extraction.ts +10 -9
  33. package/src/core/fastembed-runtime.ts +89 -0
  34. package/src/core/index.ts +1 -0
  35. package/src/core/llm-backends.ts +3 -0
  36. package/src/core/local-llm.ts +81 -43
  37. package/src/core/memory.ts +25 -5
  38. package/src/core/plugins.ts +1 -1
  39. package/src/core/polyphonic-recall.ts +4 -4
  40. package/src/core/query-cache.ts +2 -3
  41. package/src/core/runtime-options.ts +13 -5
  42. package/src/core/shmr.ts +141 -39
  43. package/src/core/streaming.ts +1 -1
  44. package/src/core/triples.ts +3 -3
  45. package/src/core/vector-index.ts +84 -0
  46. package/src/diagnose.ts +2 -2
  47. package/src/dr/recovery.ts +5 -5
  48. package/src/index.ts +1 -1
  49. package/src/mcp-server.ts +2 -2
  50. package/src/mcp-tools.ts +61 -61
@@ -2,7 +2,7 @@ import { normalizedRecallWeights, temporalHalflifeHours } from "../../config";
2
2
  import { embedQuery } from "../embeddings";
3
3
  import { mmrRerank } from "../mmr";
4
4
  import { adjustWeights, classifyIntent } from "../query-intent";
5
- import { getSynonyms, normalizeQuery } from "../synonyms";
5
+ import { getSynonyms, normalizeQuery, STOP_WORDS as QUERY_STOP_WORDS } from "../synonyms";
6
6
  import { extractTemporal } from "../temporal-parser";
7
7
  import { cosineSimilarity } from "../vector-math";
8
8
  import type { BeamMemoryState, RecallEnhancedOptions, RecallOptions, RecallResult } from "./types";
@@ -101,6 +101,31 @@ const STOP_WORDS = new Set([
101
101
  "with",
102
102
  ]);
103
103
 
104
+ const FACT_QUERY_FILLER_WORDS = new Set([
105
+ ...QUERY_STOP_WORDS,
106
+ "active",
107
+ "current",
108
+ "currently",
109
+ "d",
110
+ "know",
111
+ "latest",
112
+ "ll",
113
+ "m",
114
+ "please",
115
+ "present",
116
+ "re",
117
+ "recent",
118
+ "remind",
119
+ "remember",
120
+ "s",
121
+ "t",
122
+ "tell",
123
+ "today",
124
+ "ve",
125
+ ]);
126
+
127
+ const FACT_CLITIC_FRAGMENTS = new Set(["d", "ll", "m", "re", "s", "t", "ve"]);
128
+
104
129
  function nowIso(): string {
105
130
  return new Date().toISOString();
106
131
  }
@@ -176,6 +201,35 @@ function expandedTokenGroups(query: string, useSynonyms = true): string[][] {
176
201
  return groups;
177
202
  }
178
203
 
204
+ function factExpandedTokenGroups(query: string, content: string): string[][] {
205
+ const contentLower = content.toLowerCase();
206
+ const contentTokens = new Set(tokenize(contentLower));
207
+ const groups: string[][] = [];
208
+ for (const token of tokenize(query)) {
209
+ if (FACT_QUERY_FILLER_WORDS.has(token) && (FACT_CLITIC_FRAGMENTS.has(token) || !contentTokens.has(token))) {
210
+ continue;
211
+ }
212
+ const seen = new Set<string>();
213
+ for (const variant of recallSynonyms(token, true)) {
214
+ for (const part of tokenize(variant)) {
215
+ if (!FACT_QUERY_FILLER_WORDS.has(part) || (!FACT_CLITIC_FRAGMENTS.has(part) && contentTokens.has(part))) {
216
+ seen.add(part);
217
+ }
218
+ }
219
+ }
220
+ if (seen.size > 0) groups.push([...seen]);
221
+ }
222
+ return groups;
223
+ }
224
+
225
+ function tokensFromGroups(groups: readonly (readonly string[])[]): string[] {
226
+ const seen = new Set<string>();
227
+ for (const group of groups) {
228
+ for (const token of group) seen.add(token);
229
+ }
230
+ return [...seen];
231
+ }
232
+
179
233
  function contentMatchesToken(contentLower: string, contentTokens: ReadonlySet<string>, token: string): boolean {
180
234
  if (contentTokens.has(token) || contentLower.includes(token)) return true;
181
235
  for (const contentToken of contentTokens) {
@@ -1062,13 +1116,11 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
1062
1116
  }
1063
1117
  }
1064
1118
  if (matched.length === 0) return [];
1065
- const rowids = matched
1066
- .slice(0, topK)
1067
- .map(row => asNumber(row.rowid))
1068
- .filter(rowid => rowid > 0);
1119
+ const rowids = matched.map(row => asNumber(row.rowid)).filter(rowid => rowid > 0);
1069
1120
  if (rowids.length === 0) return [];
1070
1121
  const visibility = factVisibilityWhere(beam, "");
1071
1122
  const ranks = normalizeRanks(matched, "rowid");
1123
+ const normalized = normalizeQuery(query).toLowerCase();
1072
1124
  const rows = queryAll(
1073
1125
  beam,
1074
1126
  `SELECT rowid, fact_id, subject, predicate, object, timestamp, confidence
@@ -1076,25 +1128,46 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
1076
1128
  WHERE rowid IN (${placeholders(rowids.length)}) AND ${visibility.where}
1077
1129
  ORDER BY confidence DESC
1078
1130
  LIMIT ?`,
1079
- [...rowids, ...visibility.params, topK],
1131
+ [...rowids, ...visibility.params, rowids.length],
1080
1132
  );
1081
- return rows.map(row => {
1082
- const subject = asString(row.subject);
1083
- const predicate = asString(row.predicate);
1084
- const object = asString(row.object);
1085
- const confidence = asNumber(row.confidence, 0.5);
1086
- const result: FactRecallResult = {
1087
- id: asString(row.fact_id),
1088
- content: object.length > 0 ? object : `${subject} ${predicate}`.trim(),
1089
- score: round4(confidence * 0.8 + (ranks.get(asNumber(row.rowid)) ?? 0) * 0.2),
1090
- fact_id: asString(row.fact_id),
1091
- subject,
1092
- predicate,
1093
- timestamp: asNullableString(row.timestamp),
1094
- tier_label: "fact",
1095
- tier: "fact",
1096
- source: "facts",
1097
- };
1098
- return result;
1099
- });
1133
+ return rows
1134
+ .map(row => {
1135
+ const subject = asString(row.subject);
1136
+ const predicate = asString(row.predicate);
1137
+ const object = asString(row.object);
1138
+ const confidence = asNumber(row.confidence, 0.5);
1139
+ const content = object.length > 0 ? object : `${subject} ${predicate}`.trim();
1140
+ const searchable = `${subject} ${predicate} ${object}`.trim();
1141
+ const queryGroups = factExpandedTokenGroups(query, searchable);
1142
+ const queryTokens = tokensFromGroups(queryGroups);
1143
+ const lexical =
1144
+ queryGroups.length > 0
1145
+ ? lexicalGroupRelevance(queryGroups, searchable, normalized)
1146
+ : lexicalRelevance(queryTokens, searchable, normalized);
1147
+ const rank = ranks.get(asNumber(row.rowid)) ?? 0;
1148
+ const result: FactRecallResult = {
1149
+ id: asString(row.fact_id),
1150
+ content,
1151
+ score: round4(lexical * (0.7 + confidence * 0.2 + rank * 0.1)),
1152
+ fact_id: asString(row.fact_id),
1153
+ subject,
1154
+ predicate,
1155
+ timestamp: asNullableString(row.timestamp),
1156
+ tier_label: "fact",
1157
+ tier: "fact",
1158
+ source: "facts",
1159
+ keyword_score: round4(lexical),
1160
+ fts_score: round4(rank),
1161
+ importance_score: round4(confidence),
1162
+ explanation: `fact keyword=${round4(lexical)}`,
1163
+ voice_scores: {
1164
+ keyword: round4(lexical),
1165
+ fts: round4(rank),
1166
+ importance: round4(confidence),
1167
+ },
1168
+ };
1169
+ return result;
1170
+ })
1171
+ .sort((left, right) => (right.score ?? 0) - (left.score ?? 0))
1172
+ .slice(0, topK);
1100
1173
  }
@@ -1,12 +1,14 @@
1
1
  import type { Database, SQLQueryBindings } from "bun:sqlite";
2
+ import { logger } from "@prometheus-ai/utils";
2
3
  import { transaction } from "../../db";
3
4
  import { toUtcIso } from "../../util/datetime";
4
5
  import { generateId } from "../../util/ids";
6
+ import { currentEmbeddingModel, embeddingsDisabled } from "../embeddings";
5
7
  import { EpisodicGraph } from "../episodic-graph";
6
8
  import { extractFactsSafe } from "../extraction";
7
9
  import { getMnemopiRuntimeOptions, withMnemopiRuntimeOptions } from "../runtime-options";
8
10
  import { storeFactStrings } from "./consolidate";
9
- import { scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
11
+ import { type EmbedItem, scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
10
12
  import type {
11
13
  BeamEvent,
12
14
  BeamMemoryState,
@@ -58,7 +60,7 @@ const TRUST_TIERS: Record<string, true> = {
58
60
  EXTERNAL_WRITE: true,
59
61
  IMPORTED: true,
60
62
  };
61
- const SCRATCHPAD_MAX_ITEMS = Number.parseInt(process.env.PROMETHEUS_MEMORY_SP_MAX ?? "1000", 10);
63
+ const SCRATCHPAD_MAX_ITEMS = Number.parseInt(process.env.MNEMOPROMETHEUS_SP_MAX ?? "1000", 10);
62
64
 
63
65
  function metadataJson(metadata: Metadata | null | undefined): string | null {
64
66
  return metadata == null ? null : JSON.stringify(metadata);
@@ -191,7 +193,7 @@ function proactiveLinkIfEnabled(
191
193
  content: string,
192
194
  extractEntities: boolean,
193
195
  ): void {
194
- if (process.env.PROMETHEUS_MEMORY_PROACTIVE_LINKING !== "1") return;
196
+ if (process.env.MNEMOPROMETHEUS_PROACTIVE_LINKING !== "1") return;
195
197
  try {
196
198
  const graph =
197
199
  beam.episodicGraph instanceof EpisodicGraph
@@ -248,6 +250,96 @@ function rowToDict(row: Row): Row {
248
250
  return { ...row };
249
251
  }
250
252
 
253
+ /** Re-embedding batch size for a model-change rebuild — bounds each background
254
+ * embedding request instead of embedding the whole corpus in one call. */
255
+ const EMBED_REBUILD_BATCH = 128;
256
+
257
+ /**
258
+ * Reconcile stored embeddings against the active embedding model at store open.
259
+ *
260
+ * Every `memory_embeddings` row is stamped with the model that produced it (see
261
+ * `runEmbedding` in `helpers.ts`). When the configured embedding model changes,
262
+ * its vector dimension changes too, so the previously-stored vectors are no
263
+ * longer comparable. On a mismatch we wipe every stored vector — the
264
+ * `memory_embeddings` table, the `episodic_memory.binary_vector` column, and the
265
+ * sqlite-vec `vec_episodes` index — then enqueue all live memories for
266
+ * background re-embedding under the new model via `scheduleEmbedding`.
267
+ *
268
+ * Runs once per store open; a fresh store (no embeddings) or an already-current
269
+ * store is a no-op. The destructive wipe is skipped whenever it could not be
270
+ * rebuilt — embeddings disabled via the runtime option OR the
271
+ * `MNEMOPROMETHEUS_NO_EMBEDDINGS` env, or an unresolved (empty) active model — so a
272
+ * stale-but-valid corpus is never destroyed without a replacement. MUST run
273
+ * inside the active runtime-options scope so `currentEmbeddingModel()` /
274
+ * `embeddingsDisabled()` reflect the per-instance configuration.
275
+ */
276
+ export function reconcileEmbeddingModel(beam: BeamMemoryState): void {
277
+ if (embeddingsDisabled()) return;
278
+ const active = currentEmbeddingModel().trim();
279
+ if (active === "") return;
280
+
281
+ // Re-embed in bounded batches so a corpus-wide rebuild never issues one giant
282
+ // embedding request; each batch is its own tracked background task.
283
+ const rebuild = (items: readonly EmbedItem[]): void => {
284
+ for (let offset = 0; offset < items.length; offset += EMBED_REBUILD_BATCH) {
285
+ scheduleEmbedding(beam, items.slice(offset, offset + EMBED_REBUILD_BATCH));
286
+ }
287
+ };
288
+
289
+ // Stop at the first row whose stamped model differs from the active one
290
+ // (NULL/unstamped counts as a mismatch via `IS NOT`).
291
+ const mismatch = beam.db.query("SELECT 1 FROM memory_embeddings WHERE model IS NOT ? LIMIT 1").get(active);
292
+ if (mismatch) {
293
+ const staleModels = beam.db
294
+ .query("SELECT DISTINCT model FROM memory_embeddings WHERE model IS NOT ?")
295
+ .all(active) as { model: string | null }[];
296
+ const live = beam.db
297
+ .query(`
298
+ SELECT id AS memoryId, content FROM working_memory WHERE superseded_by IS NULL
299
+ UNION ALL
300
+ SELECT id AS memoryId, content FROM episodic_memory WHERE superseded_by IS NULL
301
+ `)
302
+ .all() as EmbedItem[];
303
+
304
+ transaction(beam.db, () => {
305
+ beam.db.prepare("DELETE FROM memory_embeddings").run();
306
+ beam.db.prepare("UPDATE episodic_memory SET binary_vector = NULL").run();
307
+ if (vecAvailable(beam.db)) {
308
+ try {
309
+ beam.db.prepare("DELETE FROM vec_episodes").run();
310
+ } catch {
311
+ // sqlite-vec cleanup is best-effort; rebuild correctness takes precedence.
312
+ }
313
+ }
314
+ });
315
+
316
+ logger.info("mnemopi: embedding model changed, rebuilding", {
317
+ from: staleModels.map(row => row.model ?? "(unstamped)"),
318
+ to: active,
319
+ count: live.length,
320
+ });
321
+ rebuild(live);
322
+ return;
323
+ }
324
+
325
+ // No stale embeddings, but a previously-interrupted rebuild (a failed embed or a process
326
+ // exit after the wipe) can leave live memories with no active-model embedding. Treating an
327
+ // empty/partial table as "reconciled" would strand them FTS-only, so re-enqueue any live
328
+ // row still missing an active-model embedding.
329
+ const missing = beam.db
330
+ .query(`
331
+ SELECT id AS memoryId, content FROM working_memory
332
+ WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
333
+ UNION ALL
334
+ SELECT id AS memoryId, content FROM episodic_memory
335
+ WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
336
+ `)
337
+ .all(active, active) as EmbedItem[];
338
+ if (missing.length === 0) return;
339
+ logger.info("mnemopi: resuming interrupted embedding rebuild", { to: active, count: missing.length });
340
+ rebuild(missing);
341
+ }
342
+
251
343
  export function remember(beam: BeamMemoryState, content: string, options: StoreRememberOptions = {}): string {
252
344
  const source = options.source ?? "conversation";
253
345
  const importance = options.importance ?? 0.5;
@@ -594,7 +686,7 @@ export function scratchpadClear(beam: BeamMemoryState): void {
594
686
  export function exportToDict(beam: BeamMemoryState): Record<string, unknown> {
595
687
  const db = beam.db;
596
688
  return {
597
- prometheus_memory_export: {
689
+ mnemopi_export: {
598
690
  version: "1.0",
599
691
  export_date: toUtcIso(),
600
692
  source_db: beam.dbPath ?? ":memory:",
@@ -99,7 +99,7 @@ function isReadonlyMap(
99
99
  }
100
100
 
101
101
  export function getVecType(env: NodeJS.ProcessEnv = process.env): VecType {
102
- const value = (env.PROMETHEUS_MEMORY_VEC_TYPE ?? "int8").trim().toLowerCase();
102
+ const value = (env.MNEMOPROMETHEUS_VEC_TYPE ?? "int8").trim().toLowerCase();
103
103
  if (value === "float32" || value === "int8" || value === "bit") {
104
104
  return value;
105
105
  }
@@ -19,9 +19,9 @@ export interface BlobMetadata {
19
19
  }
20
20
 
21
21
  export function blobRoot(env: NodeJS.ProcessEnv = process.env): string {
22
- return env.PROMETHEUS_MEMORY_BLOB_DIR && env.PROMETHEUS_MEMORY_BLOB_DIR.length > 0
23
- ? env.PROMETHEUS_MEMORY_BLOB_DIR
24
- : join(homedir(), ".prometheus", "memory", "blobs");
22
+ return env.MNEMOPROMETHEUS_BLOB_DIR && env.MNEMOPROMETHEUS_BLOB_DIR.length > 0
23
+ ? env.MNEMOPROMETHEUS_BLOB_DIR
24
+ : join(homedir(), ".hermes", "mnemopi", "blobs");
25
25
  }
26
26
 
27
27
  export function computeSha256(data: Uint8Array | string): string {
@@ -3,7 +3,7 @@ import { mkdirSync } from "node:fs";
3
3
  import { homedir } from "node:os";
4
4
  import { dirname, join } from "node:path";
5
5
 
6
- export const DEFAULT_LOG_DIR = join(homedir(), ".prometheus", "memory", "data");
6
+ export const DEFAULT_LOG_DIR = join(homedir(), ".mnemopi", "data");
7
7
  export const DEFAULT_LOG_DB = join(DEFAULT_LOG_DIR, "cost_log.db");
8
8
 
9
9
  export interface CostStats {
@@ -1,8 +1,9 @@
1
1
  import { mkdirSync } from "node:fs";
2
+ import { type ApiKey, ProviderHttpError, withAuth } from "@prometheus-ai/ai";
3
+ import { hostMatchesUrl } from "@prometheus-ai/catalog/hosts";
2
4
  import {
3
5
  $env,
4
6
  $flag,
5
- APP_DISPLAY_NAME,
6
7
  extractHttpStatusFromError,
7
8
  fetchWithRetry,
8
9
  getFastembedCacheDir,
@@ -11,7 +12,13 @@ import {
11
12
  import type { EmbeddingModel } from "fastembed";
12
13
  import { LRUCache } from "lru-cache/raw";
13
14
  import packageJson from "../../package.json" with { type: "json" };
14
- import { type EmbeddingOutput, getMnemopiRuntimeOptions, resolveEmbeddingProvider } from "./runtime-options";
15
+ import { loadFastembed } from "./fastembed-runtime";
16
+ import {
17
+ type EmbeddingOutput,
18
+ getMnemopiRuntimeOptions,
19
+ mnemopiDebugEnabled,
20
+ resolveEmbeddingProvider,
21
+ } from "./runtime-options";
15
22
 
16
23
  export type { EmbeddingOutput } from "./runtime-options";
17
24
  export { cosineSimilarity } from "./vector-math";
@@ -55,17 +62,7 @@ const providerIds = new WeakMap<object, number>();
55
62
  let nextProviderId = 1;
56
63
 
57
64
  async function defaultLocalModelInitializer(options: LocalModelInitOptions): Promise<LocalEmbeddingModel> {
58
- // Preload ORT 1.24 before fastembed's bundled ORT 1.21 — only on Windows,
59
- // where loading the older binding first triggers a DLL-reuse crash. The 1.24
60
- // line also has no darwin/x64 prebuilt, so importing it unconditionally breaks
61
- // the darwin-x64 `bun build --compile` (Bun folds process.platform/arch and
62
- // fails to resolve a binding that doesn't ship). The `win32` literal guard is
63
- // statically foldable, so Bun dead-code-eliminates this import on every
64
- // non-Windows target; fastembed loads its own ORT 1.21 binding there.
65
- if (process.platform === "win32") {
66
- await import("onnxruntime-node");
67
- }
68
- const { FlagEmbedding } = await import("fastembed");
65
+ const { FlagEmbedding } = await loadFastembed();
69
66
  return FlagEmbedding.init(options);
70
67
  }
71
68
 
@@ -102,20 +99,30 @@ function inTestRuntime(): boolean {
102
99
  return $env.NODE_ENV === "test" || $env.BUN_ENV === "test";
103
100
  }
104
101
 
105
- function embeddingsDisabled(): boolean {
102
+ export function embeddingsDisabled(): boolean {
106
103
  const active = activeEmbeddingOptions();
107
104
  if (active?.disabled !== undefined) {
108
105
  return active.disabled;
109
106
  }
110
- return $flag("PROMETHEUS_MEMORY_NO_EMBEDDINGS");
107
+ return $flag("MNEMOPROMETHEUS_NO_EMBEDDINGS");
111
108
  }
112
109
 
113
- function embeddingApiKey(): string {
110
+ function embeddingApiKey(): ApiKey {
114
111
  const active = activeEmbeddingOptions();
115
112
  if (active?.apiKey !== undefined) {
116
113
  return active.apiKey;
117
114
  }
118
- return $env.PROMETHEUS_MEMORY_EMBEDDING_API_KEY || $env.OPENROUTER_API_KEY || $env.OPENAI_API_KEY || "";
115
+ return (
116
+ $env.MNEMOPROMETHEUS_EMBEDDING_API_KEY ||
117
+ $env.OPENROUTER_API_KEY ||
118
+ $env.OPENAI_API_KEY ||
119
+ ""
120
+ );
121
+ }
122
+
123
+ /** A resolver always counts as configured; a static key only when non-empty. */
124
+ function embeddingKeyConfigured(key: ApiKey = embeddingApiKey()): boolean {
125
+ return typeof key === "function" || key !== "";
119
126
  }
120
127
 
121
128
  function embeddingBaseUrl(): string {
@@ -123,7 +130,7 @@ function embeddingBaseUrl(): string {
123
130
  if (active?.apiUrl !== undefined) {
124
131
  return active.apiUrl;
125
132
  }
126
- return $env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1";
133
+ return $env.MNEMOPROMETHEUS_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1";
127
134
  }
128
135
 
129
136
  function defaultModel(): string {
@@ -131,14 +138,14 @@ function defaultModel(): string {
131
138
  if (active?.model !== undefined) {
132
139
  return active.model;
133
140
  }
134
- return $env.PROMETHEUS_MEMORY_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5";
141
+ return $env.MNEMOPROMETHEUS_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5";
135
142
  }
136
143
 
137
144
  /**
138
145
  * Resolve the embedding model name for the currently active runtime scope.
139
146
  *
140
147
  * Reads (in order): the active provider's `model` from `withMnemopiRuntimeOptions`,
141
- * the `PROMETHEUS_MEMORY_EMBEDDING_MODEL` env var, then the bundled fastembed default. Stored
148
+ * the `MNEMOPROMETHEUS_EMBEDDING_MODEL` env var, then the bundled fastembed default. Stored
142
149
  * alongside each row in `memory_embeddings.model` so migrations can re-embed when
143
150
  * the active model changes.
144
151
  */
@@ -155,11 +162,11 @@ export function isApiModel(modelName: string): boolean {
155
162
  return true;
156
163
  }
157
164
  const active = activeEmbeddingOptions();
158
- const baseUrl = active?.apiUrl ?? ($env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
159
- if (baseUrl !== undefined && baseUrl !== "" && !baseUrl.includes("openrouter.ai")) {
165
+ const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
166
+ if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
160
167
  return true;
161
168
  }
162
- return $flag("PROMETHEUS_MEMORY_EMBEDDINGS_VIA_API");
169
+ return $flag("MNEMOPROMETHEUS_EMBEDDINGS_VIA_API");
163
170
  }
164
171
 
165
172
  const MODEL_DIMS: Record<string, number> = {
@@ -182,7 +189,7 @@ const MODEL_DIMS: Record<string, number> = {
182
189
  "jina-embeddings-v5-omni-small": 1024,
183
190
  };
184
191
  export function embeddingDimFor(modelName: string): number {
185
- const override = Number.parseInt($env.PROMETHEUS_MEMORY_EMBEDDING_DIM ?? "", 10);
192
+ const override = Number.parseInt($env.MNEMOPROMETHEUS_EMBEDDING_DIM ?? "", 10);
186
193
  if (Number.isFinite(override)) {
187
194
  return override;
188
195
  }
@@ -239,7 +246,11 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
239
246
  localModelPromise = loading;
240
247
  try {
241
248
  return await loading;
242
- } catch {
249
+ } catch (error) {
250
+ logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding model failed to load", {
251
+ model: modelName,
252
+ error: String(error),
253
+ });
243
254
  if (localModelPromise === loading) localModelPromise = null;
244
255
  return null;
245
256
  }
@@ -247,31 +258,41 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
247
258
 
248
259
  async function embedApi(texts: readonly string[]): Promise<EmbeddingMatrix | null> {
249
260
  const baseUrl = embeddingBaseUrl();
250
- const isCustom = !baseUrl.includes("openrouter.ai");
261
+ const isCustom = !hostMatchesUrl(baseUrl, "openrouter");
251
262
  const apiKey = embeddingApiKey();
252
- if (!isCustom && apiKey === "") {
263
+ if (!isCustom && !embeddingKeyConfigured(apiKey)) {
253
264
  return null;
254
265
  }
255
266
 
256
- const headers: Record<string, string> = {
257
- "Content-Type": "application/json",
258
- "User-Agent": `${APP_DISPLAY_NAME}/${packageJson.version}`,
259
- "HTTP-Referer": "https://prometheus.trivlab.com/",
260
- "X-OpenRouter-Title": APP_DISPLAY_NAME,
261
- "X-OpenRouter-Categories": "cli-agent",
262
- };
263
- if (apiKey !== "") {
264
- headers.Authorization = `Bearer ${apiKey}`;
265
- }
266
-
267
+ const body = JSON.stringify({ model: defaultModel(), input: texts });
267
268
  try {
268
- const response = await fetchWithRetry(`${baseUrl.replace(/\/+$/, "")}/embeddings`, {
269
- method: "POST",
270
- headers,
271
- body: JSON.stringify({ model: defaultModel(), input: texts }),
272
- signal: AbortSignal.timeout(30000),
273
- maxAttempts: 3,
274
- defaultDelayMs: attempt => 2 ** attempt * 1000,
269
+ // withAuth re-resolves the key on 401 (force-refresh, then sibling
270
+ // rotation) when `apiKey` is a resolver. The 429 backoff stays inside
271
+ // the attempt via fetchWithRetry. An empty static key attempts without
272
+ // an Authorization header (local/proxy setups).
273
+ const response = await withAuth(apiKey, async key => {
274
+ const headers: Record<string, string> = {
275
+ "Content-Type": "application/json",
276
+ "User-Agent": `Oh-My-Pi/${packageJson.version}`,
277
+ "HTTP-Referer": "https://prometheus.sh/",
278
+ "X-OpenRouter-Title": "Oh-My-Pi",
279
+ "X-OpenRouter-Categories": "cli-agent",
280
+ };
281
+ if (key !== "") {
282
+ headers.Authorization = `Bearer ${key}`;
283
+ }
284
+ const res = await fetchWithRetry(`${baseUrl.replace(/\/+$/, "")}/embeddings`, {
285
+ method: "POST",
286
+ headers,
287
+ body,
288
+ signal: AbortSignal.timeout(30000),
289
+ maxAttempts: 3,
290
+ defaultDelayMs: attempt => 2 ** attempt * 1000,
291
+ });
292
+ if (res.status === 401) {
293
+ throw new ProviderHttpError("mnemopi embedding request unauthorized (401)", 401, { headers: res.headers });
294
+ }
295
+ return res;
275
296
  });
276
297
  if (!response.ok) {
277
298
  return null;
@@ -335,11 +356,11 @@ export async function available(): Promise<boolean> {
335
356
  return providerAvailable(providerOverride);
336
357
  }
337
358
  if (isApiModel(defaultModel())) {
338
- const baseUrl = active?.apiUrl ?? ($env.PROMETHEUS_MEMORY_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
339
- if (baseUrl !== undefined && baseUrl !== "" && !baseUrl.includes("openrouter.ai")) {
359
+ const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_API_URL || $env.OPENROUTER_BASE_URL);
360
+ if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
340
361
  return true;
341
362
  }
342
- return embeddingApiKey() !== "";
363
+ return embeddingKeyConfigured();
343
364
  }
344
365
  if (inTestRuntime()) {
345
366
  return false;
@@ -348,7 +369,7 @@ export async function available(): Promise<boolean> {
348
369
  }
349
370
 
350
371
  export function availableApi(): boolean {
351
- return embeddingApiKey() !== "";
372
+ return embeddingKeyConfigured();
352
373
  }
353
374
 
354
375
  export async function embedQuery(text: string): Promise<Vector | null> {
@@ -410,7 +431,11 @@ export async function embed(texts: readonly string[]): Promise<EmbeddingMatrix |
410
431
  }
411
432
  }
412
433
  return vectors;
413
- } catch {
434
+ } catch (error) {
435
+ logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding failed", {
436
+ textCount: texts.length,
437
+ error: String(error),
438
+ });
414
439
  return null;
415
440
  }
416
441
  }