@prometheus-ai/memory 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +4 -4
- package/dist/types/config.d.ts +13 -2
- package/dist/types/core/beam/store.d.ts +20 -0
- package/dist/types/core/embeddings.d.ts +2 -1
- package/dist/types/core/extraction/client.d.ts +11 -7
- package/dist/types/core/extraction.d.ts +2 -1
- package/dist/types/core/fastembed-runtime.d.ts +4 -0
- package/dist/types/core/index.d.ts +1 -0
- package/dist/types/core/llm-backends.d.ts +2 -0
- package/dist/types/core/local-llm.d.ts +8 -3
- package/dist/types/core/memory.d.ts +12 -3
- package/dist/types/core/query-cache.d.ts +1 -2
- package/dist/types/core/runtime-options.d.ts +10 -5
- package/dist/types/core/shmr.d.ts +11 -5
- package/dist/types/core/vector-index.d.ts +16 -0
- package/dist/types/index.d.ts +2 -1
- package/package.json +30 -7
- package/src/cli.ts +19 -19
- package/src/config.ts +98 -68
- package/src/core/banks.ts +2 -2
- package/src/core/beam/consolidate.ts +34 -5
- package/src/core/beam/helpers.ts +21 -28
- package/src/core/beam/index.ts +2 -2
- package/src/core/beam/recall.ts +98 -25
- package/src/core/beam/store.ts +96 -4
- package/src/core/binary-vectors.ts +1 -1
- package/src/core/content-sanitizer.ts +3 -3
- package/src/core/cost-log.ts +1 -1
- package/src/core/embeddings.ts +75 -50
- package/src/core/extraction/client.ts +44 -20
- package/src/core/extraction.ts +10 -9
- package/src/core/fastembed-runtime.ts +89 -0
- package/src/core/index.ts +1 -0
- package/src/core/llm-backends.ts +3 -0
- package/src/core/local-llm.ts +81 -43
- package/src/core/memory.ts +25 -5
- package/src/core/plugins.ts +1 -1
- package/src/core/polyphonic-recall.ts +4 -4
- package/src/core/query-cache.ts +2 -3
- package/src/core/runtime-options.ts +13 -5
- package/src/core/shmr.ts +141 -39
- package/src/core/streaming.ts +1 -1
- package/src/core/triples.ts +3 -3
- package/src/core/vector-index.ts +84 -0
- package/src/diagnose.ts +2 -2
- package/src/dr/recovery.ts +5 -5
- package/src/index.ts +1 -1
- package/src/mcp-server.ts +2 -2
- package/src/mcp-tools.ts +61 -61
package/src/core/beam/recall.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { normalizedRecallWeights, temporalHalflifeHours } from "../../config";
|
|
|
2
2
|
import { embedQuery } from "../embeddings";
|
|
3
3
|
import { mmrRerank } from "../mmr";
|
|
4
4
|
import { adjustWeights, classifyIntent } from "../query-intent";
|
|
5
|
-
import { getSynonyms, normalizeQuery } from "../synonyms";
|
|
5
|
+
import { getSynonyms, normalizeQuery, STOP_WORDS as QUERY_STOP_WORDS } from "../synonyms";
|
|
6
6
|
import { extractTemporal } from "../temporal-parser";
|
|
7
7
|
import { cosineSimilarity } from "../vector-math";
|
|
8
8
|
import type { BeamMemoryState, RecallEnhancedOptions, RecallOptions, RecallResult } from "./types";
|
|
@@ -101,6 +101,31 @@ const STOP_WORDS = new Set([
|
|
|
101
101
|
"with",
|
|
102
102
|
]);
|
|
103
103
|
|
|
104
|
+
/**
 * Words treated as filler when matching a query against a fact: the shared
 * query stop words plus conversational verbs ("tell", "remind"), recency
 * hedges ("current", "latest", "today"), and contraction fragments.
 */
const FACT_QUERY_FILLER_WORDS = new Set([
	...QUERY_STOP_WORDS,
	"active", "current", "currently", "d", "know",
	"latest", "ll", "m", "please", "present",
	"re", "recent", "remind", "remember", "s",
	"t", "tell", "today", "ve",
]);

/**
 * Fragments that are always dropped from fact queries, even when the fact text
 * contains them — presumably the tails of tokenized contractions ("I'd",
 * "we'll", "it's"); confirm against the tokenizer.
 */
const FACT_CLITIC_FRAGMENTS = new Set("d ll m re s t ve".split(" "));
|
|
128
|
+
|
|
104
129
|
/** Returns the current instant formatted by `Date.prototype.toISOString`. */
function nowIso(): string {
	const now = new Date();
	return now.toISOString();
}
|
|
@@ -176,6 +201,35 @@ function expandedTokenGroups(query: string, useSynonyms = true): string[][] {
|
|
|
176
201
|
return groups;
|
|
177
202
|
}
|
|
178
203
|
|
|
204
|
+
/**
 * Build one synonym group per meaningful query token, tailored to a single
 * fact's text.
 *
 * A filler word (see `FACT_QUERY_FILLER_WORDS`) is discarded unless the fact
 * text itself contains it; clitic fragments are discarded unconditionally.
 * The same rule is applied to the query token and to every token produced by
 * its synonym variants. Tokens whose group ends up empty contribute nothing.
 *
 * @param query   Raw user query.
 * @param content Searchable text of the fact being scored.
 * @returns De-duplicated token groups, in query-token order.
 */
function factExpandedTokenGroups(query: string, content: string): string[][] {
	const present = new Set(tokenize(content.toLowerCase()));

	// Filler is dropped when it is a clitic fragment or absent from the fact text.
	const isDroppedFiller = (word: string): boolean =>
		FACT_QUERY_FILLER_WORDS.has(word) && (FACT_CLITIC_FRAGMENTS.has(word) || !present.has(word));

	const result: string[][] = [];
	for (const queryToken of tokenize(query)) {
		if (isDroppedFiller(queryToken)) continue;

		const kept = new Set<string>();
		for (const variant of recallSynonyms(queryToken, true)) {
			for (const piece of tokenize(variant)) {
				if (isDroppedFiller(piece)) continue;
				kept.add(piece);
			}
		}
		if (kept.size === 0) continue;
		result.push(Array.from(kept));
	}
	return result;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Flatten token groups into a single list of unique tokens, keeping the order
 * in which each token first appears.
 */
function tokensFromGroups(groups: readonly (readonly string[])[]): string[] {
	return Array.from(new Set(groups.flat()));
}
|
|
232
|
+
|
|
179
233
|
function contentMatchesToken(contentLower: string, contentTokens: ReadonlySet<string>, token: string): boolean {
|
|
180
234
|
if (contentTokens.has(token) || contentLower.includes(token)) return true;
|
|
181
235
|
for (const contentToken of contentTokens) {
|
|
@@ -1062,13 +1116,11 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
|
|
|
1062
1116
|
}
|
|
1063
1117
|
}
|
|
1064
1118
|
if (matched.length === 0) return [];
|
|
1065
|
-
const rowids = matched
|
|
1066
|
-
.slice(0, topK)
|
|
1067
|
-
.map(row => asNumber(row.rowid))
|
|
1068
|
-
.filter(rowid => rowid > 0);
|
|
1119
|
+
const rowids = matched.map(row => asNumber(row.rowid)).filter(rowid => rowid > 0);
|
|
1069
1120
|
if (rowids.length === 0) return [];
|
|
1070
1121
|
const visibility = factVisibilityWhere(beam, "");
|
|
1071
1122
|
const ranks = normalizeRanks(matched, "rowid");
|
|
1123
|
+
const normalized = normalizeQuery(query).toLowerCase();
|
|
1072
1124
|
const rows = queryAll(
|
|
1073
1125
|
beam,
|
|
1074
1126
|
`SELECT rowid, fact_id, subject, predicate, object, timestamp, confidence
|
|
@@ -1076,25 +1128,46 @@ export function factRecall(beam: BeamMemoryState, query: string, topK = 30): Fac
|
|
|
1076
1128
|
WHERE rowid IN (${placeholders(rowids.length)}) AND ${visibility.where}
|
|
1077
1129
|
ORDER BY confidence DESC
|
|
1078
1130
|
LIMIT ?`,
|
|
1079
|
-
[...rowids, ...visibility.params,
|
|
1131
|
+
[...rowids, ...visibility.params, rowids.length],
|
|
1080
1132
|
);
|
|
1081
|
-
return rows
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1133
|
+
return rows
|
|
1134
|
+
.map(row => {
|
|
1135
|
+
const subject = asString(row.subject);
|
|
1136
|
+
const predicate = asString(row.predicate);
|
|
1137
|
+
const object = asString(row.object);
|
|
1138
|
+
const confidence = asNumber(row.confidence, 0.5);
|
|
1139
|
+
const content = object.length > 0 ? object : `${subject} ${predicate}`.trim();
|
|
1140
|
+
const searchable = `${subject} ${predicate} ${object}`.trim();
|
|
1141
|
+
const queryGroups = factExpandedTokenGroups(query, searchable);
|
|
1142
|
+
const queryTokens = tokensFromGroups(queryGroups);
|
|
1143
|
+
const lexical =
|
|
1144
|
+
queryGroups.length > 0
|
|
1145
|
+
? lexicalGroupRelevance(queryGroups, searchable, normalized)
|
|
1146
|
+
: lexicalRelevance(queryTokens, searchable, normalized);
|
|
1147
|
+
const rank = ranks.get(asNumber(row.rowid)) ?? 0;
|
|
1148
|
+
const result: FactRecallResult = {
|
|
1149
|
+
id: asString(row.fact_id),
|
|
1150
|
+
content,
|
|
1151
|
+
score: round4(lexical * (0.7 + confidence * 0.2 + rank * 0.1)),
|
|
1152
|
+
fact_id: asString(row.fact_id),
|
|
1153
|
+
subject,
|
|
1154
|
+
predicate,
|
|
1155
|
+
timestamp: asNullableString(row.timestamp),
|
|
1156
|
+
tier_label: "fact",
|
|
1157
|
+
tier: "fact",
|
|
1158
|
+
source: "facts",
|
|
1159
|
+
keyword_score: round4(lexical),
|
|
1160
|
+
fts_score: round4(rank),
|
|
1161
|
+
importance_score: round4(confidence),
|
|
1162
|
+
explanation: `fact keyword=${round4(lexical)}`,
|
|
1163
|
+
voice_scores: {
|
|
1164
|
+
keyword: round4(lexical),
|
|
1165
|
+
fts: round4(rank),
|
|
1166
|
+
importance: round4(confidence),
|
|
1167
|
+
},
|
|
1168
|
+
};
|
|
1169
|
+
return result;
|
|
1170
|
+
})
|
|
1171
|
+
.sort((left, right) => (right.score ?? 0) - (left.score ?? 0))
|
|
1172
|
+
.slice(0, topK);
|
|
1100
1173
|
}
|
package/src/core/beam/store.ts
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import type { Database, SQLQueryBindings } from "bun:sqlite";
|
|
2
|
+
import { logger } from "@prometheus-ai/utils";
|
|
2
3
|
import { transaction } from "../../db";
|
|
3
4
|
import { toUtcIso } from "../../util/datetime";
|
|
4
5
|
import { generateId } from "../../util/ids";
|
|
6
|
+
import { currentEmbeddingModel, embeddingsDisabled } from "../embeddings";
|
|
5
7
|
import { EpisodicGraph } from "../episodic-graph";
|
|
6
8
|
import { extractFactsSafe } from "../extraction";
|
|
7
9
|
import { getMnemopiRuntimeOptions, withMnemopiRuntimeOptions } from "../runtime-options";
|
|
8
10
|
import { storeFactStrings } from "./consolidate";
|
|
9
|
-
import { scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
|
|
11
|
+
import { type EmbedItem, scheduleEmbedding, vecAvailable, vecInsert } from "./helpers";
|
|
10
12
|
import type {
|
|
11
13
|
BeamEvent,
|
|
12
14
|
BeamMemoryState,
|
|
@@ -58,7 +60,7 @@ const TRUST_TIERS: Record<string, true> = {
|
|
|
58
60
|
EXTERNAL_WRITE: true,
|
|
59
61
|
IMPORTED: true,
|
|
60
62
|
};
|
|
61
|
-
const SCRATCHPAD_MAX_ITEMS = Number.parseInt(process.env.
|
|
63
|
+
/** Scratchpad capacity used when `MNEMOPROMETHEUS_SP_MAX` is unset or not a number. */
const SCRATCHPAD_MAX_DEFAULT = 1000;

/**
 * Parse the scratchpad capacity override.
 *
 * `Number.parseInt` returns `NaN` for an empty or non-numeric string, and `??`
 * only covers `undefined`, so a set-but-invalid value would otherwise leak
 * `NaN` into the limit. Any value that parses to a number is returned as-is.
 *
 * @param raw Raw environment value, if any.
 * @returns The parsed integer, or the default when parsing fails.
 */
function parseScratchpadMax(raw: string | undefined): number {
	const parsed = Number.parseInt(raw ?? "", 10);
	return Number.isNaN(parsed) ? SCRATCHPAD_MAX_DEFAULT : parsed;
}

const SCRATCHPAD_MAX_ITEMS = parseScratchpadMax(process.env.MNEMOPROMETHEUS_SP_MAX);
|
|
62
64
|
|
|
63
65
|
function metadataJson(metadata: Metadata | null | undefined): string | null {
|
|
64
66
|
return metadata == null ? null : JSON.stringify(metadata);
|
|
@@ -191,7 +193,7 @@ function proactiveLinkIfEnabled(
|
|
|
191
193
|
content: string,
|
|
192
194
|
extractEntities: boolean,
|
|
193
195
|
): void {
|
|
194
|
-
if (process.env.
|
|
196
|
+
if (process.env.MNEMOPROMETHEUS_PROACTIVE_LINKING !== "1") return;
|
|
195
197
|
try {
|
|
196
198
|
const graph =
|
|
197
199
|
beam.episodicGraph instanceof EpisodicGraph
|
|
@@ -248,6 +250,96 @@ function rowToDict(row: Row): Row {
|
|
|
248
250
|
return { ...row };
|
|
249
251
|
}
|
|
250
252
|
|
|
253
|
+
/** Re-embedding batch size for a model-change rebuild — bounds each background
|
|
254
|
+
* embedding request instead of embedding the whole corpus in one call. */
|
|
255
|
+
const EMBED_REBUILD_BATCH = 128;
|
|
256
|
+
|
|
257
|
+
/**
 * Reconcile stored embeddings against the active embedding model at store open.
 *
 * Every `memory_embeddings` row is stamped with the model that produced it (see
 * `runEmbedding` in `helpers.ts`). When the configured embedding model changes,
 * its vector dimension changes too, so the previously-stored vectors are no
 * longer comparable. On a mismatch we wipe every stored vector — the
 * `memory_embeddings` table, the `episodic_memory.binary_vector` column, and the
 * sqlite-vec `vec_episodes` index — then enqueue all live memories for
 * background re-embedding under the new model via `scheduleEmbedding`.
 *
 * Runs once per store open; a fresh store (no embeddings) or an already-current
 * store is a no-op. The destructive wipe is skipped whenever it could not be
 * rebuilt — embeddings disabled via the runtime option OR the
 * `MNEMOPROMETHEUS_NO_EMBEDDINGS` env, or an unresolved (empty) active model — so a
 * stale-but-valid corpus is never destroyed without a replacement. MUST run
 * inside the active runtime-options scope so `currentEmbeddingModel()` /
 * `embeddingsDisabled()` reflect the per-instance configuration.
 *
 * The `vec_episodes` wipe is best-effort: a failure there is logged and does
 * not abort the rebuild.
 *
 * @param beam Store state whose `db` holds the embedding tables.
 */
export function reconcileEmbeddingModel(beam: BeamMemoryState): void {
	if (embeddingsDisabled()) return;
	const active = currentEmbeddingModel().trim();
	if (active === "") return;

	// Re-embed in bounded batches so a corpus-wide rebuild never issues one giant
	// embedding request; each batch is its own tracked background task.
	const rebuild = (items: readonly EmbedItem[]): void => {
		for (let offset = 0; offset < items.length; offset += EMBED_REBUILD_BATCH) {
			scheduleEmbedding(beam, items.slice(offset, offset + EMBED_REBUILD_BATCH));
		}
	};

	// Stop at the first row whose stamped model differs from the active one
	// (NULL/unstamped counts as a mismatch via `IS NOT`).
	const mismatch = beam.db.query("SELECT 1 FROM memory_embeddings WHERE model IS NOT ? LIMIT 1").get(active);
	if (mismatch) {
		// Collected before the wipe, purely so the log can name what was replaced.
		const staleModels = beam.db
			.query("SELECT DISTINCT model FROM memory_embeddings WHERE model IS NOT ?")
			.all(active) as { model: string | null }[];
		const live = beam.db
			.query(`
				SELECT id AS memoryId, content FROM working_memory WHERE superseded_by IS NULL
				UNION ALL
				SELECT id AS memoryId, content FROM episodic_memory WHERE superseded_by IS NULL
			`)
			.all() as EmbedItem[];

		transaction(beam.db, () => {
			beam.db.prepare("DELETE FROM memory_embeddings").run();
			beam.db.prepare("UPDATE episodic_memory SET binary_vector = NULL").run();
			if (vecAvailable(beam.db)) {
				try {
					beam.db.prepare("DELETE FROM vec_episodes").run();
				} catch (error) {
					// sqlite-vec cleanup is best-effort; rebuild correctness takes precedence.
					// Log instead of swallowing so leftover old-model vectors are diagnosable.
					logger.warn("mnemopi: failed to clear vec_episodes during embedding rebuild", {
						error: String(error),
					});
				}
			}
		});

		logger.info("mnemopi: embedding model changed, rebuilding", {
			from: staleModels.map(row => row.model ?? "(unstamped)"),
			to: active,
			count: live.length,
		});
		rebuild(live);
		return;
	}

	// No stale embeddings, but a previously-interrupted rebuild (a failed embed or a process
	// exit after the wipe) can leave live memories with no active-model embedding. Treating an
	// empty/partial table as "reconciled" would strand them FTS-only, so re-enqueue any live
	// row still missing an active-model embedding.
	const missing = beam.db
		.query(`
			SELECT id AS memoryId, content FROM working_memory
			WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
			UNION ALL
			SELECT id AS memoryId, content FROM episodic_memory
			WHERE superseded_by IS NULL AND id NOT IN (SELECT memory_id FROM memory_embeddings WHERE model = ?)
		`)
		.all(active, active) as EmbedItem[];
	if (missing.length === 0) return;
	logger.info("mnemopi: resuming interrupted embedding rebuild", { to: active, count: missing.length });
	rebuild(missing);
}
|
|
342
|
+
|
|
251
343
|
export function remember(beam: BeamMemoryState, content: string, options: StoreRememberOptions = {}): string {
|
|
252
344
|
const source = options.source ?? "conversation";
|
|
253
345
|
const importance = options.importance ?? 0.5;
|
|
@@ -594,7 +686,7 @@ export function scratchpadClear(beam: BeamMemoryState): void {
|
|
|
594
686
|
export function exportToDict(beam: BeamMemoryState): Record<string, unknown> {
|
|
595
687
|
const db = beam.db;
|
|
596
688
|
return {
|
|
597
|
-
|
|
689
|
+
mnemopi_export: {
|
|
598
690
|
version: "1.0",
|
|
599
691
|
export_date: toUtcIso(),
|
|
600
692
|
source_db: beam.dbPath ?? ":memory:",
|
|
@@ -99,7 +99,7 @@ function isReadonlyMap(
|
|
|
99
99
|
}
|
|
100
100
|
|
|
101
101
|
export function getVecType(env: NodeJS.ProcessEnv = process.env): VecType {
|
|
102
|
-
const value = (env.
|
|
102
|
+
const value = (env.MNEMOPROMETHEUS_VEC_TYPE ?? "int8").trim().toLowerCase();
|
|
103
103
|
if (value === "float32" || value === "int8" || value === "bit") {
|
|
104
104
|
return value;
|
|
105
105
|
}
|
|
@@ -19,9 +19,9 @@ export interface BlobMetadata {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
export function blobRoot(env: NodeJS.ProcessEnv = process.env): string {
|
|
22
|
-
return env.
|
|
23
|
-
? env.
|
|
24
|
-
: join(homedir(), ".
|
|
22
|
+
return env.MNEMOPROMETHEUS_BLOB_DIR && env.MNEMOPROMETHEUS_BLOB_DIR.length > 0
|
|
23
|
+
? env.MNEMOPROMETHEUS_BLOB_DIR
|
|
24
|
+
: join(homedir(), ".hermes", "mnemopi", "blobs");
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
export function computeSha256(data: Uint8Array | string): string {
|
package/src/core/cost-log.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { mkdirSync } from "node:fs";
|
|
|
3
3
|
import { homedir } from "node:os";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
|
|
6
|
-
export const DEFAULT_LOG_DIR = join(homedir(), ".
|
|
6
|
+
export const DEFAULT_LOG_DIR = join(homedir(), ".mnemopi", "data");
|
|
7
7
|
export const DEFAULT_LOG_DB = join(DEFAULT_LOG_DIR, "cost_log.db");
|
|
8
8
|
|
|
9
9
|
export interface CostStats {
|
package/src/core/embeddings.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { mkdirSync } from "node:fs";
|
|
2
|
+
import { type ApiKey, ProviderHttpError, withAuth } from "@prometheus-ai/ai";
|
|
3
|
+
import { hostMatchesUrl } from "@prometheus-ai/catalog/hosts";
|
|
2
4
|
import {
|
|
3
5
|
$env,
|
|
4
6
|
$flag,
|
|
5
|
-
APP_DISPLAY_NAME,
|
|
6
7
|
extractHttpStatusFromError,
|
|
7
8
|
fetchWithRetry,
|
|
8
9
|
getFastembedCacheDir,
|
|
@@ -11,7 +12,13 @@ import {
|
|
|
11
12
|
import type { EmbeddingModel } from "fastembed";
|
|
12
13
|
import { LRUCache } from "lru-cache/raw";
|
|
13
14
|
import packageJson from "../../package.json" with { type: "json" };
|
|
14
|
-
import {
|
|
15
|
+
import { loadFastembed } from "./fastembed-runtime";
|
|
16
|
+
import {
|
|
17
|
+
type EmbeddingOutput,
|
|
18
|
+
getMnemopiRuntimeOptions,
|
|
19
|
+
mnemopiDebugEnabled,
|
|
20
|
+
resolveEmbeddingProvider,
|
|
21
|
+
} from "./runtime-options";
|
|
15
22
|
|
|
16
23
|
export type { EmbeddingOutput } from "./runtime-options";
|
|
17
24
|
export { cosineSimilarity } from "./vector-math";
|
|
@@ -55,17 +62,7 @@ const providerIds = new WeakMap<object, number>();
|
|
|
55
62
|
let nextProviderId = 1;
|
|
56
63
|
|
|
57
64
|
async function defaultLocalModelInitializer(options: LocalModelInitOptions): Promise<LocalEmbeddingModel> {
|
|
58
|
-
|
|
59
|
-
// where loading the older binding first triggers a DLL-reuse crash. The 1.24
|
|
60
|
-
// line also has no darwin/x64 prebuilt, so importing it unconditionally breaks
|
|
61
|
-
// the darwin-x64 `bun build --compile` (Bun folds process.platform/arch and
|
|
62
|
-
// fails to resolve a binding that doesn't ship). The `win32` literal guard is
|
|
63
|
-
// statically foldable, so Bun dead-code-eliminates this import on every
|
|
64
|
-
// non-Windows target; fastembed loads its own ORT 1.21 binding there.
|
|
65
|
-
if (process.platform === "win32") {
|
|
66
|
-
await import("onnxruntime-node");
|
|
67
|
-
}
|
|
68
|
-
const { FlagEmbedding } = await import("fastembed");
|
|
65
|
+
const { FlagEmbedding } = await loadFastembed();
|
|
69
66
|
return FlagEmbedding.init(options);
|
|
70
67
|
}
|
|
71
68
|
|
|
@@ -102,20 +99,30 @@ function inTestRuntime(): boolean {
|
|
|
102
99
|
return $env.NODE_ENV === "test" || $env.BUN_ENV === "test";
|
|
103
100
|
}
|
|
104
101
|
|
|
105
|
-
function embeddingsDisabled(): boolean {
|
|
102
|
+
export function embeddingsDisabled(): boolean {
|
|
106
103
|
const active = activeEmbeddingOptions();
|
|
107
104
|
if (active?.disabled !== undefined) {
|
|
108
105
|
return active.disabled;
|
|
109
106
|
}
|
|
110
|
-
return $flag("
|
|
107
|
+
return $flag("MNEMOPROMETHEUS_NO_EMBEDDINGS");
|
|
111
108
|
}
|
|
112
109
|
|
|
113
|
-
function embeddingApiKey():
|
|
110
|
+
/**
 * Resolve the credential used for API-backed embedding requests.
 *
 * Precedence: the `apiKey` from the active embedding options (returned as-is
 * whenever it is defined), then the environment fallbacks below, then `""`.
 *
 * NOTE(review): `OPENROUTER_APROMETHEUS_KEY` / `OPENAI_APROMETHEUS_KEY` look
 * like a rename artifact of `*_API_KEY`. They are still read first so existing
 * setups keep their precedence; the conventional `OPENROUTER_API_KEY` /
 * `OPENAI_API_KEY` are now consulted as well so standard provider
 * configuration is picked up. Confirm the intended names.
 *
 * @returns The configured key, or an empty string when none is set.
 */
function embeddingApiKey(): ApiKey {
	const active = activeEmbeddingOptions();
	if (active?.apiKey !== undefined) {
		return active.apiKey;
	}
	return (
		$env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_KEY ||
		$env.OPENROUTER_APROMETHEUS_KEY ||
		$env.OPENAI_APROMETHEUS_KEY ||
		// Conventional provider variable names, checked after the legacy spellings.
		$env.OPENROUTER_API_KEY ||
		$env.OPENAI_API_KEY ||
		""
	);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Whether an embedding credential is usable: a function-valued key (resolver)
 * always counts as configured, a static key only when it is non-empty.
 *
 * @param key Credential to inspect; defaults to `embeddingApiKey()`.
 */
function embeddingKeyConfigured(key: ApiKey = embeddingApiKey()): boolean {
	if (typeof key === "function") {
		return true;
	}
	return key !== "";
}
|
|
120
127
|
|
|
121
128
|
function embeddingBaseUrl(): string {
|
|
@@ -123,7 +130,7 @@ function embeddingBaseUrl(): string {
|
|
|
123
130
|
if (active?.apiUrl !== undefined) {
|
|
124
131
|
return active.apiUrl;
|
|
125
132
|
}
|
|
126
|
-
return $env.
|
|
133
|
+
return $env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1";
|
|
127
134
|
}
|
|
128
135
|
|
|
129
136
|
function defaultModel(): string {
|
|
@@ -131,14 +138,14 @@ function defaultModel(): string {
|
|
|
131
138
|
if (active?.model !== undefined) {
|
|
132
139
|
return active.model;
|
|
133
140
|
}
|
|
134
|
-
return $env.
|
|
141
|
+
return $env.MNEMOPROMETHEUS_EMBEDDING_MODEL || "BAAI/bge-small-en-v1.5";
|
|
135
142
|
}
|
|
136
143
|
|
|
137
144
|
/**
|
|
138
145
|
* Resolve the embedding model name for the currently active runtime scope.
|
|
139
146
|
*
|
|
140
147
|
* Reads (in order): the active provider's `model` from `withMnemopiRuntimeOptions`,
|
|
141
|
-
* the `
|
|
148
|
+
* the `MNEMOPROMETHEUS_EMBEDDING_MODEL` env var, then the bundled fastembed default. Stored
|
|
142
149
|
* alongside each row in `memory_embeddings.model` so migrations can re-embed when
|
|
143
150
|
* the active model changes.
|
|
144
151
|
*/
|
|
@@ -155,11 +162,11 @@ export function isApiModel(modelName: string): boolean {
|
|
|
155
162
|
return true;
|
|
156
163
|
}
|
|
157
164
|
const active = activeEmbeddingOptions();
|
|
158
|
-
const baseUrl = active?.apiUrl ?? ($env.
|
|
159
|
-
if (baseUrl !== undefined && baseUrl !== "" && !baseUrl
|
|
165
|
+
const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL);
|
|
166
|
+
if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
|
|
160
167
|
return true;
|
|
161
168
|
}
|
|
162
|
-
return $flag("
|
|
169
|
+
return $flag("MNEMOPROMETHEUS_EMBEDDINGS_VIA_API");
|
|
163
170
|
}
|
|
164
171
|
|
|
165
172
|
const MODEL_DIMS: Record<string, number> = {
|
|
@@ -182,7 +189,7 @@ const MODEL_DIMS: Record<string, number> = {
|
|
|
182
189
|
"jina-embeddings-v5-omni-small": 1024,
|
|
183
190
|
};
|
|
184
191
|
export function embeddingDimFor(modelName: string): number {
|
|
185
|
-
const override = Number.parseInt($env.
|
|
192
|
+
const override = Number.parseInt($env.MNEMOPROMETHEUS_EMBEDDING_DIM ?? "", 10);
|
|
186
193
|
if (Number.isFinite(override)) {
|
|
187
194
|
return override;
|
|
188
195
|
}
|
|
@@ -239,7 +246,11 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
|
|
|
239
246
|
localModelPromise = loading;
|
|
240
247
|
try {
|
|
241
248
|
return await loading;
|
|
242
|
-
} catch {
|
|
249
|
+
} catch (error) {
|
|
250
|
+
logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding model failed to load", {
|
|
251
|
+
model: modelName,
|
|
252
|
+
error: String(error),
|
|
253
|
+
});
|
|
243
254
|
if (localModelPromise === loading) localModelPromise = null;
|
|
244
255
|
return null;
|
|
245
256
|
}
|
|
@@ -247,31 +258,41 @@ async function getLocalModel(): Promise<LocalEmbeddingModel | null> {
|
|
|
247
258
|
|
|
248
259
|
async function embedApi(texts: readonly string[]): Promise<EmbeddingMatrix | null> {
|
|
249
260
|
const baseUrl = embeddingBaseUrl();
|
|
250
|
-
const isCustom = !baseUrl
|
|
261
|
+
const isCustom = !hostMatchesUrl(baseUrl, "openrouter");
|
|
251
262
|
const apiKey = embeddingApiKey();
|
|
252
|
-
if (!isCustom && apiKey
|
|
263
|
+
if (!isCustom && !embeddingKeyConfigured(apiKey)) {
|
|
253
264
|
return null;
|
|
254
265
|
}
|
|
255
266
|
|
|
256
|
-
const
|
|
257
|
-
"Content-Type": "application/json",
|
|
258
|
-
"User-Agent": `${APP_DISPLAY_NAME}/${packageJson.version}`,
|
|
259
|
-
"HTTP-Referer": "https://prometheus.trivlab.com/",
|
|
260
|
-
"X-OpenRouter-Title": APP_DISPLAY_NAME,
|
|
261
|
-
"X-OpenRouter-Categories": "cli-agent",
|
|
262
|
-
};
|
|
263
|
-
if (apiKey !== "") {
|
|
264
|
-
headers.Authorization = `Bearer ${apiKey}`;
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
+
const body = JSON.stringify({ model: defaultModel(), input: texts });
|
|
267
268
|
try {
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
269
|
+
// withAuth re-resolves the key on 401 (force-refresh, then sibling
|
|
270
|
+
// rotation) when `apiKey` is a resolver. The 429 backoff stays inside
|
|
271
|
+
// the attempt via fetchWithRetry. An empty static key attempts without
|
|
272
|
+
// an Authorization header (local/proxy setups).
|
|
273
|
+
const response = await withAuth(apiKey, async key => {
|
|
274
|
+
const headers: Record<string, string> = {
|
|
275
|
+
"Content-Type": "application/json",
|
|
276
|
+
"User-Agent": `Oh-My-Pi/${packageJson.version}`,
|
|
277
|
+
"HTTP-Referer": "https://prometheus.sh/",
|
|
278
|
+
"X-OpenRouter-Title": "Oh-My-Pi",
|
|
279
|
+
"X-OpenRouter-Categories": "cli-agent",
|
|
280
|
+
};
|
|
281
|
+
if (key !== "") {
|
|
282
|
+
headers.Authorization = `Bearer ${key}`;
|
|
283
|
+
}
|
|
284
|
+
const res = await fetchWithRetry(`${baseUrl.replace(/\/+$/, "")}/embeddings`, {
|
|
285
|
+
method: "POST",
|
|
286
|
+
headers,
|
|
287
|
+
body,
|
|
288
|
+
signal: AbortSignal.timeout(30000),
|
|
289
|
+
maxAttempts: 3,
|
|
290
|
+
defaultDelayMs: attempt => 2 ** attempt * 1000,
|
|
291
|
+
});
|
|
292
|
+
if (res.status === 401) {
|
|
293
|
+
throw new ProviderHttpError("mnemopi embedding request unauthorized (401)", 401, { headers: res.headers });
|
|
294
|
+
}
|
|
295
|
+
return res;
|
|
275
296
|
});
|
|
276
297
|
if (!response.ok) {
|
|
277
298
|
return null;
|
|
@@ -335,11 +356,11 @@ export async function available(): Promise<boolean> {
|
|
|
335
356
|
return providerAvailable(providerOverride);
|
|
336
357
|
}
|
|
337
358
|
if (isApiModel(defaultModel())) {
|
|
338
|
-
const baseUrl = active?.apiUrl ?? ($env.
|
|
339
|
-
if (baseUrl !== undefined && baseUrl !== "" && !baseUrl
|
|
359
|
+
const baseUrl = active?.apiUrl ?? ($env.MNEMOPROMETHEUS_EMBEDDING_APROMETHEUS_URL || $env.OPENROUTER_BASE_URL);
|
|
360
|
+
if (baseUrl !== undefined && baseUrl !== "" && !hostMatchesUrl(baseUrl, "openrouter")) {
|
|
340
361
|
return true;
|
|
341
362
|
}
|
|
342
|
-
return
|
|
363
|
+
return embeddingKeyConfigured();
|
|
343
364
|
}
|
|
344
365
|
if (inTestRuntime()) {
|
|
345
366
|
return false;
|
|
@@ -348,7 +369,7 @@ export async function available(): Promise<boolean> {
|
|
|
348
369
|
}
|
|
349
370
|
|
|
350
371
|
export function availableApi(): boolean {
|
|
351
|
-
return
|
|
372
|
+
return embeddingKeyConfigured();
|
|
352
373
|
}
|
|
353
374
|
|
|
354
375
|
export async function embedQuery(text: string): Promise<Vector | null> {
|
|
@@ -410,7 +431,11 @@ export async function embed(texts: readonly string[]): Promise<EmbeddingMatrix |
|
|
|
410
431
|
}
|
|
411
432
|
}
|
|
412
433
|
return vectors;
|
|
413
|
-
} catch {
|
|
434
|
+
} catch (error) {
|
|
435
|
+
logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: local embedding failed", {
|
|
436
|
+
textCount: texts.length,
|
|
437
|
+
error: String(error),
|
|
438
|
+
});
|
|
414
439
|
return null;
|
|
415
440
|
}
|
|
416
441
|
}
|