@prometheus-ai/memory 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +4 -4
- package/dist/types/config.d.ts +13 -2
- package/dist/types/core/beam/store.d.ts +20 -0
- package/dist/types/core/embeddings.d.ts +2 -1
- package/dist/types/core/extraction/client.d.ts +11 -7
- package/dist/types/core/extraction.d.ts +2 -1
- package/dist/types/core/fastembed-runtime.d.ts +4 -0
- package/dist/types/core/index.d.ts +1 -0
- package/dist/types/core/llm-backends.d.ts +2 -0
- package/dist/types/core/local-llm.d.ts +8 -3
- package/dist/types/core/memory.d.ts +12 -3
- package/dist/types/core/query-cache.d.ts +1 -2
- package/dist/types/core/runtime-options.d.ts +10 -5
- package/dist/types/core/shmr.d.ts +11 -5
- package/dist/types/core/vector-index.d.ts +16 -0
- package/dist/types/index.d.ts +2 -1
- package/package.json +30 -7
- package/src/cli.ts +19 -19
- package/src/config.ts +98 -68
- package/src/core/banks.ts +2 -2
- package/src/core/beam/consolidate.ts +34 -5
- package/src/core/beam/helpers.ts +21 -28
- package/src/core/beam/index.ts +2 -2
- package/src/core/beam/recall.ts +98 -25
- package/src/core/beam/store.ts +96 -4
- package/src/core/binary-vectors.ts +1 -1
- package/src/core/content-sanitizer.ts +3 -3
- package/src/core/cost-log.ts +1 -1
- package/src/core/embeddings.ts +75 -50
- package/src/core/extraction/client.ts +44 -20
- package/src/core/extraction.ts +10 -9
- package/src/core/fastembed-runtime.ts +89 -0
- package/src/core/index.ts +1 -0
- package/src/core/llm-backends.ts +3 -0
- package/src/core/local-llm.ts +81 -43
- package/src/core/memory.ts +25 -5
- package/src/core/plugins.ts +1 -1
- package/src/core/polyphonic-recall.ts +4 -4
- package/src/core/query-cache.ts +2 -3
- package/src/core/runtime-options.ts +13 -5
- package/src/core/shmr.ts +141 -39
- package/src/core/streaming.ts +1 -1
- package/src/core/triples.ts +3 -3
- package/src/core/vector-index.ts +84 -0
- package/src/diagnose.ts +2 -2
- package/src/dr/recovery.ts +5 -5
- package/src/index.ts +1 -1
- package/src/mcp-server.ts +2 -2
- package/src/mcp-tools.ts +61 -61
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import { type ApiKey, type FetchImpl, withAuth } from "@prometheus-ai/ai";
|
|
2
|
+
|
|
1
3
|
import { getDiagnostics } from "./diagnostics";
|
|
2
4
|
import { EXTRACTION_SYSTEM_PROMPT, EXTRACTION_USER_TEMPLATE } from "./prompts";
|
|
3
5
|
|
|
4
|
-
export const DEFAULT_EXTRACTION_MODEL = process.env.
|
|
6
|
+
/**
 * Model id used when the caller does not name one: the
 * `MNEMOPROMETHEUS_EXTRACTION_MODEL` environment variable when it is set to a
 * non-empty value, otherwise `"google/gemini-2.5-flash"`.
 */
export const DEFAULT_EXTRACTION_MODEL = process.env.MNEMOPROMETHEUS_EXTRACTION_MODEL
	? process.env.MNEMOPROMETHEUS_EXTRACTION_MODEL
	: "google/gemini-2.5-flash";
|
|
5
7
|
export const OPENROUTER_BASE_URL = (process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1").replace(
|
|
6
8
|
/\/+$/,
|
|
7
9
|
"",
|
|
@@ -26,6 +28,13 @@ export interface ExtractedFact {
|
|
|
26
28
|
[key: string]: unknown;
|
|
27
29
|
}
|
|
28
30
|
|
|
31
|
+
/**
 * Optional overrides accepted by the `ExtractionClient` constructor.
 * Every member may be omitted; `model`, `apiKey` and `baseUrl` also accept `null`.
 */
export interface ExtractionClientOptions {
	/** Model identifier for chat requests. */
	model?: null | string;
	/** Credential handed to `withAuth`; per the comments in `chat` it may be a resolver rather than a static string. */
	apiKey?: null | ApiKey;
	/** API root URL; the constructor strips trailing slashes. */
	baseUrl?: null | string;
	/** Replacement for the global `fetch` used to send requests. */
	fetch?: FetchImpl;
}
|
|
37
|
+
|
|
29
38
|
function sleep(ms: number): Promise<void> {
|
|
30
39
|
const { promise, resolve } = Promise.withResolvers<void>();
|
|
31
40
|
setTimeout(resolve, ms);
|
|
@@ -42,14 +51,16 @@ function authHeader(apiKey: string): Record<string, string> {
|
|
|
42
51
|
|
|
43
52
|
export class ExtractionClient {
|
|
44
53
|
model: string;
|
|
45
|
-
apiKey:
|
|
54
|
+
apiKey: ApiKey;
|
|
46
55
|
baseUrl: string;
|
|
47
56
|
callCount = 0;
|
|
57
|
+
private readonly fetchImpl: FetchImpl;
|
|
48
58
|
|
|
49
|
-
constructor(opts:
|
|
59
|
+
/**
 * Builds a client from optional overrides, falling back to environment
 * variables and module defaults for anything not supplied.
 *
 * @param opts - Optional model, API key, base URL and `fetch` overrides.
 */
constructor(opts: ExtractionClientOptions = {}) {
	// `||` rather than `??`: an empty-string model also selects the default.
	this.model = opts.model || DEFAULT_EXTRACTION_MODEL;
	// OpenRouter's standard variable is OPENROUTER_API_KEY. The
	// OPENROUTER_APROMETHEUS_KEY spelling is kept as a fallback so
	// environments that already export it keep working.
	this.apiKey =
		opts.apiKey ?? (process.env.OPENROUTER_API_KEY || process.env.OPENROUTER_APROMETHEUS_KEY || "");
	// Normalise so `${baseUrl}/chat/completions` never contains a double slash.
	this.baseUrl = (opts.baseUrl || OPENROUTER_BASE_URL).replace(/\/+$/, "");
	this.fetchImpl = opts.fetch ?? fetch;
}
|
|
54
65
|
|
|
55
66
|
async chat(messages: readonly ChatMessage[], temperature = 0, maxTokens = 4096): Promise<string> {
|
|
@@ -59,22 +70,34 @@ export class ExtractionClient {
|
|
|
59
70
|
let lastError: unknown = null;
|
|
60
71
|
|
|
61
72
|
for (const model of models) {
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
try {
|
|
74
|
+
// withAuth re-resolves the key on 401/usage-limit (force-refresh,
|
|
75
|
+
// then sibling rotation) when `apiKey` is a resolver; the 429
|
|
76
|
+
// backoff loop stays inside the attempt so rate-limit retries
|
|
77
|
+
// reuse the already-resolved key.
|
|
78
|
+
const result = await withAuth(this.apiKey, async key => {
|
|
79
|
+
let rateLimitError: unknown = null;
|
|
80
|
+
for (let attempt = 0; attempt < 3; attempt += 1) {
|
|
81
|
+
try {
|
|
82
|
+
return await this.callApi(model, messages, temperature, maxTokens, key);
|
|
83
|
+
} catch (exc) {
|
|
84
|
+
const msg = String(exc).toLowerCase();
|
|
85
|
+
if (msg.includes("429") || msg.includes("rate")) {
|
|
86
|
+
rateLimitError = exc;
|
|
87
|
+
await sleep(Math.min(RATE_LIMIT_BACKOFF_MAX_MS, RATE_LIMIT_BACKOFF_BASE_MS * 2 ** attempt));
|
|
88
|
+
continue;
|
|
89
|
+
}
|
|
90
|
+
throw exc;
|
|
91
|
+
}
|
|
75
92
|
}
|
|
76
|
-
|
|
93
|
+
throw rateLimitError;
|
|
94
|
+
});
|
|
95
|
+
if (result === "") {
|
|
96
|
+
diag.recordNoOutput("cloud");
|
|
77
97
|
}
|
|
98
|
+
return result;
|
|
99
|
+
} catch (exc) {
|
|
100
|
+
lastError = exc;
|
|
78
101
|
}
|
|
79
102
|
await sleep(FALLBACK_MODEL_DELAY_MS);
|
|
80
103
|
}
|
|
@@ -88,10 +111,11 @@ export class ExtractionClient {
|
|
|
88
111
|
messages: readonly ChatMessage[],
|
|
89
112
|
temperature: number,
|
|
90
113
|
maxTokens: number,
|
|
114
|
+
apiKey = "",
|
|
91
115
|
): Promise<string> {
|
|
92
|
-
const response = await
|
|
116
|
+
const response = await this.fetchImpl(`${this.baseUrl}/chat/completions`, {
|
|
93
117
|
method: "POST",
|
|
94
|
-
headers: authHeader(
|
|
118
|
+
headers: authHeader(apiKey),
|
|
95
119
|
body: JSON.stringify({ model, messages, temperature, max_tokens: maxTokens }),
|
|
96
120
|
signal: AbortSignal.timeout(60000),
|
|
97
121
|
});
|
package/src/core/extraction.ts
CHANGED
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
cleanOutput,
|
|
8
8
|
configuredLlmWillHandleCall,
|
|
9
9
|
llmAvailable,
|
|
10
|
+
type RemoteLlmOptions,
|
|
10
11
|
} from "./local-llm";
|
|
11
12
|
import { getMnemopiRuntimeOptions } from "./runtime-options";
|
|
12
13
|
|
|
@@ -27,23 +28,23 @@ function envInt(name: string, defaultValue: number): number {
|
|
|
27
28
|
}
|
|
28
29
|
|
|
29
30
|
/**
 * Reads the `MNEMOPROMETHEUS_LLM_ENABLED` flag via `envBool`.
 * The `true` argument is presumably the value used when the variable is
 * unset — confirm against `envBool`.
 */
function llmEnabled(): boolean {
	const enabled = envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
	return enabled;
}
|
|
32
33
|
|
|
33
34
|
/**
 * Reads the `MNEMOPROMETHEUS_HOST_LLM_ENABLED` flag via `envBool`.
 * The `false` argument is presumably the value used when the variable is
 * unset — confirm against `envBool`.
 */
function hostLlmEnabled(): boolean {
	const enabled = envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
	return enabled;
}
|
|
36
37
|
|
|
37
38
|
/**
 * Returns the value of `MNEMOPROMETHEUS_LLM_BASE_URL` with any run of
 * trailing slashes removed.
 */
function llmBaseUrl(): string {
	const configured = env("MNEMOPROMETHEUS_LLM_BASE_URL");
	return configured.replace(/\/+$/, "");
}
|
|
40
41
|
|
|
41
42
|
/**
 * Reads `MNEMOPROMETHEUS_LLM_MAX_TOKENS` via `envInt`.
 * The `2048` argument is presumably the value used when the variable is
 * unset or unparsable — confirm against `envInt`.
 */
function llmMaxTokens(): number {
	const limit = envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
	return limit;
}
|
|
44
45
|
|
|
45
46
|
export const EXTRACTION_PROMPT_TEMPLATE =
|
|
46
|
-
env("
|
|
47
|
+
env("MNEMOPROMETHEUS_EXTRACTION_PROMPT") ||
|
|
47
48
|
`You are an expert structured memory extractor for Mnemopi v3.0+ MEMORIA tables.
|
|
48
49
|
The user message below may be in English, German, Russian, or another language.
|
|
49
50
|
First detect the language, then extract ONLY high-signal, long-term relevant items.
|
|
@@ -195,8 +196,8 @@ async function tryHostExtraction(prompt: string): Promise<[boolean, string | nul
|
|
|
195
196
|
maxTokens: llmMaxTokens(),
|
|
196
197
|
temperature: 0,
|
|
197
198
|
timeout: 15,
|
|
198
|
-
provider: env("
|
|
199
|
-
model: env("
|
|
199
|
+
provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
|
|
200
|
+
model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
|
|
200
201
|
});
|
|
201
202
|
const text = typeof raw === "string" ? raw.trim() : "";
|
|
202
203
|
return [true, text === "" ? null : text];
|
|
@@ -231,7 +232,7 @@ async function localFallback(prompt: string, sourceText: string, diag = getDiagn
|
|
|
231
232
|
return [];
|
|
232
233
|
}
|
|
233
234
|
|
|
234
|
-
export async function extractFacts(text: string | null | undefined): Promise<string[]> {
|
|
235
|
+
export async function extractFacts(text: string | null | undefined, options: RemoteLlmOptions = {}): Promise<string[]> {
|
|
235
236
|
const diag = getDiagnostics();
|
|
236
237
|
if (typeof text !== "string" || text.trim() === "") {
|
|
237
238
|
return [];
|
|
@@ -303,7 +304,7 @@ export async function extractFacts(text: string | null | undefined): Promise<str
|
|
|
303
304
|
if (llmEnabled() && llmBaseUrl() !== "") {
|
|
304
305
|
diag.recordAttempt("remote");
|
|
305
306
|
try {
|
|
306
|
-
const raw = await callRemoteLlm(prompt, 0);
|
|
307
|
+
const raw = await callRemoteLlm(prompt, 0, options);
|
|
307
308
|
if (raw !== null) {
|
|
308
309
|
const facts = parseFacts(cleanOutput(raw));
|
|
309
310
|
if (facts.length > 0) {
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import {
|
|
4
|
+
ensureRuntimeInstalled,
|
|
5
|
+
getFastembedRuntimeDir,
|
|
6
|
+
installRuntimeModuleResolver,
|
|
7
|
+
logger,
|
|
8
|
+
resolveRuntimeModule,
|
|
9
|
+
} from "@prometheus-ai/utils";
|
|
10
|
+
import type * as Fastembed from "fastembed";
|
|
11
|
+
import packageManifest from "../../package.json" with { type: "json" };
|
|
12
|
+
|
|
13
|
+
/** Namespace type of the lazily loaded `fastembed` module. */
type FastembedModule = typeof Fastembed;

/**
 * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
 * assets across platforms); they are never bundled and never installed
 * eagerly. If the direct import cannot resolve — bundled `dist/cli.js`, a
 * compiled binary, or a consumer that skipped the optional peers — the pinned
 * pair is `bun install`ed into a per-version runtime cache on first use and
 * loaded from there (#2389).
 *
 * The pins are exact versions in `peerDependencies` (not `catalog:`), so this
 * module still reads concrete specs when the workspace manifest is inlined
 * into a bundle; a workspace test asserts they match the catalog.
 */
const { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC } = packageManifest.peerDependencies;

/**
 * Load shared by all `loadFastembed()` callers: `null` before the first call
 * and again after a failed load, otherwise the in-flight or settled promise.
 */
let fastembedLoad: Promise<FastembedModule> | null = null;
|
|
31
|
+
|
|
32
|
+
/**
 * Returns the `fastembed` module, starting the load on first use and sharing
 * that single promise with every later caller.
 *
 * @returns A promise for the loaded module; it rejects with the load error.
 */
export function loadFastembed(): Promise<FastembedModule> {
	if (fastembedLoad !== null) {
		return fastembedLoad;
	}
	const pending = loadFastembedOnce().catch(reason => {
		// Forget the failed attempt so the next call can retry from scratch.
		fastembedLoad = null;
		throw reason;
	});
	fastembedLoad = pending;
	return pending;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Performs one load attempt: a direct dynamic import first, then the
 * on-demand runtime install when the import fails to resolve.
 *
 * @returns The loaded `fastembed` module.
 * @throws Any import failure that is not a module-resolution error, and any
 *   error raised by the runtime-install fallback.
 */
async function loadFastembedOnce(): Promise<FastembedModule> {
	// Dynamic imports only: both packages are optional peers that eagerly load
	// native addons and may be absent at runtime. A static import would load
	// the addon at module-init and crash every consumer without the peers.
	const preloadOrt = process.platform === "win32";
	try {
		// Windows only: preload ORT 1.24 ahead of fastembed's nested ORT 1.21,
		// because loading the older binding first triggers a DLL-reuse crash.
		if (preloadOrt) await import("onnxruntime-node");
		const direct = await import("fastembed");
		return direct;
	} catch (cause) {
		if (isModuleResolutionError(cause)) {
			logger.debug("mnemopi: fastembed not resolvable, using on-demand runtime install", {
				error: String(cause),
			});
			return loadFromRuntimeInstall();
		}
		throw cause;
	}
}
|
|
59
|
+
|
|
60
|
+
/**
 * Ensures the pinned `fastembed` / `onnxruntime-node` pair is installed in a
 * per-version runtime cache directory and loads `fastembed` from it.
 *
 * @returns The `fastembed` module required from the runtime cache.
 * @throws Error when the installed runtime has no resolvable `fastembed` entry.
 */
async function loadFromRuntimeInstall(): Promise<FastembedModule> {
	// Directory name derived from both pinned specs, restricted to filesystem-safe characters.
	const cacheKey = `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}`.replace(/[^A-Za-z0-9._-]/g, "_");
	const cacheDir = path.join(getFastembedRuntimeDir(), cacheKey);
	const runtimeDir = await ensureRuntimeInstalled({
		runtimeDir: cacheDir,
		install: { dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC } },
		probePackage: "fastembed",
	});
	const runtimeNodeModules = path.join(runtimeDir, "node_modules");

	// The compiled-binary resolver ignores `main`/`exports` for real-FS bare
	// specifiers (Bun #1763), so the runtime graph's requires (fastembed →
	// onnxruntime-node, @anush008/tokenizers → platform binding, …) are routed
	// through the runtime cache.
	installRuntimeModuleResolver({ runtimeNodeModules });

	if (process.platform === "win32") {
		// Load the cached onnxruntime-node before fastembed on Windows.
		const ortPath = resolveRuntimeModule(runtimeNodeModules, "onnxruntime-node");
		if (ortPath) {
			const requireOrt = createRequire(ortPath);
			requireOrt(ortPath);
		}
	}

	const fastembedPath = resolveRuntimeModule(runtimeNodeModules, "fastembed");
	if (!fastembedPath) {
		throw new Error(`fastembed runtime install at ${runtimeDir} has no loadable entry`);
	}
	return createRequire(fastembedPath)(fastembedPath) as FastembedModule;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Tells whether a thrown value looks like a "module could not be resolved"
 * failure, as opposed to a module that resolved and then failed while loading.
 *
 * @param error - The caught value; non-objects and `null` never match.
 * @returns `true` when `name` is `"ResolveMessage"`, when `code` is
 *   `ERR_MODULE_NOT_FOUND` or `MODULE_NOT_FOUND`, or when a string `message`
 *   contains "cannot find module" / "cannot find package" (case-insensitive).
 */
function isModuleResolutionError(error: unknown): boolean {
	if (error === null || typeof error !== "object") {
		return false;
	}
	const shape = error as { name?: unknown; code?: unknown; message?: unknown };
	const errName = shape.name;
	const errCode = shape.code;
	const errMessage = shape.message;

	const matchesName = errName === "ResolveMessage";
	const matchesCode = errCode === "ERR_MODULE_NOT_FOUND" || errCode === "MODULE_NOT_FOUND";
	if (matchesName || matchesCode) {
		return true;
	}
	if (typeof errMessage !== "string") {
		return false;
	}
	return /cannot find (module|package)/i.test(errMessage);
}
|
package/src/core/index.ts
CHANGED
package/src/core/llm-backends.ts
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
import type { FetchImpl } from "@prometheus-ai/ai";
|
|
2
|
+
|
|
1
3
|
/** Per-call tuning for an LLM completion; every member is optional. */
export interface CompleteOptions {
	/** Upper bound on generated tokens. */
	maxTokens?: number;
	/** Sampling temperature. */
	temperature?: number;
	/** Request timeout. NOTE(review): call sites pass `15`, presumably seconds — confirm units. */
	timeout?: number;
	/** Provider override; `null` means none was configured. */
	provider?: null | string;
	/** Model override; `null` means none was configured. */
	model?: null | string;
	/** Replacement for the global `fetch`. */
	fetch?: FetchImpl;
}
|
|
8
11
|
|
|
9
12
|
export interface LlmBackend {
|
package/src/core/local-llm.ts
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
type Api,
|
|
3
|
+
type ApiKey,
|
|
4
|
+
type AssistantMessage,
|
|
5
|
+
completeSimple,
|
|
6
|
+
type FetchImpl,
|
|
7
|
+
type Model,
|
|
8
|
+
ProviderHttpError,
|
|
9
|
+
withAuth,
|
|
10
|
+
} from "@prometheus-ai/ai";
|
|
11
|
+
import { type CompleteOptions, callHostLlm, getHostLlmBackend } from "./llm-backends";
|
|
3
12
|
import {
|
|
4
13
|
getMnemopiRuntimeOptions,
|
|
5
14
|
isPiAiModel,
|
|
@@ -7,8 +16,12 @@ import {
|
|
|
7
16
|
type MnemopiLlmCompletion,
|
|
8
17
|
} from "./runtime-options";
|
|
9
18
|
|
|
10
|
-
const ENV_MODEL_REPO = process.env.
|
|
11
|
-
|
|
19
|
+
const ENV_MODEL_REPO = process.env.MNEMOPROMETHEUS_LLM_REPO ?? "";
|
|
20
|
+
/** Options for calls that reach the remote LLM endpoint. */
export interface RemoteLlmOptions {
	/** Replacement for the global `fetch`; `callRemoteLlm` uses the global when this is omitted. */
	fetch?: FetchImpl;
}
|
|
23
|
+
|
|
24
|
+
const ENV_MODEL_FILE = process.env.MNEMOPROMETHEUS_LLM_FILE ?? "";
|
|
12
25
|
export const DEFAULT_MODEL_REPO =
|
|
13
26
|
ENV_MODEL_REPO !== "" && ENV_MODEL_FILE !== "" ? ENV_MODEL_REPO : "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF";
|
|
14
27
|
export const DEFAULT_MODEL_FILE =
|
|
@@ -59,7 +72,7 @@ function llmEnabled(): boolean {
|
|
|
59
72
|
if (activeCustomCompletion() !== undefined || activePiAiModel() !== undefined) {
|
|
60
73
|
return true;
|
|
61
74
|
}
|
|
62
|
-
return envBool("
|
|
75
|
+
return envBool("MNEMOPROMETHEUS_LLM_ENABLED", true);
|
|
63
76
|
}
|
|
64
77
|
|
|
65
78
|
function llmMaxTokens(): number {
|
|
@@ -67,11 +80,11 @@ function llmMaxTokens(): number {
|
|
|
67
80
|
if (active?.maxTokens !== undefined) {
|
|
68
81
|
return active.maxTokens;
|
|
69
82
|
}
|
|
70
|
-
return envInt("
|
|
83
|
+
return envInt("MNEMOPROMETHEUS_LLM_MAX_TOKENS", 2048);
|
|
71
84
|
}
|
|
72
85
|
|
|
73
86
|
/**
 * Reads `MNEMOPROMETHEUS_LLM_N_CTX` via `envInt`.
 * The `2048` argument is presumably the value used when the variable is
 * unset or unparsable — confirm against `envInt`.
 */
function llmContextTokens(): number {
	const contextSize = envInt("MNEMOPROMETHEUS_LLM_N_CTX", 2048);
	return contextSize;
}
|
|
76
89
|
|
|
77
90
|
function hostLlmEnabled(): boolean {
|
|
@@ -82,11 +95,11 @@ function hostLlmEnabled(): boolean {
|
|
|
82
95
|
if (active?.baseUrl !== undefined || (typeof active?.model === "string" && active.model !== "")) {
|
|
83
96
|
return false;
|
|
84
97
|
}
|
|
85
|
-
return envBool("
|
|
98
|
+
return envBool("MNEMOPROMETHEUS_HOST_LLM_ENABLED", false);
|
|
86
99
|
}
|
|
87
100
|
|
|
88
101
|
/**
 * Reads `MNEMOPROMETHEUS_HOST_LLM_N_CTX` via `envInt`.
 * The `32000` argument is presumably the value used when the variable is
 * unset or unparsable — confirm against `envInt`.
 */
function hostLlmContextTokens(): number {
	const contextSize = envInt("MNEMOPROMETHEUS_HOST_LLM_N_CTX", 32000);
	return contextSize;
}
|
|
91
104
|
|
|
92
105
|
function llmBaseUrl(): string {
|
|
@@ -94,7 +107,7 @@ function llmBaseUrl(): string {
|
|
|
94
107
|
if (active?.baseUrl !== undefined) {
|
|
95
108
|
return stripTrailingSlash(active.baseUrl);
|
|
96
109
|
}
|
|
97
|
-
return stripTrailingSlash(env("
|
|
110
|
+
return stripTrailingSlash(env("MNEMOPROMETHEUS_LLM_BASE_URL"));
|
|
98
111
|
}
|
|
99
112
|
|
|
100
113
|
function llmModelName(): string {
|
|
@@ -102,19 +115,19 @@ function llmModelName(): string {
|
|
|
102
115
|
if (typeof model === "string") {
|
|
103
116
|
return model;
|
|
104
117
|
}
|
|
105
|
-
return env("
|
|
118
|
+
return env("MNEMOPROMETHEUS_LLM_MODEL") || "local";
|
|
106
119
|
}
|
|
107
120
|
|
|
108
|
-
function llmApiKey():
|
|
121
|
+
/**
 * Resolves the credential for remote LLM calls.
 *
 * An `apiKey` on the active runtime options wins. Otherwise the key comes
 * from the environment: `MNEMOPROMETHEUS_LLM_API_KEY` first, then the
 * `MNEMOPROMETHEUS_LLM_APROMETHEUS_KEY` spelling, which is kept as a fallback
 * so environments that already export it keep working.
 *
 * @returns The configured key or resolver, or whatever `env` yields when
 *   neither variable is set (presumably `""` — confirm against `env`).
 */
function llmApiKey(): ApiKey {
	const active = activeLlmOptions();
	if (active?.apiKey !== undefined) {
		return active.apiKey;
	}
	return env("MNEMOPROMETHEUS_LLM_API_KEY") || env("MNEMOPROMETHEUS_LLM_APROMETHEUS_KEY");
}
|
|
115
128
|
|
|
116
129
|
/** Returns the value of `MNEMOPROMETHEUS_SLEEP_PROMPT` with surrounding whitespace trimmed. */
function sleepPrompt(): string {
	const configured = env("MNEMOPROMETHEUS_SLEEP_PROMPT");
	return configured.trim();
}
|
|
119
132
|
|
|
120
133
|
function memoryLines(memories: readonly string[]): string {
|
|
@@ -228,8 +241,8 @@ async function tryHostLlm(prompt: string, maxTokens: number, temperature: number
|
|
|
228
241
|
maxTokens,
|
|
229
242
|
temperature,
|
|
230
243
|
timeout: 15,
|
|
231
|
-
provider: env("
|
|
232
|
-
model: env("
|
|
244
|
+
provider: env("MNEMOPROMETHEUS_HOST_LLM_PROVIDER").trim() || null,
|
|
245
|
+
model: env("MNEMOPROMETHEUS_HOST_LLM_MODEL").trim() || null,
|
|
233
246
|
});
|
|
234
247
|
const text = typeof raw === "string" ? raw.trim() : "";
|
|
235
248
|
return [true, text === "" ? null : text];
|
|
@@ -309,30 +322,43 @@ export function llmAvailable(): boolean {
|
|
|
309
322
|
return llmEnabled() && llmBaseUrl() !== "";
|
|
310
323
|
}
|
|
311
324
|
|
|
312
|
-
export async function callRemoteLlm(
|
|
325
|
+
export async function callRemoteLlm(
|
|
326
|
+
prompt: string,
|
|
327
|
+
temperature = 0.3,
|
|
328
|
+
options: RemoteLlmOptions = {},
|
|
329
|
+
): Promise<string | null> {
|
|
313
330
|
const baseUrl = llmBaseUrl();
|
|
314
331
|
if (baseUrl === "") {
|
|
315
332
|
return null;
|
|
316
333
|
}
|
|
317
334
|
|
|
318
|
-
const
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
335
|
+
const body = JSON.stringify({
|
|
336
|
+
model: llmModelName(),
|
|
337
|
+
messages: [{ role: "user", content: prompt }],
|
|
338
|
+
max_tokens: llmMaxTokens(),
|
|
339
|
+
temperature,
|
|
340
|
+
stop: ["</s>", "<|user|>"],
|
|
341
|
+
});
|
|
342
|
+
const fetchImpl = options.fetch ?? fetch;
|
|
324
343
|
try {
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
344
|
+
// withAuth re-resolves the key on 401 (force-refresh, then sibling
|
|
345
|
+
// rotation) when the configured key is a resolver. An empty static key
|
|
346
|
+
// attempts without an Authorization header (local/proxy setups).
|
|
347
|
+
const response = await withAuth(llmApiKey(), async key => {
|
|
348
|
+
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
|
349
|
+
if (key !== "") {
|
|
350
|
+
headers.Authorization = `Bearer ${key}`;
|
|
351
|
+
}
|
|
352
|
+
const res = await fetchImpl(`${baseUrl}/chat/completions`, {
|
|
353
|
+
method: "POST",
|
|
354
|
+
headers,
|
|
355
|
+
body,
|
|
356
|
+
signal: AbortSignal.timeout(60000),
|
|
357
|
+
});
|
|
358
|
+
if (res.status === 401) {
|
|
359
|
+
throw new ProviderHttpError("mnemopi remote LLM request unauthorized (401)", 401, { headers: res.headers });
|
|
360
|
+
}
|
|
361
|
+
return res;
|
|
336
362
|
});
|
|
337
363
|
if (!response.ok) {
|
|
338
364
|
return null;
|
|
@@ -355,7 +381,11 @@ export async function callLocalLlm(_prompt: string): Promise<string | null> {
|
|
|
355
381
|
return null;
|
|
356
382
|
}
|
|
357
383
|
|
|
358
|
-
async function summarizeChunk(
|
|
384
|
+
async function summarizeChunk(
|
|
385
|
+
memories: readonly string[],
|
|
386
|
+
source = "",
|
|
387
|
+
options: RemoteLlmOptions = {},
|
|
388
|
+
): Promise<string | null> {
|
|
359
389
|
const hostPrompt = buildHostPrompt(memories, source);
|
|
360
390
|
const prompt = buildPrompt(memories, source);
|
|
361
391
|
if (configuredLlmWillHandleCall()) {
|
|
@@ -379,8 +409,8 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
|
|
|
379
409
|
return null;
|
|
380
410
|
}
|
|
381
411
|
|
|
382
|
-
if (llmEnabled() && llmBaseUrl() !== "" && !envBool("
|
|
383
|
-
const raw = await callRemoteLlm(prompt);
|
|
412
|
+
if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
|
|
413
|
+
const raw = await callRemoteLlm(prompt, 0.3, options);
|
|
384
414
|
if (raw !== null) {
|
|
385
415
|
const cleaned = cleanOutput(raw);
|
|
386
416
|
return cleaned === "" ? null : cleaned;
|
|
@@ -395,7 +425,11 @@ async function summarizeChunk(memories: readonly string[], source = ""): Promise
|
|
|
395
425
|
return null;
|
|
396
426
|
}
|
|
397
427
|
|
|
398
|
-
export async function summarizeMemories(
|
|
428
|
+
export async function summarizeMemories(
|
|
429
|
+
memories: readonly string[],
|
|
430
|
+
source = "",
|
|
431
|
+
options: RemoteLlmOptions = {},
|
|
432
|
+
): Promise<string | null> {
|
|
399
433
|
if (memories.length === 0) {
|
|
400
434
|
return null;
|
|
401
435
|
}
|
|
@@ -403,7 +437,7 @@ export async function summarizeMemories(memories: readonly string[], source = ""
|
|
|
403
437
|
const chunks = chunkMemoriesByBudget(memories, source);
|
|
404
438
|
const chunkSummaries: string[] = [];
|
|
405
439
|
for (const chunk of chunks) {
|
|
406
|
-
const summary = await summarizeChunk(chunk, source);
|
|
440
|
+
const summary = await summarizeChunk(chunk, source, options);
|
|
407
441
|
if (summary !== null) {
|
|
408
442
|
chunkSummaries.push(summary);
|
|
409
443
|
}
|
|
@@ -413,13 +447,17 @@ export async function summarizeMemories(memories: readonly string[], source = ""
|
|
|
413
447
|
return null;
|
|
414
448
|
}
|
|
415
449
|
if (chunkSummaries.length > 1) {
|
|
416
|
-
const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]
|
|
450
|
+
const final = await summarizeChunk(chunkSummaries, `${source} [chunked ${chunks.length} parts]`, options);
|
|
417
451
|
return final ?? chunkSummaries[0] ?? null;
|
|
418
452
|
}
|
|
419
453
|
return chunkSummaries[0] ?? null;
|
|
420
454
|
}
|
|
421
455
|
|
|
422
|
-
export async function complete(
|
|
456
|
+
export async function complete(
|
|
457
|
+
prompt: string,
|
|
458
|
+
temperature = 0.3,
|
|
459
|
+
options: CompleteOptions = {},
|
|
460
|
+
): Promise<string | null> {
|
|
423
461
|
if (configuredLlmWillHandleCall()) {
|
|
424
462
|
const raw = await callConfiguredCompletion(prompt, temperature, { maxTokens: llmMaxTokens() });
|
|
425
463
|
return raw === null ? null : cleanOutput(raw) || null;
|
|
@@ -428,8 +466,8 @@ export async function complete(prompt: string, temperature = 0.3): Promise<strin
|
|
|
428
466
|
if (attempted) {
|
|
429
467
|
return hostText;
|
|
430
468
|
}
|
|
431
|
-
if (llmEnabled() && llmBaseUrl() !== "" && !envBool("
|
|
432
|
-
const remote = await callRemoteLlm(prompt, temperature);
|
|
469
|
+
if (llmEnabled() && llmBaseUrl() !== "" && !envBool("MNEMOPROMETHEUS_FORCE_LOCAL", false)) {
|
|
470
|
+
const remote = await callRemoteLlm(prompt, temperature, options);
|
|
433
471
|
return remote === null ? null : cleanOutput(remote) || null;
|
|
434
472
|
}
|
|
435
473
|
return callLocalLlm(prompt);
|
package/src/core/memory.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Database } from "bun:sqlite";
|
|
2
|
-
import type { Api, Model } from "@prometheus-ai/ai";
|
|
2
|
+
import type { Api, ApiKey, Model } from "@prometheus-ai/ai";
|
|
3
3
|
|
|
4
4
|
import { dbPath as configuredDbPath } from "../config";
|
|
5
5
|
import { closeQuietly } from "../db";
|
|
@@ -7,6 +7,7 @@ import type { MemoryInput, Metadata } from "../types";
|
|
|
7
7
|
import { AnnotationStore } from "./annotations";
|
|
8
8
|
import { BankManager } from "./banks";
|
|
9
9
|
import { BeamMemory, initBeam } from "./beam/index";
|
|
10
|
+
import { reconcileEmbeddingModel } from "./beam/store";
|
|
10
11
|
import type { RecallEnhancedOptions, RecallOptions, RecallResult, SleepResult } from "./beam/types";
|
|
11
12
|
import { EpisodicGraph } from "./episodic-graph";
|
|
12
13
|
import {
|
|
@@ -35,13 +36,22 @@ export interface MnemopiOptions {
|
|
|
35
36
|
readonly noEmbeddings?: boolean;
|
|
36
37
|
readonly embeddingModel?: string;
|
|
37
38
|
readonly embeddingApiUrl?: string;
|
|
38
|
-
readonly embeddingApiKey?:
|
|
39
|
+
readonly embeddingApiKey?: ApiKey;
|
|
39
40
|
readonly embeddings?: false | MnemopiEmbeddingRuntimeOptions;
|
|
40
41
|
readonly llmEnabled?: boolean;
|
|
41
42
|
readonly llmBaseUrl?: string;
|
|
42
|
-
readonly llmApiKey?:
|
|
43
|
+
readonly llmApiKey?: ApiKey;
|
|
43
44
|
readonly llmModel?: string | Model<Api>;
|
|
44
45
|
readonly llm?: false | MnemopiLlmRuntimeOptions | Model<Api> | MnemopiLlmCompletion;
|
|
46
|
+
/** Escalate best-effort failure logs (embedding pipeline) from debug to warn. */
|
|
47
|
+
readonly debug?: boolean;
|
|
48
|
+
/**
|
|
49
|
+
* When `false`, skip the embedding-model reconcile (wipe-and-rebuild) on open.
|
|
50
|
+
* Read-only / ephemeral consumers (e.g. a stats snapshot) set this so an open
|
|
51
|
+
* never triggers a destructive migration whose background rebuild the process
|
|
52
|
+
* would exit before completing. Defaults to `true`.
|
|
53
|
+
*/
|
|
54
|
+
readonly reconcile?: boolean;
|
|
45
55
|
}
|
|
46
56
|
|
|
47
57
|
export interface RememberInput extends MemoryInput {
|
|
@@ -219,10 +229,11 @@ function resolveRuntimeOptions(options: MnemopiOptions): ResolvedMnemopiRuntimeO
|
|
|
219
229
|
}
|
|
220
230
|
}
|
|
221
231
|
|
|
222
|
-
|
|
232
|
+
const debug = options.debug ? true : undefined;
|
|
233
|
+
if (embeddings === undefined && llm === undefined && debug === undefined) {
|
|
223
234
|
return undefined;
|
|
224
235
|
}
|
|
225
|
-
return { embeddings, llm };
|
|
236
|
+
return { embeddings, llm, debug };
|
|
226
237
|
}
|
|
227
238
|
|
|
228
239
|
let defaultInstance: Mnemopi | null = null;
|
|
@@ -385,6 +396,15 @@ export class Mnemopi {
|
|
|
385
396
|
}
|
|
386
397
|
this.conn = this.beam.db;
|
|
387
398
|
this.db = this.beam.db;
|
|
399
|
+
// Wipe-and-rebuild stale embeddings when the configured model changed since
|
|
400
|
+
// the vectors were written. Runs inside the runtime scope so
|
|
401
|
+
// `currentEmbeddingModel()` reflects this instance's configured model.
|
|
402
|
+
// Skipped for read-only opens (`reconcile: false`) so an ephemeral stats
|
|
403
|
+
// reader never triggers a destructive migration whose async rebuild it would
|
|
404
|
+
// exit before completing — which would otherwise lose the embeddings.
|
|
405
|
+
if (options.reconcile !== false) {
|
|
406
|
+
this.#withRuntimeOptions(() => reconcileEmbeddingModel(this.beam));
|
|
407
|
+
}
|
|
388
408
|
}
|
|
389
409
|
|
|
390
410
|
close(): void {
|
package/src/core/plugins.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
|
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
|
|
5
|
-
export const DEFAULT_PLUGIN_DIR = join(homedir(), ".
|
|
5
|
+
/** Default plugin directory: `<home>/.hermes/mnemopi/plugins`. */
export const DEFAULT_PLUGIN_DIR = join(...[homedir(), ".hermes", "mnemopi", "plugins"]);
|
|
6
6
|
|
|
7
7
|
export type PluginConfig = Record<string, unknown>;
|
|
8
8
|
export type MemoryDict = Record<string, unknown>;
|
|
@@ -220,7 +220,7 @@ export class PolyphonicRecallEngine {
|
|
|
220
220
|
}
|
|
221
221
|
|
|
222
222
|
vectorVoice(queryEmbedding: readonly number[] | Float32Array | null): VoiceRecallResult[] {
|
|
223
|
-
if (envDisabled("
|
|
223
|
+
if (envDisabled("MNEMOPROMETHEUS_VOICE_VECTOR") || queryEmbedding === null) return [];
|
|
224
224
|
const queryUnit = normalizeVector(queryEmbedding);
|
|
225
225
|
if (queryUnit === null) return [];
|
|
226
226
|
const now = new Date().toISOString();
|
|
@@ -277,7 +277,7 @@ export class PolyphonicRecallEngine {
|
|
|
277
277
|
return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId)).slice(0, 20);
|
|
278
278
|
}
|
|
279
279
|
graphVoice(query: string): VoiceRecallResult[] {
|
|
280
|
-
if (envDisabled("
|
|
280
|
+
if (envDisabled("MNEMOPROMETHEUS_VOICE_GRAPH")) return [];
|
|
281
281
|
const results: VoiceRecallResult[] = [];
|
|
282
282
|
const seedIds = new Set<string>();
|
|
283
283
|
for (const entity of extractEntities(query)) {
|
|
@@ -323,7 +323,7 @@ export class PolyphonicRecallEngine {
|
|
|
323
323
|
return results;
|
|
324
324
|
}
|
|
325
325
|
factVoice(query: string): VoiceRecallResult[] {
|
|
326
|
-
if (envDisabled("
|
|
326
|
+
if (envDisabled("MNEMOPROMETHEUS_VOICE_FACT")) return [];
|
|
327
327
|
const byId = new Map<string, VoiceRecallResult>();
|
|
328
328
|
for (const word of queryWords(query)) {
|
|
329
329
|
const subject = word[0] === undefined ? word : word[0].toUpperCase() + word.slice(1);
|
|
@@ -351,7 +351,7 @@ export class PolyphonicRecallEngine {
|
|
|
351
351
|
return [...byId.values()].sort((a, b) => b.score - a.score || a.memoryId.localeCompare(b.memoryId));
|
|
352
352
|
}
|
|
353
353
|
temporalVoice(query: string): VoiceRecallResult[] {
|
|
354
|
-
if (envDisabled("
|
|
354
|
+
if (envDisabled("MNEMOPROMETHEUS_VOICE_TEMPORAL") || !looksTemporal(query)) return [];
|
|
355
355
|
const weekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
|
|
356
356
|
let rows: TemporalRow[] = [];
|
|
357
357
|
try {
|
package/src/core/query-cache.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
2
|
import { mkdirSync } from "node:fs";
|
|
3
3
|
import { dirname } from "node:path";
|
|
4
|
+
import { type Env, enhancedRecallEnabled } from "../config";
|
|
4
5
|
import { cosineSimilarity } from "./vector-math";
|
|
5
6
|
|
|
6
7
|
export type QueryCacheResult = Record<string, unknown>;
|
|
@@ -39,10 +40,8 @@ interface CacheRow {
|
|
|
39
40
|
readonly results_json: string;
|
|
40
41
|
}
|
|
41
42
|
|
|
42
|
-
type Env = Readonly<Record<string, string | undefined>>;
|
|
43
|
-
|
|
44
43
|
/**
 * Reports whether enhanced recall is enabled, delegating the decision to
 * `enhancedRecallEnabled` from the config module.
 *
 * @param env - Environment map to consult; defaults to `process.env`.
 */
export function isEnhancedRecallEnabled(env: Env = process.env): boolean {
	const enabled = enhancedRecallEnabled(env);
	return enabled;
}
|
|
47
46
|
|
|
48
47
|
export function isQueryCacheEnabled(useCache = true, env: Env = process.env): boolean {
|