@prom.codes/memory-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/dist/bin.js +839 -17
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,28 +10,42 @@ as git-versioned markdown under `.prometheus/memories/` in your repo.
|
|
|
10
10
|
## Quick start
|
|
11
11
|
|
|
12
12
|
```jsonc
|
|
13
|
-
// Claude Desktop / Cursor MCP config
|
|
13
|
+
// Claude Desktop / Cursor MCP config — dock under the server name `memory`
|
|
14
|
+
// so the tools resolve to memory_read / memory_write / … (no double prefix).
|
|
14
15
|
{
|
|
15
16
|
"mcpServers": {
|
|
16
|
-
"
|
|
17
|
+
"memory": {
|
|
17
18
|
"command": "npx",
|
|
18
19
|
"args": ["-y", "@prom.codes/memory-mcp@latest"],
|
|
19
20
|
"env": {
|
|
20
|
-
"
|
|
21
|
-
"
|
|
21
|
+
"PROMETHEUS_WORKSPACE_ROOT": "/absolute/path/to/your/repo",
|
|
22
|
+
"VOYAGE_API_KEY": "pa-…" // optional: enables semantic recall (search)
|
|
22
23
|
}
|
|
23
24
|
}
|
|
24
25
|
}
|
|
25
26
|
}
|
|
26
27
|
```
|
|
27
28
|
|
|
29
|
+
No API key is required to start: the server boots in keyword mode out of the
|
|
30
|
+
box. Add a `VOYAGE_API_KEY` for semantic search (embeddings run Voyage-direct,
|
|
31
|
+
fully local — only the memory text transits to Voyage), or a `prom_live_…`
|
|
32
|
+
`PROMETHEUS_API_KEY` to route embeddings through the metered Prometheus proxy.
|
|
33
|
+
|
|
28
34
|
Then ask your agent to run `memory_setup` once per workspace — it installs
|
|
29
35
|
the memory protocol into your runtime rule files (CLAUDE.md, .cursor/rules,
|
|
30
36
|
.augment/rules or AGENTS.md) so the agent reads memory at session start and
|
|
31
37
|
captures learnings at session end.
|
|
32
38
|
|
|
33
|
-
Tools: `memory_read`, `memory_write`, `memory_capture`,
|
|
34
|
-
`memory_list`, `memory_delete`, `memory_setup`. Secrets are
|
|
35
|
-
every write. Your memories never leave your machine.
|
|
39
|
+
Tools (docked as `memory`): `memory_read`, `memory_write`, `memory_capture`,
|
|
40
|
+
`memory_search`, `memory_list`, `memory_delete`, `memory_setup`. Secrets are
|
|
41
|
+
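rejected on every write. In the default keyword mode your memories never leave your machine; with an embedding key configured, only the memory text is sent to the embedding provider.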
|
|
42
|
+
|
|
43
|
+
## Native modules
|
|
44
|
+
|
|
45
|
+
Uses `better-sqlite3` (native). Prebuilt binaries are fetched automatically on
|
|
46
|
+
common platforms (macOS x64/arm64, Linux x64/arm64, Windows x64) — no compiler
|
|
47
|
+
needed. On an unsupported platform/Node ABI, install C/C++ build tools so the
|
|
48
|
+
module can compile (Windows: `npm i -g windows-build-tools` or VS Build Tools).
|
|
49
|
+
Requires Node ≥ 20.10.
|
|
36
50
|
|
|
37
51
|
Docs: https://prom.codes/docs
|
package/dist/bin.js
CHANGED
|
@@ -8,6 +8,503 @@ import { createHash } from "node:crypto";
|
|
|
8
8
|
import { homedir } from "node:os";
|
|
9
9
|
import { basename, join, resolve } from "node:path";
|
|
10
10
|
|
|
11
|
+
// ../embeddings-openai-compat/dist/index.js
|
|
12
|
+
var DEFAULT_BATCH = 96;
|
|
13
|
+
var DEFAULT_RETRIES = 4;
|
|
14
|
+
var DEFAULT_BACKOFF = 250;
|
|
15
|
+
var DEFAULT_RETRY_MAX = 6e4;
|
|
16
|
+
var DEFAULT_CONCURRENCY = 1;
|
|
17
|
+
var DEFAULT_MAX_BATCH_TOKENS = 0;
|
|
18
|
+
var DEFAULT_CHARS_PER_TOKEN = 4;
|
|
19
|
+
/**
 * Convert a `Retry-After` header value into a wait time in milliseconds.
 *
 * Two forms are accepted:
 *  - a non-negative decimal number of seconds (e.g. `"2"`, `"1.5"`);
 *  - a date string understood by `Date.parse` that contains at least one
 *    letter (e.g. an HTTP-date), converted to a delay relative to `now`
 *    and clamped at 0 when the date is already in the past.
 *
 * @param {string | null | undefined} value - Raw header value. Anything
 *   that is not a string (missing header, `undefined` from a custom fetch
 *   implementation, ...) yields `null` instead of throwing.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {number | null} Delay in milliseconds, or `null` when the value
 *   is absent or cannot be interpreted.
 */
function parseRetryAfterMs(value, now = Date.now()) {
  // Guard on the type rather than `=== null` so `undefined` and other
  // non-string inputs do not crash on `.trim()`.
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  if (trimmed === "") {
    return null;
  }
  // delta-seconds form: digits with an optional fractional part.
  if (/^[0-9]+(\.[0-9]+)?$/.test(trimmed)) {
    const secs = Number(trimmed);
    // An absurdly long digit string overflows to Infinity; treat as unusable.
    return Number.isFinite(secs) ? Math.round(secs * 1e3) : null;
  }
  // Require a letter before trying Date.parse so purely numeric/punctuation
  // garbage (e.g. "-5") is never interpreted as a date.
  if (!/[A-Za-z]/.test(trimmed)) {
    return null;
  }
  const ts = Date.parse(trimmed);
  if (!Number.isFinite(ts)) {
    return null;
  }
  const delta = ts - now;
  return delta > 0 ? delta : 0;
}
|
|
39
|
+
/**
 * Wait for `ms` milliseconds, ending early if `signal` is aborted.
 *
 * The rejection is an `Error` with message `"aborted"` and
 * `name === "AbortError"`. Callers that detect cancellation with
 * `err.name === "AbortError"` therefore recognise an abort that happens
 * during a backoff wait, instead of mistaking it for a retryable failure.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep(ms, signal) {
  return new Promise((resolve2, reject) => {
    const makeAbortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    // Already cancelled: fail immediately without arming a timer.
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(makeAbortError());
    };
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate handlers.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
56
|
+
/**
 * Embedding client for an OpenAI-compatible `POST {baseUrl}/embeddings`
 * endpoint.
 *
 * Texts are split into batches (by item count and, optionally, by an
 * estimated token budget), each batch is sent with retry on HTTP 429 / 5xx
 * and on thrown fetch errors, and the returned vectors are validated against
 * the declared `dimension` and returned as `Float32Array`s in input order.
 */
var OpenAICompatEmbeddingProvider = class {
  name;
  model;
  dimension;
  region;
  #baseUrl;
  #apiKey;
  #sendDimensions;
  #omitEncodingFormat;
  #batchSize;
  #maxBatchTokens;
  #charsPerToken;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #maxConcurrency;
  #fetch;
  /**
   * @param {object} opts
   * @param {string} opts.name - Provider label; used as the prefix of error messages.
   * @param {string} opts.model - Model id sent in the request body.
   * @param {number} opts.dimension - Expected vector length (positive integer).
   * @param {string} [opts.region] - Stored verbatim on the instance.
   * @param {string} opts.baseUrl - Endpoint base; trailing slashes are stripped.
   * @param {string} [opts.apiKey] - Sent as a Bearer token when non-empty.
   * @param {boolean} [opts.sendDimensions=false] - Include `dimensions` in the body.
   * @param {boolean} [opts.omitEncodingFormat=false] - Skip `encoding_format: "float"`.
   * @param {number} [opts.batchSize] - Max items per request.
   * @param {number} [opts.maxBatchTokens] - Estimated token budget per request; 0 disables it.
   * @param {number} [opts.charsPerToken] - Characters per token used for the estimate.
   * @param {number} [opts.maxRetries] - Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] - First backoff delay; doubles per attempt.
   * @param {number} [opts.retryMaxMs] - Upper bound for any single backoff.
   * @param {number} [opts.maxConcurrency] - Number of batches in flight at once.
   * @param {typeof fetch} [opts.fetch] - Fetch implementation (defaults to the global).
   * @throws {Error} When `dimension`, `maxConcurrency`, `maxBatchTokens` or
   *   `charsPerToken` is out of range.
   */
  constructor(opts) {
    if (!Number.isInteger(opts.dimension) || opts.dimension <= 0) {
      throw new Error(`OpenAICompatEmbeddingProvider: dimension must be a positive integer, got ${opts.dimension}`);
    }
    if (opts.maxConcurrency !== void 0 && (!Number.isInteger(opts.maxConcurrency) || opts.maxConcurrency <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxConcurrency must be a positive integer, got ${opts.maxConcurrency}`);
    }
    if (opts.maxBatchTokens !== void 0 && (!Number.isFinite(opts.maxBatchTokens) || opts.maxBatchTokens < 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxBatchTokens must be a non-negative number, got ${opts.maxBatchTokens}`);
    }
    if (opts.charsPerToken !== void 0 && (!Number.isFinite(opts.charsPerToken) || opts.charsPerToken <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: charsPerToken must be a positive number, got ${opts.charsPerToken}`);
    }
    this.name = opts.name;
    this.model = opts.model;
    this.dimension = opts.dimension;
    this.region = opts.region;
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#sendDimensions = opts.sendDimensions ?? false;
    this.#omitEncodingFormat = opts.omitEncodingFormat ?? false;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH;
    this.#maxBatchTokens = opts.maxBatchTokens ?? DEFAULT_MAX_BATCH_TOKENS;
    this.#charsPerToken = opts.charsPerToken ?? DEFAULT_CHARS_PER_TOKEN;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX;
    this.#maxConcurrency = opts.maxConcurrency ?? DEFAULT_CONCURRENCY;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Embed `texts` and return one `Float32Array` per input, in input order.
   *
   * @param {string[]} texts - Inputs; an empty array short-circuits to `[]`.
   * @param {object} [opts]
   * @param {AbortSignal} [opts.signal] - Forwarded to fetch and backoff waits.
   * @param {(p: {done: number, total: number, batchSize: number}) => void} [opts.onProgress]
   *   Called after each completed batch; exceptions it throws are ignored.
   * @returns {Promise<Float32Array[]>}
   */
  async embed(texts, opts) {
    if (texts.length === 0)
      return [];
    const total = texts.length;
    const out = new Array(total);
    const onProgress = opts?.onProgress;
    const batches = this.#planBatches(texts);
    let doneCount = 0;
    const emit = (batchSize) => {
      if (onProgress === void 0)
        return;
      try {
        onProgress({ done: doneCount, total, batchSize });
      } catch {
        // Deliberate best-effort: a faulty progress callback must not fail the embed.
      }
    };
    const runOne = async (range) => {
      const batch = texts.slice(range.start, range.start + range.count);
      const vectors = await this.#embedBatch(batch, opts?.signal);
      // Write at the batch's own offset so results stay in input order even
      // when batches complete out of order under concurrency.
      for (let i = 0; i < vectors.length; i++)
        out[range.start + i] = vectors[i];
      doneCount += batch.length;
      emit(batch.length);
    };
    if (this.#maxConcurrency <= 1) {
      for (const range of batches)
        await runOne(range);
    } else {
      // Worker lanes pull the next batch index from a shared cursor.
      let next = 0;
      const worker = async () => {
        while (true) {
          const idx = next++;
          if (idx >= batches.length)
            return;
          await runOne(batches[idx]);
        }
      };
      const workers = [];
      const lanes = Math.min(this.#maxConcurrency, batches.length);
      for (let i = 0; i < lanes; i++)
        workers.push(worker());
      await Promise.all(workers);
    }
    return out;
  }
  /**
   * Partition `texts` into ordered `{ start, count }` ranges. Each range is
   * bounded by `#batchSize` (item count) and, when `#maxBatchTokens > 0`,
   * by an *estimated* token budget (text length / `#charsPerToken`). A
   * single text whose own estimate already exceeds the budget still gets
   * its own one-item batch (the original note says the provider truncates
   * it server-side), so the planner always makes forward progress.
   */
  #planBatches(texts) {
    const total = texts.length;
    const batches = [];
    let start = 0;
    while (start < total) {
      let count = 0;
      let tokens = 0;
      while (start + count < total && count < this.#batchSize) {
        const est = this.#maxBatchTokens > 0 ? Math.ceil(texts[start + count].length / this.#charsPerToken) : 0;
        if (this.#maxBatchTokens > 0 && count > 0 && tokens + est > this.#maxBatchTokens) {
          break;
        }
        tokens += est;
        count += 1;
      }
      // Covers a non-positive batch size: always take at least one item.
      if (count === 0)
        count = 1;
      batches.push({ start, count });
      start += count;
    }
    return batches;
  }
  /**
   * Send one batch, retrying on HTTP 429 / 5xx and on thrown fetch errors.
   * Any other non-2xx status, and any malformed payload, fails immediately
   * with an error flagged `nonRetryable`.
   */
  async #embedBatch(batch, signal) {
    const body = {
      input: batch,
      model: this.model
    };
    if (!this.#omitEncodingFormat)
      body.encoding_format = "float";
    if (this.#sendDimensions)
      body.dimensions = this.dimension;
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0 && this.#apiKey !== "") {
      headers.authorization = `Bearer ${this.#apiKey}`;
    }
    const init = {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    };
    if (signal !== void 0)
      init.signal = signal;
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/embeddings`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          const err = new Error(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
          err.nonRetryable = true;
          throw err;
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        if (err?.name === "AbortError")
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /**
   * Compute the per-attempt backoff. Exponential growth starts from
   * `retryBaseMs` and doubles per attempt; a `Retry-After` header value
   * (if any, parsed by {@link parseRetryAfterMs}) raises the floor so we
   * never undercut a server-advertised wait; the result is capped at
   * `retryMaxMs` to prevent unbounded stalls from misbehaving servers.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate a response payload and convert it to `Float32Array`s ordered by
   * each row's `index`.
   *
   * Every shape problem is reported as a `nonRetryable` error. This includes
   * a `null`/non-object payload and `null` rows, which would otherwise raise
   * a `TypeError` that the retry loop mistakes for a transient failure.
   */
  #decode(payload, expected) {
    const rows = payload?.data;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable(`${this.name}: expected ${expected} embeddings, got ${rows?.length ?? 0}`);
    }
    // Validate before sorting so a null row cannot crash the comparator.
    for (const row of rows) {
      if (!Array.isArray(row?.embedding) || row.embedding.length !== this.dimension) {
        throw nonRetryable(`${this.name}: embedding length ${row?.embedding?.length ?? 0} does not match declared dimension ${this.dimension}`);
      }
    }
    const sorted = [...rows].sort((a, b) => a.index - b.index);
    return sorted.map((row) => Float32Array.from(row.embedding));
  }
};
|
|
260
|
+
/**
 * Build an `Error` tagged with `nonRetryable = true`, the flag the retry
 * loop checks to rethrow immediately instead of backing off.
 *
 * @param {string} message - Error message.
 * @returns {Error} The tagged error (not thrown here).
 */
function nonRetryable(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
|
|
265
|
+
|
|
266
|
+
// ../embeddings-prometheus/dist/index.js
|
|
267
|
+
/**
 * Raised when an embed response reports a different embedding-space
 * fingerprint than the one previously resolved, which means stored vectors
 * are no longer comparable with new ones.
 *
 * Consumers can match on the stable string `code` (`"EMBEDDING_DRIFT"`)
 * rather than importing this class.
 */
var PrometheusEmbeddingDriftError = class extends Error {
  /**
   * @param {string} expected - Fingerprint the client had resolved.
   * @param {string} actual - Fingerprint reported by the response.
   */
  constructor(expected, actual) {
    super(`prometheus-embed: embedding space changed upstream (fingerprint ${expected} -> ${actual}) \u2014 a full re-index is required`);
    Object.assign(this, {
      code: "EMBEDDING_DRIFT",
      expected,
      actual,
      name: "PrometheusEmbeddingDriftError"
    });
  }
};
|
|
283
|
+
var DEFAULT_BASE = "https://api.prom.codes";
|
|
284
|
+
var DEFAULT_BATCH2 = 128;
|
|
285
|
+
var DEFAULT_BATCH_CHARS = 4e5;
|
|
286
|
+
var DEFAULT_RETRIES2 = 4;
|
|
287
|
+
var DEFAULT_BACKOFF2 = 250;
|
|
288
|
+
/**
 * Wait for `ms` milliseconds, ending early if `signal` is aborted.
 *
 * The rejection is an `Error` with message `"aborted"` and
 * `name === "AbortError"`. Retry loops that rethrow on
 * `err.name === "AbortError"` therefore treat an abort during a backoff
 * wait as cancellation rather than as another retryable failure.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep2(ms, signal) {
  return new Promise((resolve2, reject) => {
    const makeAbortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    // Already cancelled: fail immediately without arming a timer.
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(makeAbortError());
    };
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate handlers.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
305
|
+
/**
 * Build an `Error` tagged with `nonRetryable = true`, the flag the request
 * retry loop checks to rethrow immediately instead of backing off.
 *
 * @param {string} message - Error message.
 * @returns {Error} The tagged error (not thrown here).
 */
function nonRetryable2(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
|
|
310
|
+
/**
 * Embedding client for the Prometheus proxy (`{baseUrl}/embed`).
 *
 * A GET on the endpoint resolves the embedding-space identity (fingerprint
 * and dimension); POSTs embed batches of text. Every embed response is
 * checked against the resolved fingerprint, and a mismatch raises
 * {@link PrometheusEmbeddingDriftError}.
 */
var PrometheusEmbeddingProvider = class {
  name;
  region;
  #apiKey;
  #url;
  #batchSize;
  #maxBatchChars;
  #maxRetries;
  #retryBaseMs;
  #fetch;
  #identity = null;
  #identityPromise = null;
  #creditsUsed = 0;
  /**
   * @param {object} opts
   * @param {string} opts.apiKey - Required, non-empty; sent as a Bearer token.
   * @param {string} [opts.name="prometheus"]
   * @param {string} [opts.region="eu"]
   * @param {string} [opts.baseUrl] - Trailing slashes are stripped; `/embed` is appended.
   * @param {number} [opts.batchSize] - Max items per request.
   * @param {number} [opts.maxBatchChars] - Max total characters per request.
   * @param {number} [opts.maxRetries] - Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] - First backoff delay; doubles per attempt.
   * @param {typeof fetch} [opts.fetch] - Fetch implementation (defaults to the global).
   * @throws {Error} When `apiKey` is missing, not a string, or empty.
   */
  constructor(opts) {
    // Reject a missing key as well as an empty one; otherwise every request
    // would carry the literal header "Bearer undefined".
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("PrometheusEmbeddingProvider: apiKey is required");
    }
    this.name = opts.name ?? "prometheus";
    this.region = opts.region ?? "eu";
    this.#apiKey = opts.apiKey;
    this.#url = `${(opts.baseUrl ?? DEFAULT_BASE).replace(/\/+$/, "")}/embed`;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH2;
    this.#maxBatchChars = opts.maxBatchChars ?? DEFAULT_BATCH_CHARS;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES2;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * The abstract fingerprint stands in for the (hidden) upstream model
   * id. Throws until the identity has been resolved — call
   * {@link resolveIdentity} (or `embed()`) first.
   */
  get model() {
    return this.#requireIdentity().fingerprint;
  }
  /** Vector dimension. Throws until the identity has been resolved. */
  get dimension() {
    return this.#requireIdentity().dimension;
  }
  /** Cumulative credits charged across all embed calls of this instance. */
  get creditsUsed() {
    return this.#creditsUsed;
  }
  /**
   * Resolve (and cache) the proxy's embedding-space identity via the
   * free GET. Safe to call concurrently; a failed resolution is not
   * cached, so callers may retry.
   *
   * @param {AbortSignal} [signal] - Used only by the call that starts the request.
   * @returns {Promise<{fingerprint: string, dimension: number}>}
   */
  async resolveIdentity(signal) {
    if (this.#identity !== null)
      return this.#identity;
    if (this.#identityPromise === null) {
      this.#identityPromise = this.#fetchIdentity(signal);
      // Clear the shared promise on failure so the next caller starts a fresh
      // request. The rejection itself still reaches whoever awaits the promise.
      this.#identityPromise.catch(() => {
        this.#identityPromise = null;
      });
    }
    return this.#identityPromise;
  }
  /**
   * Embed `texts` sequentially in batches and return one `Float32Array`
   * per input, in input order.
   *
   * @param {string[]} texts - Inputs; an empty array short-circuits to `[]`.
   * @param {object} [opts]
   * @param {AbortSignal} [opts.signal]
   * @param {string} [opts.inputType="document"] - Forwarded in the request body.
   * @param {(p: {done: number, total: number, batchSize: number}) => void} [opts.onProgress]
   *   Called after each batch; exceptions it throws are ignored.
   * @returns {Promise<Float32Array[]>}
   */
  async embed(texts, opts) {
    if (texts.length === 0)
      return [];
    const identity = await this.resolveIdentity(opts?.signal);
    const out = new Array(texts.length);
    let done = 0;
    let start = 0;
    while (start < texts.length) {
      const end = this.#batchEnd(texts, start);
      const batch = texts.slice(start, end);
      const vectors = await this.#embedBatch(batch, identity, opts?.inputType ?? "document", opts?.signal);
      for (let i = 0; i < vectors.length; i++)
        out[start + i] = vectors[i];
      done += batch.length;
      if (opts?.onProgress !== void 0) {
        try {
          opts.onProgress({ done, total: texts.length, batchSize: batch.length });
        } catch {
          // Deliberate best-effort: a faulty progress callback must not fail the embed.
        }
      }
      start = end;
    }
    return out;
  }
  /** Return the cached identity or throw if it has not been resolved yet. */
  #requireIdentity() {
    if (this.#identity === null) {
      throw new Error("PrometheusEmbeddingProvider: identity not resolved yet \u2014 await resolveIdentity() (or a first embed()) before reading model/dimension");
    }
    return this.#identity;
  }
  /** GET the identity, validate its shape, and cache it on the instance. */
  async #fetchIdentity(signal) {
    const init = {
      method: "GET",
      headers: { authorization: `Bearer ${this.#apiKey}` }
    };
    if (signal !== void 0)
      init.signal = signal;
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || typeof payload.fingerprint !== "string" || payload.fingerprint === "" || !Number.isInteger(payload.dimension) || payload.dimension <= 0) {
      throw nonRetryable2("prometheus-embed: malformed identity response");
    }
    const identity = {
      fingerprint: payload.fingerprint,
      dimension: payload.dimension
    };
    this.#identity = identity;
    return identity;
  }
  /**
   * Greedy batch cut respecting both the item cap and the char budget.
   *
   * Always returns an index strictly greater than `start` (the caller
   * guarantees `start < texts.length`), so `embed()` cannot spin forever
   * when `#batchSize` is zero, negative or NaN. A single text larger than
   * the char budget still forms its own one-item batch.
   */
  #batchEnd(texts, start) {
    let chars = 0;
    let end = start;
    while (end < texts.length && end - start < this.#batchSize) {
      const len = texts[end].length;
      if (end > start && chars + len > this.#maxBatchChars)
        break;
      chars += len;
      end += 1;
    }
    // Forward-progress guarantee: take at least one item per batch.
    return Math.max(end, start + 1);
  }
  /**
   * POST one batch and convert the response into `Float32Array`s ordered
   * by each row's `index`. Adds `usage.credits` (when numeric) to the
   * running credit total.
   *
   * @throws {PrometheusEmbeddingDriftError} When the response fingerprint
   *   differs from the resolved one; the cached identity is dropped first.
   */
  async #embedBatch(batch, identity, inputType, signal) {
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify({ input: batch, inputType })
    };
    if (signal !== void 0)
      init.signal = signal;
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || !Array.isArray(payload.embeddings)) {
      throw nonRetryable2("prometheus-embed: malformed embed response");
    }
    if (typeof payload.fingerprint === "string" && payload.fingerprint !== identity.fingerprint) {
      // Forget the stale identity so a later call re-resolves it.
      this.#identity = null;
      this.#identityPromise = null;
      throw new PrometheusEmbeddingDriftError(identity.fingerprint, payload.fingerprint);
    }
    if (payload.embeddings.length !== batch.length) {
      throw nonRetryable2(`prometheus-embed: expected ${batch.length} embeddings, got ${payload.embeddings.length}`);
    }
    const sorted = [...payload.embeddings].sort((a, b) => a.index - b.index);
    const vectors = sorted.map((row) => {
      if (!Array.isArray(row.vector) || row.vector.length !== identity.dimension) {
        throw nonRetryable2(`prometheus-embed: embedding length ${row.vector?.length ?? 0} does not match resolved dimension ${identity.dimension}`);
      }
      return Float32Array.from(row.vector);
    });
    const credits = payload.usage?.credits;
    if (typeof credits === "number" && Number.isFinite(credits)) {
      this.#creditsUsed += credits;
    }
    return vectors;
  }
  /**
   * Fetch with retry. 5xx and network errors back off exponentially;
   * everything else (401 invalid key, 413 oversized input, 429 quota
   * exhausted — a *monthly* limit, retrying cannot help) fails fast
   * with the proxy's error code in the message.
   */
  async #requestJson(init, signal) {
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(this.#url, init);
        if (res.status >= 500 && res.status < 600) {
          lastError = new Error(`prometheus-embed: HTTP ${res.status}`);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
          continue;
        }
        if (!res.ok) {
          const body = await res.json().catch(() => null);
          const detail = typeof body?.code === "string" ? `${body.code}${typeof body.error === "string" ? ` \u2014 ${body.error}` : ""}` : res.statusText;
          throw nonRetryable2(`prometheus-embed: HTTP ${res.status} ${detail}`);
        }
        return await res.json();
      } catch (err) {
        if (err?.name === "AbortError")
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`prometheus-embed: exhausted ${this.#maxRetries} retries`);
  }
};
|
|
507
|
+
|
|
11
508
|
// dist/api-key.js
|
|
12
509
|
var KEY_PATTERN = /^prom_(live|test)_[A-Za-z0-9]{10,}$/;
|
|
13
510
|
var API_KEY_ENV = "PROMETHEUS_API_KEY";
|
|
@@ -28,6 +525,56 @@ import { mkdirSync } from "node:fs";
|
|
|
28
525
|
import { dirname } from "node:path";
|
|
29
526
|
import Database from "better-sqlite3";
|
|
30
527
|
|
|
528
|
+
// dist/rrf.js
|
|
529
|
+
/**
 * Merge several ranked lists into one ranking with Reciprocal Rank Fusion.
 *
 * Each item contributes `weight / (k + rank)` to its key's score, where
 * `rank` is its 1-based position in its list after repeated keys within
 * that list are skipped. Results are ordered by descending score; ties keep
 * the order in which keys were first encountered. The payload kept for a
 * key is the one from its first occurrence.
 *
 * @param {Array<{id: string, weight?: number, items: Iterable<{key: any, payload: any}>}>} lists
 *   Ranked lists, best item first. `weight` defaults to 1.
 * @param {{k?: number, limit?: number}} [options] - `k` defaults to 60; a
 *   `limit` that is not `undefined` and is `>= 0` truncates the result.
 * @returns {Array<{key: any, score: number, contribs: Record<string, number>, payload: any}>}
 * @throws {Error} When `k` is not a finite number greater than 0.
 */
function reciprocalRankFusion(lists, options = {}) {
  const k = options.k ?? 60;
  if (!(Number.isFinite(k) && k > 0)) {
    throw new Error(`reciprocalRankFusion: k must be > 0, got ${k}`);
  }
  // key -> accumulated state; `order` records first-seen position for tie-breaks.
  const fused = /* @__PURE__ */ new Map();
  for (const list of lists) {
    const weight = list.weight ?? 1;
    const alreadyRanked = /* @__PURE__ */ new Set();
    let rank = 0;
    for (const item of list.items) {
      if (alreadyRanked.has(item.key)) {
        continue;
      }
      alreadyRanked.add(item.key);
      rank += 1;
      const delta = weight / (k + rank);
      const entry = fused.get(item.key);
      if (entry === void 0) {
        fused.set(item.key, {
          score: 0 + delta,
          contribs: { [list.id]: delta },
          payload: item.payload,
          order: fused.size
        });
      } else {
        entry.score += delta;
        entry.contribs[list.id] = (entry.contribs[list.id] ?? 0) + delta;
      }
    }
  }
  const merged = Array.from(fused, ([key, entry]) => ({
    key,
    score: entry.score,
    contribs: entry.contribs,
    payload: entry.payload
  }));
  merged.sort((a, b) => {
    if (b.score !== a.score) {
      return b.score - a.score;
    }
    return fused.get(a.key).order - fused.get(b.key).order;
  });
  const { limit } = options;
  if (limit !== void 0 && limit >= 0) {
    return merged.slice(0, limit);
  }
  return merged;
}
|
|
577
|
+
|
|
31
578
|
// dist/types.js
|
|
32
579
|
var MEMORY_SCOPES = [
|
|
33
580
|
"system",
|
|
@@ -131,11 +678,54 @@ CREATE TRIGGER IF NOT EXISTS agent_memory_au AFTER UPDATE ON agent_memory BEGIN
|
|
|
131
678
|
VALUES (new.rowid, new.key, new.value);
|
|
132
679
|
END;
|
|
133
680
|
`;
|
|
681
|
+
var VEC_SCHEMA = `
|
|
682
|
+
CREATE TABLE IF NOT EXISTS agent_memory_vec (
|
|
683
|
+
record_id TEXT PRIMARY KEY,
|
|
684
|
+
vector BLOB NOT NULL,
|
|
685
|
+
dim INTEGER NOT NULL
|
|
686
|
+
);
|
|
687
|
+
CREATE TABLE IF NOT EXISTS embedding_meta (
|
|
688
|
+
id INTEGER PRIMARY KEY CHECK (id = 1),
|
|
689
|
+
fingerprint TEXT NOT NULL,
|
|
690
|
+
dim INTEGER NOT NULL
|
|
691
|
+
);
|
|
692
|
+
CREATE TRIGGER IF NOT EXISTS agent_memory_vec_ad AFTER DELETE ON agent_memory BEGIN
|
|
693
|
+
DELETE FROM agent_memory_vec WHERE record_id = old.id;
|
|
694
|
+
END;
|
|
695
|
+
`;
|
|
696
|
+
/**
 * Expose a typed array's bytes as a Node `Buffer`.
 *
 * The buffer is a view over the same underlying `ArrayBuffer` region
 * (offset and length taken from the typed array), not a copy.
 *
 * @param {Float32Array} vector - Source vector (any typed-array view works).
 * @returns {Buffer}
 */
function vectorToBlob(vector) {
  const { buffer, byteOffset, byteLength } = vector;
  return Buffer.from(buffer, byteOffset, byteLength);
}
|
|
699
|
+
/**
 * Decode a `Buffer` of little-endian 32-bit floats into a fresh
 * `Float32Array` (one element per 4 bytes).
 *
 * @param {Buffer} blob - Raw bytes.
 * @returns {Float32Array} A new array; it does not share memory with `blob`.
 */
function blobToVector(blob) {
  return Float32Array.from(
    { length: blob.byteLength / 4 },
    (_unused, idx) => blob.readFloatLE(idx * 4)
  );
}
|
|
705
|
+
/**
 * Cosine similarity of two numeric vectors, iterating over `a`'s length.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b - Read at the same indices as `a`.
 * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either vector has
 *   zero magnitude (including when `a` is empty).
 */
function cosine(a, b) {
  const size = a.length;
  let inner = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let idx = 0;
  while (idx < size) {
    const av = a[idx];
    const bv = b[idx];
    inner += av * bv;
    sumSqA += av * av;
    sumSqB += bv * bv;
    idx += 1;
  }
  // A zero-magnitude vector has no direction; report no similarity.
  const degenerate = sumSqA === 0 || sumSqB === 0;
  return degenerate ? 0 : inner / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
|
|
720
|
+
/**
 * Produce a single-line preview of `value`: whitespace runs are collapsed
 * to one space, the ends are trimmed, and text longer than `cap` characters
 * is cut at `cap` and suffixed with a space and an ellipsis.
 *
 * @param {string} value - Source text.
 * @param {number} [cap=200] - Maximum length before truncation.
 * @returns {string}
 */
function fallbackSnippet(value, cap = 200) {
  const flat = value.replace(/\s+/g, " ").trim();
  if (!(flat.length > cap)) {
    return flat;
  }
  return `${flat.slice(0, cap)} \u2026`;
}
|
|
134
724
|
/**
 * Turn free-text input into a full-text MATCH expression.
 *
 * The query is split on whitespace and double quotes are stripped from each
 * token. Every remaining token is wrapped as `"token" *` (presumably an
 * FTS5 prefix match — confirm against the FTS schema), and the terms are
 * joined with ` OR `.
 *
 * @param {string} query - Raw user query.
 * @returns {string} The MATCH expression, or `""` when no usable token remains.
 */
function toFtsQuery(query) {
  const terms = [];
  for (const raw of query.split(/\s+/)) {
    const cleaned = raw.replace(/"/g, "").trim();
    if (cleaned.length > 0) {
      terms.push(`"${cleaned}" *`);
    }
  }
  return terms.length === 0 ? "" : terms.join(" OR ");
}
|
|
140
730
|
function rowToRecord(row) {
|
|
141
731
|
return {
|
|
@@ -155,8 +745,11 @@ function rowToRecord(row) {
|
|
|
155
745
|
}
|
|
156
746
|
var SqliteMemoryBackend = class {
|
|
157
747
|
db;
|
|
748
|
+
embedder;
|
|
749
|
+
/** Record ids whose vector is missing/stale, awaiting a batched embed. */
|
|
750
|
+
pendingEmbed = /* @__PURE__ */ new Set();
|
|
158
751
|
closed = false;
|
|
159
|
-
constructor(dbPath) {
|
|
752
|
+
constructor(dbPath, opts = {}) {
|
|
160
753
|
if (dbPath !== ":memory:") {
|
|
161
754
|
mkdirSync(dirname(dbPath), { recursive: true });
|
|
162
755
|
}
|
|
@@ -164,7 +757,24 @@ var SqliteMemoryBackend = class {
|
|
|
164
757
|
this.db.pragma("journal_mode = WAL");
|
|
165
758
|
this.db.exec(SCHEMA);
|
|
166
759
|
this.db.exec(FTS_SCHEMA);
|
|
760
|
+
this.db.exec(VEC_SCHEMA);
|
|
167
761
|
this.db.exec(`INSERT INTO agent_memory_fts (agent_memory_fts) VALUES ('rebuild')`);
|
|
762
|
+
this.embedder = opts.embedder;
|
|
763
|
+
if (this.embedder !== void 0)
|
|
764
|
+
this.queueUnembedded();
|
|
765
|
+
}
|
|
766
|
+
/**
|
|
767
|
+
* Queue every record lacking a stored vector for a (re)embed. Run at
|
|
768
|
+
* open so a DB first built keyword-only — or one whose vectors were
|
|
769
|
+
* wiped after an embedding-space change — lazily catches up on the next
|
|
770
|
+
* `search` instead of needing a manual reindex.
|
|
771
|
+
*/
|
|
772
|
+
queueUnembedded() {
|
|
773
|
+
const rows = this.db.prepare(`SELECT m.id AS id FROM agent_memory m
|
|
774
|
+
LEFT JOIN agent_memory_vec v ON v.record_id = m.id
|
|
775
|
+
WHERE v.record_id IS NULL`).all();
|
|
776
|
+
for (const r of rows)
|
|
777
|
+
this.pendingEmbed.add(r.id);
|
|
168
778
|
}
|
|
169
779
|
audit(action, fields, detail) {
|
|
170
780
|
this.db.prepare(`INSERT INTO audit_log (ts, action, scope, scope_id, type, key, detail)
|
|
@@ -177,6 +787,8 @@ var SqliteMemoryBackend = class {
|
|
|
177
787
|
this.db.prepare(`UPDATE agent_memory SET value = ?, confidence = ?, source = ?, tags = ?, updated_at = ?
|
|
178
788
|
WHERE id = ?`).run(input.value, input.confidence ?? null, input.source ?? existing.source, input.tags ? JSON.stringify(input.tags) : existing.tags, now, existing.id);
|
|
179
789
|
this.audit("write.update", input);
|
|
790
|
+
if (this.embedder !== void 0)
|
|
791
|
+
this.pendingEmbed.add(existing.id);
|
|
180
792
|
return this.byId(existing.id);
|
|
181
793
|
}
|
|
182
794
|
const id = randomUUID();
|
|
@@ -184,6 +796,8 @@ var SqliteMemoryBackend = class {
|
|
|
184
796
|
(id, project_id, scope, scope_id, type, key, value, confidence, source, tags, use_count, created_at, updated_at)
|
|
185
797
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)`).run(id, input.projectId, input.scope, input.scopeId, input.type, input.key, input.value, input.confidence ?? null, input.source ?? null, input.tags ? JSON.stringify(input.tags) : null, now, now);
|
|
186
798
|
this.audit("write.insert", input);
|
|
799
|
+
if (this.embedder !== void 0)
|
|
800
|
+
this.pendingEmbed.add(id);
|
|
187
801
|
return this.byId(id);
|
|
188
802
|
}
|
|
189
803
|
byId(id) {
|
|
@@ -234,9 +848,40 @@ var SqliteMemoryBackend = class {
|
|
|
234
848
|
const rows = this.db.prepare(sql).all(...params);
|
|
235
849
|
return rows.map(rowToRecord);
|
|
236
850
|
}
|
|
851
|
+
/**
|
|
852
|
+
* Hybrid search: FTS5 BM25 (keyword) ⊕ vector cosine (semantic), fused
|
|
853
|
+
* via RRF. The vector channel is best-effort — when no embedder is
|
|
854
|
+
* configured, or it is unreachable (offline / no key / proxy error),
|
|
855
|
+
* the method degrades to pure keyword search, byte-for-byte the Phase-1
|
|
856
|
+
* behaviour. This is the local-first guarantee: semantic recall is an
|
|
857
|
+
* enhancement, never a hard dependency.
|
|
858
|
+
*/
|
|
237
859
|
async search(input) {
|
|
238
860
|
if (input.chain.length === 0)
|
|
239
861
|
return [];
|
|
862
|
+
const finalLimit = input.limit ?? 20;
|
|
863
|
+
const poolLimit = Math.max(finalLimit * 4, 40);
|
|
864
|
+
const ftsHits = this.ftsSearch(input, poolLimit);
|
|
865
|
+
let vecHits = [];
|
|
866
|
+
if (this.embedder !== void 0) {
|
|
867
|
+
try {
|
|
868
|
+
vecHits = await this.vectorSearch(input, poolLimit);
|
|
869
|
+
} catch {
|
|
870
|
+
vecHits = [];
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
if (vecHits.length === 0)
|
|
874
|
+
return ftsHits.slice(0, finalLimit);
|
|
875
|
+
if (ftsHits.length === 0)
|
|
876
|
+
return vecHits.slice(0, finalLimit);
|
|
877
|
+
const fused = reciprocalRankFusion([
|
|
878
|
+
{ id: "fts", items: ftsHits.map((h) => ({ key: h.record.id, payload: h })) },
|
|
879
|
+
{ id: "vec", items: vecHits.map((h) => ({ key: h.record.id, payload: h })) }
|
|
880
|
+
], { limit: finalLimit });
|
|
881
|
+
return fused.map((f) => f.payload);
|
|
882
|
+
}
|
|
883
|
+
/** FTS5 BM25 keyword channel → ranked hits (best first). */
|
|
884
|
+
ftsSearch(input, limit) {
|
|
240
885
|
const match = toFtsQuery(input.query);
|
|
241
886
|
if (match === "")
|
|
242
887
|
return [];
|
|
@@ -253,12 +898,131 @@ var SqliteMemoryBackend = class {
|
|
|
253
898
|
params.push(...input.types);
|
|
254
899
|
}
|
|
255
900
|
sql += ` ORDER BY rank LIMIT ?`;
|
|
256
|
-
params.push(
|
|
901
|
+
params.push(limit);
|
|
257
902
|
const rows = this.db.prepare(sql).all(...params);
|
|
258
|
-
return rows.map((row) => ({
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
903
|
+
return rows.map((row) => ({ record: rowToRecord(row), snippet: row.snip }));
|
|
904
|
+
}
|
|
905
|
+
/**
|
|
906
|
+
* Vector channel: brute-force cosine of the query vector against every
|
|
907
|
+
* in-scope stored vector (memory sets are tiny — no ANN index needed).
|
|
908
|
+
* Flushes pending embeds first so freshly written records are searchable.
|
|
909
|
+
* May throw on an embed failure; the caller (`search`) catches it.
|
|
910
|
+
*/
|
|
911
|
+
async vectorSearch(input, limit) {
|
|
912
|
+
if (this.embedder === void 0)
|
|
913
|
+
return [];
|
|
914
|
+
await this.flushEmbeddings();
|
|
915
|
+
const scopePairs = input.chain.map(() => `(m.scope = ? AND m.scope_id = ?)`).join(" OR ");
|
|
916
|
+
const params = [];
|
|
917
|
+
params.push(...input.chain.flatMap((l) => [l.scope, l.scopeId]));
|
|
918
|
+
let sql = `
|
|
919
|
+
SELECT m.*, v.vector AS vec FROM agent_memory_vec v
|
|
920
|
+
JOIN agent_memory m ON m.id = v.record_id
|
|
921
|
+
WHERE (${scopePairs})`;
|
|
922
|
+
if (input.types && input.types.length > 0) {
|
|
923
|
+
sql += ` AND m.type IN (${input.types.map(() => "?").join(", ")})`;
|
|
924
|
+
params.push(...input.types);
|
|
925
|
+
}
|
|
926
|
+
const rows = this.db.prepare(sql).all(...params);
|
|
927
|
+
if (rows.length === 0)
|
|
928
|
+
return [];
|
|
929
|
+
const embedded = await this.embedder.embed([input.query], { inputType: "query" });
|
|
930
|
+
const queryVec = embedded[0];
|
|
931
|
+
if (queryVec === void 0)
|
|
932
|
+
return [];
|
|
933
|
+
const scored = [];
|
|
934
|
+
for (const row of rows) {
|
|
935
|
+
const vec = blobToVector(row.vec);
|
|
936
|
+
if (vec.length !== queryVec.length)
|
|
937
|
+
continue;
|
|
938
|
+
const score = cosine(queryVec, vec);
|
|
939
|
+
if (!(score > 0))
|
|
940
|
+
continue;
|
|
941
|
+
const record = rowToRecord(row);
|
|
942
|
+
scored.push({
|
|
943
|
+
hit: { record, snippet: fallbackSnippet(record.value) },
|
|
944
|
+
score
|
|
945
|
+
});
|
|
946
|
+
}
|
|
947
|
+
scored.sort((a, b) => b.score - a.score);
|
|
948
|
+
return scored.slice(0, limit).map((s) => s.hit);
|
|
949
|
+
}
|
|
950
|
+
/** Read the pinned embedding-space identity, if any. */
|
|
951
|
+
getEmbeddingMeta() {
|
|
952
|
+
const row = this.db.prepare(`SELECT fingerprint, dim FROM embedding_meta WHERE id = 1`).get();
|
|
953
|
+
return row ?? null;
|
|
954
|
+
}
|
|
955
|
+
/** Pin (or re-pin) the embedding-space fingerprint + dimension. */
|
|
956
|
+
setEmbeddingMeta(fingerprint, dim) {
|
|
957
|
+
this.db.prepare(`INSERT INTO embedding_meta (id, fingerprint, dim) VALUES (1, ?, ?)
|
|
958
|
+
ON CONFLICT(id) DO UPDATE SET fingerprint = excluded.fingerprint, dim = excluded.dim`).run(fingerprint, dim);
|
|
959
|
+
}
|
|
960
|
+
/** Drop every stored vector + the pinned space; re-queue all records. */
|
|
961
|
+
resetVectorSpace() {
|
|
962
|
+
this.db.exec(`DELETE FROM agent_memory_vec; DELETE FROM embedding_meta;`);
|
|
963
|
+
this.pendingEmbed.clear();
|
|
964
|
+
this.queueUnembedded();
|
|
965
|
+
}
|
|
966
|
+
/**
|
|
967
|
+
* Batch-embed every pending record's `value` and store the vectors.
|
|
968
|
+
* Lazy (only called from `vectorSearch`) and best-effort: a network/key
|
|
969
|
+
* failure throws and leaves rows pending (FTS-only fallback); a proxy
|
|
970
|
+
* embedding-space change — drift mid-run, or a cross-run fingerprint
|
|
971
|
+
* mismatch — wipes stale vectors and re-embeds in the new space so the
|
|
972
|
+
* store is never a mix of vector spaces.
|
|
973
|
+
*/
|
|
974
|
+
async flushEmbeddings() {
|
|
975
|
+
if (this.embedder === void 0 || this.pendingEmbed.size === 0)
|
|
976
|
+
return;
|
|
977
|
+
const load = (ids) => {
|
|
978
|
+
if (ids.length === 0)
|
|
979
|
+
return [];
|
|
980
|
+
const ph = ids.map(() => "?").join(", ");
|
|
981
|
+
return this.db.prepare(`SELECT id, value FROM agent_memory WHERE id IN (${ph})`).all(...ids);
|
|
982
|
+
};
|
|
983
|
+
let rows = load([...this.pendingEmbed]);
|
|
984
|
+
const live = new Set(rows.map((r) => r.id));
|
|
985
|
+
for (const id of [...this.pendingEmbed])
|
|
986
|
+
if (!live.has(id))
|
|
987
|
+
this.pendingEmbed.delete(id);
|
|
988
|
+
if (rows.length === 0)
|
|
989
|
+
return;
|
|
990
|
+
let vectors;
|
|
991
|
+
try {
|
|
992
|
+
vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
|
|
993
|
+
} catch (err) {
|
|
994
|
+
if (err.code === "EMBEDDING_DRIFT") {
|
|
995
|
+
this.resetVectorSpace();
|
|
996
|
+
rows = load([...this.pendingEmbed]);
|
|
997
|
+
if (rows.length === 0)
|
|
998
|
+
return;
|
|
999
|
+
vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
|
|
1000
|
+
} else {
|
|
1001
|
+
throw err;
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
const fingerprint = this.embedder.model;
|
|
1005
|
+
const dim = this.embedder.dimension;
|
|
1006
|
+
const meta = this.getEmbeddingMeta();
|
|
1007
|
+
if (meta !== null && meta.fingerprint !== fingerprint) {
|
|
1008
|
+
this.db.exec(`DELETE FROM agent_memory_vec;`);
|
|
1009
|
+
this.setEmbeddingMeta(fingerprint, dim);
|
|
1010
|
+
this.pendingEmbed.clear();
|
|
1011
|
+
this.queueUnembedded();
|
|
1012
|
+
for (const r of rows)
|
|
1013
|
+
this.pendingEmbed.delete(r.id);
|
|
1014
|
+
} else if (meta === null) {
|
|
1015
|
+
this.setEmbeddingMeta(fingerprint, dim);
|
|
1016
|
+
}
|
|
1017
|
+
const upsert = this.db.prepare(`INSERT INTO agent_memory_vec (record_id, vector, dim) VALUES (?, ?, ?)
|
|
1018
|
+
ON CONFLICT(record_id) DO UPDATE SET vector = excluded.vector, dim = excluded.dim`);
|
|
1019
|
+
const store = this.db.transaction((items) => {
|
|
1020
|
+
for (const it of items)
|
|
1021
|
+
upsert.run(it.id, vectorToBlob(it.vec), dim);
|
|
1022
|
+
});
|
|
1023
|
+
store(rows.map((r, i) => ({ id: r.id, vec: vectors[i] })));
|
|
1024
|
+
for (const r of rows)
|
|
1025
|
+
this.pendingEmbed.delete(r.id);
|
|
262
1026
|
}
|
|
263
1027
|
async delete(input) {
|
|
264
1028
|
const result = this.db.prepare(`DELETE FROM agent_memory
|
|
@@ -328,21 +1092,79 @@ function projectIdFor(workspaceRoot) {
|
|
|
328
1092
|
/**
 * Default location of the memory database:
 * `<home directory>/.prometheus/memory.db`.
 *
 * @returns {string} Path built from the current user's home directory.
 */
function defaultMemoryDbPath() {
  const segments = [homedir(), ".prometheus", "memory.db"];
  return join(...segments);
}
|
|
1095
|
+
/**
 * Read an integer setting from an environment-like object.
 *
 * @param env Object mapping variable names to string values.
 * @param name Variable to read.
 * @param def Value returned when the variable is unset, empty, or does
 *   not parse to a finite integer.
 * @returns The base-10 `parseInt` result, or `def`.
 */
function intEnv(env, name, def) {
  const raw = env[name];
  const isUnset = raw === void 0 || raw === "";
  if (isUnset) {
    return def;
  }
  const parsed = Number.parseInt(raw, 10);
  // `isFinite` (not `isNaN`): parseInt yields Infinity for very long
  // digit strings, and that must fall back to the default as well.
  if (!Number.isFinite(parsed)) {
    return def;
  }
  return parsed;
}
|
|
1102
|
+
/**
 * Build the embedding provider that talks to Voyage directly.
 *
 * Every tunable can be overridden through a `VOYAGE_*` environment
 * variable; the literals below are the defaults.
 *
 * @param env Environment-like object holding the optional overrides.
 * @param apiKey Voyage API key passed to the provider.
 * @returns A configured `OpenAICompatEmbeddingProvider`.
 */
function buildVoyageEmbedder(env, apiKey) {
  const readInt = (name, fallback) => intEnv(env, name, fallback);
  const options = {
    name: "voyage",
    model: env.VOYAGE_MODEL ?? "voyage-3-large",
    dimension: readInt("VOYAGE_DIM", 1024),
    region: "us",
    baseUrl: env.VOYAGE_BASE_URL ?? "https://api.voyageai.com/v1",
    apiKey,
    omitEncodingFormat: true,
    // Voyage limits the summed input tokens of one request, so batches are
    // sized by an estimate that stays under that cap (same knobs and
    // rationale as context-mcp).
    maxBatchTokens: readInt("VOYAGE_MAX_BATCH_TOKENS", 9e4),
    charsPerToken: readInt("VOYAGE_CHARS_PER_TOKEN", 2),
    // The Voyage free tier rate-limits at 3 RPM, hence a longer backoff
    // than the provider default.
    maxRetries: readInt("VOYAGE_MAX_RETRIES", 6),
    retryBaseMs: readInt("VOYAGE_RETRY_BASE_MS", 2e3)
  };
  return new OpenAICompatEmbeddingProvider(options);
}
|
|
1120
|
+
/**
 * Decide which embedder (if any) the memory backend should use.
 *
 * `PROMETHEUS_MEMORY_EMBED` (case-insensitive, default `auto`) selects:
 * - `off`: no embedder.
 * - `voyage`: Voyage-direct; throws when `VOYAGE_API_KEY` is unset/empty.
 * - `prometheus`: the Prometheus proxy, keyed via `requireApiKey`.
 * - anything else (including `auto`): Voyage when `VOYAGE_API_KEY` is
 *   set, otherwise the proxy when the Prometheus key variable is
 *   non-blank, otherwise no embedder.
 *
 * @param env Environment-like object to read settings from.
 * @returns `{ id, embedder }` where `id` is `"voyage"`, `"prometheus"` or
 *   `"none"`, and `embedder` is `undefined` for `"none"`.
 */
function discoverMemoryEmbedder(env) {
  const mode = (env.PROMETHEUS_MEMORY_EMBED ?? "auto").toLowerCase();
  const baseUrl = env.PROMETHEUS_API_URL;
  const isSet = (value) => value !== void 0 && value !== "";
  const withoutEmbedder = () => ({ id: "none", embedder: void 0 });
  const viaVoyage = (key) => ({ id: "voyage", embedder: buildVoyageEmbedder(env, key) });
  const viaProxy = () => {
    const apiKey = requireApiKey(env);
    // Only pass `baseUrl` when one is configured.
    const options = isSet(baseUrl) ? { apiKey, baseUrl } : { apiKey };
    return { id: "prometheus", embedder: new PrometheusEmbeddingProvider(options) };
  };
  switch (mode) {
    case "off":
      return withoutEmbedder();
    case "voyage": {
      const key = env.VOYAGE_API_KEY;
      if (!isSet(key)) {
        throw new Error('PROMETHEUS_MEMORY_EMBED="voyage" requires VOYAGE_API_KEY to be set.');
      }
      return viaVoyage(key);
    }
    case "prometheus":
      return viaProxy();
    default:
      break;
  }
  // Auto-discovery: prefer a Voyage key, then a Prometheus key.
  const voyageKey = env.VOYAGE_API_KEY;
  if (isSet(voyageKey)) {
    return viaVoyage(voyageKey);
  }
  const promKey = env[API_KEY_ENV]?.trim();
  if (isSet(promKey)) {
    return viaProxy();
  }
  return withoutEmbedder();
}
|
|
331
1151
|
/**
 * Wire up the memory server's dependencies from environment settings.
 *
 * Resolves the workspace root (`PROMETHEUS_WORKSPACE_ROOT`, else the
 * current working directory), derives the project id and name from it,
 * picks the database path (`PROMETHEUS_MEMORY_DB_PATH`, else the default),
 * and opens the SQLite backend with whichever embedder discovery returns.
 * When discovery returns no embedder the backend is created without one.
 *
 * @param opts Options; `opts.env` is the environment-like object to read.
 * @returns The backend plus resolved workspace/project/db details,
 *   `embeddingsEnabled`, `embedderId`, and a `close` function that closes
 *   the backend.
 */
function composeFromEnv(opts) {
  const { env } = opts;
  const workspaceRoot = resolve(env.PROMETHEUS_WORKSPACE_ROOT ?? process.cwd());
  const projectId = projectIdFor(workspaceRoot);
  // `basename` is empty for a filesystem root; fall back to the full path.
  const projectName = basename(workspaceRoot) || workspaceRoot;
  const configuredDbPath = env.PROMETHEUS_MEMORY_DB_PATH;
  const hasConfiguredDbPath = configuredDbPath !== void 0 && configuredDbPath !== "";
  const dbPath = hasConfiguredDbPath ? configuredDbPath : defaultMemoryDbPath();
  const discovered = discoverMemoryEmbedder(env);
  const embeddingsEnabled = discovered.embedder !== void 0;
  // Omit the `embedder` key entirely rather than passing `undefined`.
  const backendOptions = embeddingsEnabled ? { embedder: discovered.embedder } : {};
  const backend = new SqliteMemoryBackend(dbPath, backendOptions);
  return {
    backend,
    workspaceRoot,
    projectId,
    projectName,
    dbPath,
    embeddingsEnabled,
    embedderId: discovered.id,
    close: () => backend.close()
  };
}
|
|
@@ -743,7 +1565,7 @@ var setupInput = {
|
|
|
743
1565
|
};
|
|
744
1566
|
function registerTools(server, deps) {
|
|
745
1567
|
const { backend, workspaceRoot, projectId, projectName, dbPath } = deps;
|
|
746
|
-
server.registerTool("
|
|
1568
|
+
server.registerTool("read", {
|
|
747
1569
|
title: "Recall agent memory",
|
|
748
1570
|
description: "Read agent memory for this project along the scope chain (project \u2192 workspace \u2192 tenant \u2192 system; narrowest scope wins). Syncs `.prometheus/memories/*.md` first, then returns the resolved records plus a prompt-ready `woven` markdown block (token-capped). Call this at the START of a session or task to recall what earlier sessions learned.",
|
|
749
1571
|
inputSchema: readInput
|
|
@@ -763,7 +1585,7 @@ function registerTools(server, deps) {
|
|
|
763
1585
|
records: records.map(recordToJson)
|
|
764
1586
|
});
|
|
765
1587
|
});
|
|
766
|
-
server.registerTool("
|
|
1588
|
+
server.registerTool("write", {
|
|
767
1589
|
title: "Store agent memory",
|
|
768
1590
|
description: "Upsert one memory record (identity: scope+type+key). Use type `semantic` for durable facts, `procedural` for how-to knowledge, `episodic` for session events, `working` for short-lived notes. Default scope `project` also mirrors the value to `.prometheus/memories/<key>.md` (git-versioned, human-editable). Values matching the secret deny-list are rejected. Call this whenever the user states a durable preference, decision, or correction worth remembering.",
|
|
769
1591
|
inputSchema: writeInput
|
|
@@ -790,7 +1612,7 @@ ${args.value}`);
|
|
|
790
1612
|
}
|
|
791
1613
|
return textResult({ record: recordToJson(record), projectFile });
|
|
792
1614
|
});
|
|
793
|
-
server.registerTool("
|
|
1615
|
+
server.registerTool("capture", {
|
|
794
1616
|
title: "Consolidate session learnings",
|
|
795
1617
|
description: "Session-end consolidation: `plan`/`outcome` become one episodic record (key = sessionId), `facts` become semantic upserts, `procedures` become procedural upserts. Secret-bearing payloads are rejected. Call this at the END of a session to persist what was learned.",
|
|
796
1618
|
inputSchema: captureInput
|
|
@@ -818,7 +1640,7 @@ ${p.value}`)
|
|
|
818
1640
|
});
|
|
819
1641
|
return textResult({ written: written.map(recordToJson) });
|
|
820
1642
|
});
|
|
821
|
-
server.registerTool("
|
|
1643
|
+
server.registerTool("search", {
|
|
822
1644
|
title: "Search agent memory",
|
|
823
1645
|
description: "Full-text search (FTS5) over memory keys and values within this project's scope chain, ranked by relevance. Returns matching records plus a highlighted snippet per hit. Use this when memory_read's recall is not specific enough. Does not bump useCount.",
|
|
824
1646
|
inputSchema: searchInput
|
|
@@ -840,7 +1662,7 @@ ${p.value}`)
|
|
|
840
1662
|
}))
|
|
841
1663
|
});
|
|
842
1664
|
});
|
|
843
|
-
server.registerTool("
|
|
1665
|
+
server.registerTool("list", {
|
|
844
1666
|
title: "List stored memory (admin)",
|
|
845
1667
|
description: "Flat listing of this project's memory records without scope resolution \u2014 inspection/debug surface. Optional filters: scope, type, keyContains (case-insensitive substring).",
|
|
846
1668
|
inputSchema: listInput
|
|
@@ -860,7 +1682,7 @@ ${p.value}`)
|
|
|
860
1682
|
records: records.map(recordToJson)
|
|
861
1683
|
});
|
|
862
1684
|
});
|
|
863
|
-
server.registerTool("
|
|
1685
|
+
server.registerTool("delete", {
|
|
864
1686
|
title: "Delete stored memory",
|
|
865
1687
|
description: "Delete one memory record by identity (scope+type+key). For project-scoped semantic records the mirrored `.prometheus/memories/<key>.md` file is removed as well. Returns whether a record/file was actually removed.",
|
|
866
1688
|
inputSchema: deleteInput
|
|
@@ -879,7 +1701,7 @@ ${p.value}`)
|
|
|
879
1701
|
}
|
|
880
1702
|
return textResult({ removed, fileRemoved });
|
|
881
1703
|
});
|
|
882
|
-
server.registerTool("
|
|
1704
|
+
server.registerTool("setup", {
|
|
883
1705
|
title: "Install memory rules into runtime configs",
|
|
884
1706
|
description: "Idempotently install the Prometheus memory-protocol rule block into agent runtime configs in this workspace: CLAUDE.md (claude-code), .cursor/rules/prometheus-memory.mdc (cursor), .augment/rules/prometheus-memory.md (augment), AGENTS.md (agents). Without `runtimes` it auto-detects which runtimes are present (fallback: agents). Only the marked block is written \u2014 existing content is never touched. Re-running updates the block in place.",
|
|
885
1707
|
inputSchema: setupInput
|
|
@@ -914,7 +1736,7 @@ function createServer(deps, options = {}) {
|
|
|
914
1736
|
// dist/bin.js
|
|
915
1737
|
async function main() {
|
|
916
1738
|
const composed = composeFromEnv({ env: process.env });
|
|
917
|
-
process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath}
|
|
1739
|
+
process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath} embed=${composed.embedderId}${composed.embeddingsEnabled ? "" : " (keyword-only)"}
|
|
918
1740
|
`);
|
|
919
1741
|
const server = createServer(composed);
|
|
920
1742
|
const transport = new StdioServerTransport();
|