@prom.codes/memory-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +21 -7
  2. package/dist/bin.js +839 -17
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -10,28 +10,42 @@ as git-versioned markdown under `.prometheus/memories/` in your repo.
10
10
  ## Quick start
11
11
 
12
12
  ```jsonc
13
- // Claude Desktop / Cursor MCP config
13
+ // Claude Desktop / Cursor MCP config — dock under the server name `memory`
14
+ // so the tools resolve to memory_read / memory_write / … (no double prefix).
14
15
  {
15
16
  "mcpServers": {
16
- "prometheus-memory": {
17
+ "memory": {
17
18
  "command": "npx",
18
19
  "args": ["-y", "@prom.codes/memory-mcp@latest"],
19
20
  "env": {
20
- "PROMETHEUS_API_KEY": "prom_live_…",
21
- "PROMETHEUS_WORKSPACE_ROOT": "/absolute/path/to/your/repo"
21
+ "PROMETHEUS_WORKSPACE_ROOT": "/absolute/path/to/your/repo",
22
+ "VOYAGE_API_KEY": "pa-…" // optional: enables semantic recall (search)
22
23
  }
23
24
  }
24
25
  }
25
26
  }
26
27
  ```
27
28
 
29
+ No API key is required to start: the server boots in keyword mode out of the
30
+ box. Add a `VOYAGE_API_KEY` for semantic search (embeddings are requested
31
+ directly from Voyage and stored locally — only the memory text transits to Voyage), or a `prom_live_…`
32
+ `PROMETHEUS_API_KEY` to route embeddings through the metered Prometheus proxy.
33
+
28
34
  Then ask your agent to run `memory_setup` once per workspace — it installs
29
35
  the memory protocol into your runtime rule files (CLAUDE.md, .cursor/rules,
30
36
  .augment/rules or AGENTS.md) so the agent reads memory at session start and
31
37
  captures learnings at session end.
32
38
 
33
- Tools: `memory_read`, `memory_write`, `memory_capture`, `memory_search`,
34
- `memory_list`, `memory_delete`, `memory_setup`. Secrets are rejected on
35
- every write. Your memories never leave your machine.
39
+ Tools (docked as `memory`): `memory_read`, `memory_write`, `memory_capture`,
40
+ `memory_search`, `memory_list`, `memory_delete`, `memory_setup`. Secrets are
41
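+ rejected on every write. Your memories are stored only on your machine; memory text is sent out solely to compute embeddings, and only when an embedding key is configured.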
42
+
43
+ ## Native modules
44
+
45
+ Uses `better-sqlite3` (native). Prebuilt binaries are fetched automatically on
46
+ common platforms (macOS x64/arm64, Linux x64/arm64, Windows x64) — no compiler
47
+ needed. On an unsupported platform/Node ABI, install C/C++ build tools so the
48
+ module can compile (Windows: `npm i -g windows-build-tools` or VS Build Tools).
49
+ Requires Node ≥ 20.10.
36
50
 
37
51
  Docs: https://prom.codes/docs
package/dist/bin.js CHANGED
@@ -8,6 +8,503 @@ import { createHash } from "node:crypto";
8
8
  import { homedir } from "node:os";
9
9
  import { basename, join, resolve } from "node:path";
10
10
 
11
+ // ../embeddings-openai-compat/dist/index.js
12
// Fallback values for OpenAICompatEmbeddingProvider constructor options. Each
// one is used only when the matching option is left null/undefined.

// Maximum number of texts per embeddings request (`batchSize`).
var DEFAULT_BATCH = 96;
// Number of retries allowed after the first attempt (`maxRetries`).
var DEFAULT_RETRIES = 4;
// Base backoff in milliseconds; doubled for each further attempt (`retryBaseMs`).
var DEFAULT_BACKOFF = 250;
// Upper bound, in milliseconds, on any single backoff wait (`retryMaxMs`).
var DEFAULT_RETRY_MAX = 6e4;
// Number of batches that may be in flight at once (`maxConcurrency`).
var DEFAULT_CONCURRENCY = 1;
// Estimated-token budget per batch; 0 disables the token budget (`maxBatchTokens`).
var DEFAULT_MAX_BATCH_TOKENS = 0;
// Characters assumed per token when estimating a text's size (`charsPerToken`).
var DEFAULT_CHARS_PER_TOKEN = 4;
19
/**
 * Parse an HTTP `Retry-After` header value into a wait time in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds (a fractional
 * part is tolerated) or a date string that `Date.parse` understands.
 *
 * @param {string | null | undefined} value Raw header value; anything that is
 *   not a string (e.g. `null` from `Headers.get`, or `undefined`) means "absent".
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds used
 *   to turn the date form into a delay.
 * @returns {number | null} Milliseconds to wait (never negative), or `null`
 *   when the header is absent or unparseable.
 */
function parseRetryAfterMs(value, now = Date.now()) {
  // Guard on type rather than `=== null` so `undefined` cannot reach `.trim()`.
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  if (trimmed === "") {
    return null;
  }
  // Delay-seconds form.
  if (/^[0-9]+(\.[0-9]+)?$/.test(trimmed)) {
    const secs = Number(trimmed);
    if (!Number.isFinite(secs) || secs < 0) {
      return null;
    }
    return Math.round(secs * 1e3);
  }
  // Require at least one letter before trying the date form, so strings made
  // only of digits and punctuation are not handed to the lenient `Date.parse`.
  if (!/[A-Za-z]/.test(trimmed)) {
    return null;
  }
  const ts = Date.parse(trimmed);
  if (!Number.isFinite(ts)) {
    return null;
  }
  // A date in the past means "retry now".
  const delta = ts - now;
  return delta > 0 ? delta : 0;
}
39
/**
 * Wait `ms` milliseconds, ending early if `signal` aborts.
 *
 * The rejection carries `name === "AbortError"` (message stays "aborted") so
 * that retry loops testing `err.name === "AbortError"` recognise a cancelled
 * wait as an abort instead of treating it as a retryable failure.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep(ms, signal) {
  const makeAbortError = () => {
    const err = new Error("aborted");
    err.name = "AbortError";
    return err;
  };
  return new Promise((resolve2, reject) => {
    // Already cancelled: fail without scheduling a timer.
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not retain it.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    const onAbort = () => {
      clearTimeout(timer);
      reject(makeAbortError());
    };
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
56
/**
 * Embedding client for any HTTP endpoint exposing an OpenAI-style
 * `POST {baseUrl}/embeddings` route.
 *
 * Texts are split into ordered batches (by item count and, optionally, by an
 * estimated token budget). Each batch is sent with retry/backoff, and the
 * returned vectors are written back at the index of their source text.
 */
var OpenAICompatEmbeddingProvider = class {
  /** Provider label; used as the prefix of every error message. */
  name;
  /** Model id sent in each request body. */
  model;
  /** Required length of every returned vector. */
  dimension;
  /** Stored as given; not read by this class. */
  region;
  #baseUrl;
  #apiKey;
  #sendDimensions;
  #omitEncodingFormat;
  #batchSize;
  #maxBatchTokens;
  #charsPerToken;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #maxConcurrency;
  #fetch;
  /**
   * @param {object} opts
   * @param {string} opts.name Provider label.
   * @param {string} opts.model Model id.
   * @param {number} opts.dimension Positive integer vector length.
   * @param {string} opts.baseUrl Endpoint base; trailing slashes are stripped.
   * @param {string} [opts.apiKey] Sent as a Bearer token when non-empty.
   * @param {boolean} [opts.sendDimensions] Include `dimensions` in the request body.
   * @param {boolean} [opts.omitEncodingFormat] Leave `encoding_format` out of the body.
   * @param {number} [opts.batchSize] Max texts per request.
   * @param {number} [opts.maxBatchTokens] Estimated-token budget per request (0 = off).
   * @param {number} [opts.charsPerToken] Characters per estimated token.
   * @param {number} [opts.maxRetries] Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] Base backoff in milliseconds.
   * @param {number} [opts.retryMaxMs] Cap on a single backoff wait.
   * @param {number} [opts.maxConcurrency] Batches in flight at once.
   * @param {Function} [opts.fetch] `fetch` implementation; defaults to the global.
   * @throws {Error} When `dimension`, `maxConcurrency`, `maxBatchTokens` or
   *   `charsPerToken` is out of range.
   */
  constructor(opts) {
    if (!Number.isInteger(opts.dimension) || opts.dimension <= 0) {
      throw new Error(`OpenAICompatEmbeddingProvider: dimension must be a positive integer, got ${opts.dimension}`);
    }
    if (opts.maxConcurrency !== void 0 && (!Number.isInteger(opts.maxConcurrency) || opts.maxConcurrency <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxConcurrency must be a positive integer, got ${opts.maxConcurrency}`);
    }
    if (opts.maxBatchTokens !== void 0 && (!Number.isFinite(opts.maxBatchTokens) || opts.maxBatchTokens < 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxBatchTokens must be a non-negative number, got ${opts.maxBatchTokens}`);
    }
    if (opts.charsPerToken !== void 0 && (!Number.isFinite(opts.charsPerToken) || opts.charsPerToken <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: charsPerToken must be a positive number, got ${opts.charsPerToken}`);
    }
    this.name = opts.name;
    this.model = opts.model;
    this.dimension = opts.dimension;
    this.region = opts.region;
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#sendDimensions = opts.sendDimensions ?? false;
    this.#omitEncodingFormat = opts.omitEncodingFormat ?? false;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH;
    this.#maxBatchTokens = opts.maxBatchTokens ?? DEFAULT_MAX_BATCH_TOKENS;
    this.#charsPerToken = opts.charsPerToken ?? DEFAULT_CHARS_PER_TOKEN;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX;
    this.#maxConcurrency = opts.maxConcurrency ?? DEFAULT_CONCURRENCY;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Embed `texts`, returning one `Float32Array` per input, in input order.
   *
   * @param {string[]} texts Inputs to embed; an empty array returns `[]`
   *   without any request.
   * @param {object} [opts]
   * @param {AbortSignal} [opts.signal] Cancels in-flight requests and backoff waits.
   * @param {Function} [opts.onProgress] Called after each finished batch with
   *   `{ done, total, batchSize }`.
   * @returns {Promise<Float32Array[]>}
   * @throws {Error} The first batch failure. With `maxConcurrency > 1` the
   *   remaining lanes stop taking new batches once any lane has failed.
   */
  async embed(texts, opts) {
    if (texts.length === 0) {
      return [];
    }
    const total = texts.length;
    const out = new Array(total);
    const onProgress = opts?.onProgress;
    const batches = this.#planBatches(texts);
    let doneCount = 0;
    const emit = (batchSize) => {
      if (onProgress === void 0) {
        return;
      }
      try {
        onProgress({ done: doneCount, total, batchSize });
      } catch {
        // Progress reporting is best-effort: a throwing callback must not
        // fail an otherwise successful embed.
      }
    };
    const runOne = async (range) => {
      const batch = texts.slice(range.start, range.start + range.count);
      const vectors = await this.#embedBatch(batch, opts?.signal);
      for (let i = 0; i < vectors.length; i++) {
        out[range.start + i] = vectors[i];
      }
      doneCount += batch.length;
      emit(batch.length);
    };
    if (this.#maxConcurrency <= 1) {
      for (const range of batches) {
        await runOne(range);
      }
      return out;
    }
    // Concurrent lanes pull batch indices from a shared cursor. `failed` makes
    // the surviving lanes stop after their current batch: the call is going to
    // reject anyway, so further requests would only waste quota.
    let next = 0;
    let failed = false;
    const worker = async () => {
      while (!failed) {
        const idx = next++;
        if (idx >= batches.length) {
          return;
        }
        try {
          await runOne(batches[idx]);
        } catch (err) {
          failed = true;
          throw err;
        }
      }
    };
    const workers = [];
    const lanes = Math.min(this.#maxConcurrency, batches.length);
    for (let i = 0; i < lanes; i++) {
      workers.push(worker());
    }
    await Promise.all(workers);
    return out;
  }
  /**
   * Partition `texts` into ordered `[start, count)` ranges. Each range is
   * bounded by `#batchSize` (item count) and, when `#maxBatchTokens > 0`,
   * by an *estimated* token budget (text length / `#charsPerToken`). A
   * single text whose own estimate already exceeds the budget still gets
   * its own one-item batch so the planner always makes forward progress.
   */
  #planBatches(texts) {
    const total = texts.length;
    const batches = [];
    let start = 0;
    while (start < total) {
      let count = 0;
      let tokens = 0;
      while (start + count < total && count < this.#batchSize) {
        const est = this.#maxBatchTokens > 0 ? Math.ceil(texts[start + count].length / this.#charsPerToken) : 0;
        // `count > 0` lets an over-budget text through as a batch of one.
        if (this.#maxBatchTokens > 0 && count > 0 && tokens + est > this.#maxBatchTokens) {
          break;
        }
        tokens += est;
        count += 1;
      }
      // A batch size that admits nothing would otherwise loop forever.
      if (count === 0) {
        count = 1;
      }
      batches.push({ start, count });
      start += count;
    }
    return batches;
  }
  /**
   * Release the body of a response that is about to be discarded, so the
   * underlying connection is not held open by an unread stream.
   * Best-effort: responses without a cancellable body are left alone.
   */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful can be done if the stream refuses to cancel.
    }
  }
  /**
   * Send one batch, retrying 429/5xx responses and thrown fetch errors with
   * backoff. Other non-2xx responses and malformed payloads fail immediately.
   *
   * @param {string[]} batch Texts for this request.
   * @param {AbortSignal} [signal]
   * @returns {Promise<Float32Array[]>} One vector per text, in batch order.
   */
  async #embedBatch(batch, signal) {
    const body = {
      input: batch,
      model: this.model
    };
    if (!this.#omitEncodingFormat) {
      body.encoding_format = "float";
    }
    if (this.#sendDimensions) {
      body.dimensions = this.dimension;
    }
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0 && this.#apiKey !== "") {
      headers.authorization = `Bearer ${this.#apiKey}`;
    }
    const init = {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    };
    if (signal !== void 0) {
      init.signal = signal;
    }
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/embeddings`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          // The body of a retried response is never read; release it.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries) {
            break;
          }
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          const err = new Error(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
          err.nonRetryable = true;
          throw err;
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // A cancelled request or cancelled backoff wait must surface as-is,
        // whatever error name the rejecting code happened to use.
        if (err?.name === "AbortError" || signal?.aborted === true) {
          throw err;
        }
        if (err?.nonRetryable === true) {
          throw err;
        }
        if (attempt >= this.#maxRetries) {
          throw err;
        }
        lastError = err;
        attempt += 1;
        await sleep(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /**
   * Compute the per-attempt backoff. Exponential growth starts from
   * `retryBaseMs` and doubles per attempt; a `Retry-After` header value
   * (if any, parsed by {@link parseRetryAfterMs}) raises the floor so we
   * never undercut a server-advertised wait; the result is capped at
   * `retryMaxMs` to prevent unbounded stalls from misbehaving servers.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate a response payload and convert it to vectors ordered by each
   * row's `index`. Any shape mismatch is a non-retryable error.
   *
   * @param {*} payload Parsed JSON response.
   * @param {number} expected Number of vectors the batch requires.
   * @returns {Float32Array[]}
   */
  #decode(payload, expected) {
    // `payload?.` keeps a null/primitive body on the non-retryable path
    // instead of raising a TypeError that the retry loop would retry.
    const rows = payload?.data;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable(`${this.name}: expected ${expected} embeddings, got ${rows?.length ?? 0}`);
    }
    const sorted = [...rows].sort((a, b) => a?.index - b?.index);
    return sorted.map((row) => {
      if (!Array.isArray(row?.embedding) || row.embedding.length !== this.dimension) {
        throw nonRetryable(`${this.name}: embedding length ${row?.embedding?.length ?? 0} does not match declared dimension ${this.dimension}`);
      }
      return Float32Array.from(row.embedding);
    });
  }
};
260
/**
 * Build an `Error` flagged with `nonRetryable: true`, the marker the retry
 * loop checks before deciding to rethrow instead of backing off.
 *
 * @param {string} message Error message.
 * @returns {Error} The flagged error (returned, not thrown).
 */
function nonRetryable(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
265
+
266
+ // ../embeddings-prometheus/dist/index.js
267
/**
 * Raised when an embed response reports a different embedding-space
 * fingerprint than the one resolved earlier, meaning previously stored
 * vectors are no longer comparable with new ones.
 */
var PrometheusEmbeddingDriftError = class extends Error {
  /**
   * Stable string code consumers match on (`err.code === "EMBEDDING_DRIFT"`)
   * instead of importing this class.
   */
  code;
  /** Fingerprint that was expected. */
  expected;
  /** Fingerprint the response actually carried. */
  actual;
  /**
   * @param {string} expected Previously resolved fingerprint.
   * @param {string} actual Fingerprint found in the response.
   */
  constructor(expected, actual) {
    const message = `prometheus-embed: embedding space changed upstream (fingerprint ${expected} -> ${actual}) \u2014 a full re-index is required`;
    super(message);
    this.code = "EMBEDDING_DRIFT";
    this.expected = expected;
    this.actual = actual;
    this.name = "PrometheusEmbeddingDriftError";
  }
};
283
// Fallback values for PrometheusEmbeddingProvider constructor options. Each
// one is used only when the matching option is left null/undefined.

// Proxy base URL; the provider strips trailing slashes and appends "/embed".
var DEFAULT_BASE = "https://api.prom.codes";
// Maximum number of texts per request (`batchSize`).
var DEFAULT_BATCH2 = 128;
// Maximum summed text length (string `.length` units) per request (`maxBatchChars`).
var DEFAULT_BATCH_CHARS = 4e5;
// Number of retries allowed after the first attempt (`maxRetries`).
var DEFAULT_RETRIES2 = 4;
// Base backoff in milliseconds; doubled for each further attempt (`retryBaseMs`).
var DEFAULT_BACKOFF2 = 250;
288
/**
 * Wait `ms` milliseconds, ending early if `signal` aborts.
 *
 * The rejection carries `name === "AbortError"` (message stays "aborted") so
 * that retry loops testing `err.name === "AbortError"` recognise a cancelled
 * wait as an abort instead of treating it as a retryable failure.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep2(ms, signal) {
  const makeAbortError = () => {
    const err = new Error("aborted");
    err.name = "AbortError";
    return err;
  };
  return new Promise((resolve2, reject) => {
    // Already cancelled: fail without scheduling a timer.
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not retain it.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    const onAbort = () => {
      clearTimeout(timer);
      reject(makeAbortError());
    };
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
305
/**
 * Build an `Error` flagged with `nonRetryable: true`, the marker the retry
 * loop checks before deciding to rethrow instead of backing off.
 *
 * @param {string} message Error message.
 * @returns {Error} The flagged error (returned, not thrown).
 */
function nonRetryable2(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
310
/**
 * Embedding client for the Prometheus `/embed` proxy.
 *
 * The proxy does not name its upstream model; it exposes a fingerprint and a
 * vector dimension through a GET on the same URL. That identity is resolved
 * once and cached, then checked against every embed response so a change of
 * embedding space is reported instead of silently mixing vectors.
 */
var PrometheusEmbeddingProvider = class {
  /** Provider label (defaults to "prometheus"). */
  name;
  /** Region label (defaults to "eu"); stored as given, not read by this class. */
  region;
  #apiKey;
  #url;
  #batchSize;
  #maxBatchChars;
  #maxRetries;
  #retryBaseMs;
  #fetch;
  #identity = null;
  #identityPromise = null;
  #creditsUsed = 0;
  /**
   * @param {object} opts
   * @param {string} opts.apiKey Bearer token; must be a non-empty string.
   * @param {string} [opts.name]
   * @param {string} [opts.region]
   * @param {string} [opts.baseUrl] Proxy base; trailing slashes are stripped.
   * @param {number} [opts.batchSize] Max texts per request.
   * @param {number} [opts.maxBatchChars] Max summed text length per request.
   * @param {number} [opts.maxRetries] Retries after the first attempt.
   * @param {number} [opts.retryBaseMs] Base backoff in milliseconds.
   * @param {Function} [opts.fetch] `fetch` implementation; defaults to the global.
   * @throws {Error} When `apiKey` is missing, empty, or not a string.
   */
  constructor(opts) {
    // Reject a missing key as well as an empty one; otherwise every request
    // would go out with the literal header "Bearer undefined".
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("PrometheusEmbeddingProvider: apiKey is required");
    }
    this.name = opts.name ?? "prometheus";
    this.region = opts.region ?? "eu";
    this.#apiKey = opts.apiKey;
    this.#url = `${(opts.baseUrl ?? DEFAULT_BASE).replace(/\/+$/, "")}/embed`;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH2;
    this.#maxBatchChars = opts.maxBatchChars ?? DEFAULT_BATCH_CHARS;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES2;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * The abstract fingerprint stands in for the (hidden) upstream model
   * id. Throws until the identity has been resolved — call
   * {@link resolveIdentity} (or `embed()`) first.
   */
  get model() {
    return this.#requireIdentity().fingerprint;
  }
  /** Vector dimension. Throws until the identity has been resolved. */
  get dimension() {
    return this.#requireIdentity().dimension;
  }
  /** Cumulative credits charged across all embed calls of this instance. */
  get creditsUsed() {
    return this.#creditsUsed;
  }
  /**
   * Resolve (and cache) the proxy's embedding-space identity via a GET.
   * Concurrent callers share one in-flight request; a failed resolution is
   * not cached, so callers may retry.
   *
   * @param {AbortSignal} [signal] Applied to the request started by this call.
   * @returns {Promise<{ fingerprint: string, dimension: number }>}
   */
  async resolveIdentity(signal) {
    if (this.#identity !== null) {
      return this.#identity;
    }
    if (this.#identityPromise === null) {
      this.#identityPromise = this.#fetchIdentity(signal);
      // Clear the slot on failure so the next call starts a fresh request.
      // The rejection itself still reaches callers through the returned promise.
      this.#identityPromise.catch(() => {
        this.#identityPromise = null;
      });
    }
    return this.#identityPromise;
  }
  /**
   * Embed `texts`, returning one `Float32Array` per input, in input order.
   * Batches are sent one after another.
   *
   * @param {string[]} texts Inputs; an empty array returns `[]` without any request.
   * @param {object} [opts]
   * @param {AbortSignal} [opts.signal]
   * @param {string} [opts.inputType] Forwarded to the proxy; defaults to "document".
   * @param {Function} [opts.onProgress] Called after each batch with
   *   `{ done, total, batchSize }`.
   * @returns {Promise<Float32Array[]>}
   */
  async embed(texts, opts) {
    if (texts.length === 0) {
      return [];
    }
    const identity = await this.resolveIdentity(opts?.signal);
    const out = new Array(texts.length);
    let done = 0;
    let start = 0;
    while (start < texts.length) {
      const end = this.#batchEnd(texts, start);
      const batch = texts.slice(start, end);
      const vectors = await this.#embedBatch(batch, identity, opts?.inputType ?? "document", opts?.signal);
      for (let i = 0; i < vectors.length; i++) {
        out[start + i] = vectors[i];
      }
      done += batch.length;
      if (opts?.onProgress !== void 0) {
        try {
          opts.onProgress({ done, total: texts.length, batchSize: batch.length });
        } catch {
          // Progress reporting is best-effort: a throwing callback must not
          // fail an otherwise successful embed.
        }
      }
      start = end;
    }
    return out;
  }
  /** Return the cached identity or throw if it has not been resolved yet. */
  #requireIdentity() {
    if (this.#identity === null) {
      throw new Error("PrometheusEmbeddingProvider: identity not resolved yet \u2014 await resolveIdentity() (or a first embed()) before reading model/dimension");
    }
    return this.#identity;
  }
  /** GET the identity, validate its shape, and cache it on success. */
  async #fetchIdentity(signal) {
    const init = {
      method: "GET",
      headers: { authorization: `Bearer ${this.#apiKey}` }
    };
    if (signal !== void 0) {
      init.signal = signal;
    }
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || typeof payload.fingerprint !== "string" || payload.fingerprint === "" || !Number.isInteger(payload.dimension) || payload.dimension <= 0) {
      throw nonRetryable2("prometheus-embed: malformed identity response");
    }
    const identity = {
      fingerprint: payload.fingerprint,
      dimension: payload.dimension
    };
    this.#identity = identity;
    return identity;
  }
  /** Greedy batch cut respecting both the item cap and the char budget. */
  #batchEnd(texts, start) {
    let chars = 0;
    let end = start;
    while (end < texts.length && end - start < this.#batchSize) {
      const len = texts[end].length;
      // `end > start` lets an over-budget text through as a batch of one.
      if (end > start && chars + len > this.#maxBatchChars) {
        break;
      }
      chars += len;
      end += 1;
    }
    return end;
  }
  /**
   * POST one batch and convert the response into vectors ordered by each
   * row's `index`. Also accumulates `usage.credits` when it is a finite number.
   *
   * @throws {PrometheusEmbeddingDriftError} When the response fingerprint
   *   differs from `identity.fingerprint`; the cached identity is cleared first.
   * @throws {Error} Non-retryable error on any malformed response.
   */
  async #embedBatch(batch, identity, inputType, signal) {
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify({ input: batch, inputType })
    };
    if (signal !== void 0) {
      init.signal = signal;
    }
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || !Array.isArray(payload.embeddings)) {
      throw nonRetryable2("prometheus-embed: malformed embed response");
    }
    if (typeof payload.fingerprint === "string" && payload.fingerprint !== identity.fingerprint) {
      // Forget the stale identity so the next call re-resolves it.
      this.#identity = null;
      this.#identityPromise = null;
      throw new PrometheusEmbeddingDriftError(identity.fingerprint, payload.fingerprint);
    }
    if (payload.embeddings.length !== batch.length) {
      throw nonRetryable2(`prometheus-embed: expected ${batch.length} embeddings, got ${payload.embeddings.length}`);
    }
    const sorted = [...payload.embeddings].sort((a, b) => a?.index - b?.index);
    const vectors = sorted.map((row) => {
      // `row?.` keeps a null row on the non-retryable path instead of
      // raising a TypeError.
      if (!Array.isArray(row?.vector) || row.vector.length !== identity.dimension) {
        throw nonRetryable2(`prometheus-embed: embedding length ${row?.vector?.length ?? 0} does not match resolved dimension ${identity.dimension}`);
      }
      return Float32Array.from(row.vector);
    });
    const credits = payload.usage?.credits;
    if (typeof credits === "number" && Number.isFinite(credits)) {
      this.#creditsUsed += credits;
    }
    return vectors;
  }
  /**
   * Release the body of a response that is about to be discarded, so the
   * underlying connection is not held open by an unread stream.
   * Best-effort: responses without a cancellable body are left alone.
   */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful can be done if the stream refuses to cancel.
    }
  }
  /**
   * Fetch with retry. 5xx and network errors back off exponentially;
   * everything else (401 invalid key, 413 oversized input, 429 quota
   * exhausted — a *monthly* limit, retrying cannot help) fails fast
   * with the proxy's error code in the message.
   */
  async #requestJson(init, signal) {
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(this.#url, init);
        if (res.status >= 500 && res.status < 600) {
          lastError = new Error(`prometheus-embed: HTTP ${res.status}`);
          // The body of a retried response is never read; release it.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries) {
            break;
          }
          await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
          continue;
        }
        if (!res.ok) {
          const body = await res.json().catch(() => null);
          const detail = typeof body?.code === "string" ? `${body.code}${typeof body.error === "string" ? ` \u2014 ${body.error}` : ""}` : res.statusText;
          throw nonRetryable2(`prometheus-embed: HTTP ${res.status} ${detail}`);
        }
        return await res.json();
      } catch (err) {
        // A cancelled request or cancelled backoff wait must surface as-is,
        // whatever error name the rejecting code happened to use.
        if (err?.name === "AbortError" || signal?.aborted === true) {
          throw err;
        }
        if (err?.nonRetryable === true) {
          throw err;
        }
        if (attempt >= this.#maxRetries) {
          throw err;
        }
        lastError = err;
        attempt += 1;
        await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`prometheus-embed: exhausted ${this.#maxRetries} retries`);
  }
};
507
+
11
508
  // dist/api-key.js
12
509
  var KEY_PATTERN = /^prom_(live|test)_[A-Za-z0-9]{10,}$/;
13
510
  var API_KEY_ENV = "PROMETHEUS_API_KEY";
@@ -28,6 +525,56 @@ import { mkdirSync } from "node:fs";
28
525
  import { dirname } from "node:path";
29
526
  import Database from "better-sqlite3";
30
527
 
528
+ // dist/rrf.js
529
/**
 * Merge several ranked lists into one ranking with Reciprocal Rank Fusion.
 *
 * Every distinct key in a list contributes `weight / (k + rank)` to that
 * key's score, where `rank` is its 1-based position among the list's distinct
 * keys (repeats of a key within one list are ignored). Results are ordered by
 * descending score; equal scores keep first-seen order.
 *
 * @param {Array<{ id: string, weight?: number, items: Iterable<{ key: *, payload: * }> }>} lists
 *   Ranked inputs, best item first. `weight` defaults to 1.
 * @param {{ k?: number, limit?: number }} [options] `k` is the smoothing
 *   constant (default 60); a non-negative `limit` truncates the result.
 * @returns {Array<{ key: *, score: number, contribs: Object, payload: * }>}
 *   `contribs` maps list id to that list's share of the score; `payload` is
 *   the one from the key's first appearance.
 * @throws {Error} When `k` is not a finite number greater than 0.
 */
function reciprocalRankFusion(lists, options = {}) {
  const k = options.k ?? 60;
  if (!Number.isFinite(k) || k <= 0) {
    throw new Error(`reciprocalRankFusion: k must be > 0, got ${k}`);
  }
  // One accumulator per key; `order` records first-seen position for tie-breaks.
  const byKey = /* @__PURE__ */ new Map();
  for (const list of lists) {
    const weight = list.weight ?? 1;
    const alreadyRanked = /* @__PURE__ */ new Set();
    let rank = 0;
    for (const item of list.items) {
      if (alreadyRanked.has(item.key)) {
        continue;
      }
      alreadyRanked.add(item.key);
      rank += 1;
      const delta = weight / (k + rank);
      const entry = byKey.get(item.key);
      if (entry === void 0) {
        byKey.set(item.key, {
          key: item.key,
          score: 0 + delta,
          contribs: { [list.id]: delta },
          payload: item.payload,
          order: byKey.size
        });
      } else {
        entry.score += delta;
        entry.contribs[list.id] = (entry.contribs[list.id] ?? 0) + delta;
      }
    }
  }
  const ranked = [...byKey.values()];
  ranked.sort((a, b) => b.score !== a.score ? b.score - a.score : a.order - b.order);
  const merged = ranked.map(({ key, score, contribs, payload }) => ({ key, score, contribs, payload }));
  if (options.limit !== void 0 && options.limit >= 0) {
    return merged.slice(0, options.limit);
  }
  return merged;
}
577
+
31
578
  // dist/types.js
32
579
  var MEMORY_SCOPES = [
33
580
  "system",
@@ -131,11 +678,54 @@ CREATE TRIGGER IF NOT EXISTS agent_memory_au AFTER UPDATE ON agent_memory BEGIN
131
678
  VALUES (new.rowid, new.key, new.value);
132
679
  END;
133
680
  `;
681
// DDL for the vector side of the store, executed when the backend opens the
// database (all statements are IF NOT EXISTS, so re-running is harmless):
//   - agent_memory_vec: one row per record id holding its vector blob and
//     the vector's dimension.
//   - embedding_meta: at most one row (CHECK id = 1) holding a fingerprint
//     and a dimension.
//   - agent_memory_vec_ad: trigger that removes a record's vector when the
//     record is deleted from agent_memory.
var VEC_SCHEMA = `
CREATE TABLE IF NOT EXISTS agent_memory_vec (
record_id TEXT PRIMARY KEY,
vector BLOB NOT NULL,
dim INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS embedding_meta (
id INTEGER PRIMARY KEY CHECK (id = 1),
fingerprint TEXT NOT NULL,
dim INTEGER NOT NULL
);
CREATE TRIGGER IF NOT EXISTS agent_memory_vec_ad AFTER DELETE ON agent_memory BEGIN
DELETE FROM agent_memory_vec WHERE record_id = old.id;
END;
`;
696
/**
 * Serialise a vector as consecutive little-endian 32-bit floats.
 *
 * The byte order is written explicitly so the blob always matches the
 * `readFloatLE` decoding used when vectors are loaded back, regardless of the
 * host's native byte order. The result is a fresh buffer that shares no
 * memory with `vector`.
 *
 * @param {Float32Array | number[]} vector Values to store.
 * @returns {Buffer} `vector.length * 4` bytes.
 */
function vectorToBlob(vector) {
  const blob = Buffer.allocUnsafe(vector.length * 4);
  for (let i = 0; i < vector.length; i++) {
    blob.writeFloatLE(vector[i], i * 4);
  }
  return blob;
}
699
/**
 * Decode a blob of consecutive little-endian 32-bit floats into a vector.
 * Trailing bytes that do not make up a whole float are ignored.
 *
 * @param {Buffer} blob Stored vector bytes.
 * @returns {Float32Array} One element per complete 4-byte group.
 */
function blobToVector(blob) {
  const floatCount = Math.floor(blob.byteLength / 4);
  return Float32Array.from({ length: floatCount }, (_unused, idx) => blob.readFloatLE(idx * 4));
}
705
/**
 * Cosine similarity of two vectors, iterating over the length of `a`.
 *
 * @param {ArrayLike<number>} a First vector; its length drives the loop.
 * @param {ArrayLike<number>} b Second vector; callers are expected to pass
 *   one of the same length.
 * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either squared
 *   norm is exactly 0.
 */
function cosine(a, b) {
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < a.length) {
    const left = a[idx];
    const right = b[idx];
    dotProduct += left * right;
    sumSquaresA += left * left;
    sumSquaresB += right * right;
    idx += 1;
  }
  // A zero vector has no direction; report "no similarity" instead of dividing by 0.
  const hasZeroNorm = sumSquaresA === 0 || sumSquaresB === 0;
  return hasZeroNorm ? 0 : dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
720
/**
 * Build a one-line preview of `value`: whitespace runs collapse to single
 * spaces, the ends are trimmed, and text longer than `cap` UTF-16 units is
 * cut and suffixed with " …".
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * preview cannot end in a lone (unrenderable) surrogate.
 *
 * @param {string} value Source text.
 * @param {number} [cap=200] Maximum number of UTF-16 units kept before the suffix.
 * @returns {string} The flattened text, truncated when it exceeds `cap`.
 */
function fallbackSnippet(value, cap = 200) {
  const flat = value.replace(/\s+/g, " ").trim();
  if (flat.length <= cap) {
    return flat;
  }
  let end = cap;
  if (end > 0) {
    const lastKept = flat.charCodeAt(end - 1);
    const firstDropped = flat.charCodeAt(end);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    // Drop the orphaned high surrogate along with its low half.
    if (splitsPair) {
      end -= 1;
    }
  }
  return `${flat.slice(0, end)} \u2026`;
}
134
724
/**
 * Turn free text into an FTS5 MATCH expression of quoted prefix terms.
 *
 * The query is split on whitespace and double quotes are stripped from each
 * token so it can be safely re-quoted. Each token becomes `"token" *`, and the
 * terms are joined with OR (release 0.1.0 joined them with AND).
 *
 * @param {string} query Raw user query.
 * @returns {string} The MATCH expression, or "" when no usable token remains.
 */
function toFtsQuery(query) {
  const terms = [];
  for (const piece of query.split(/\s+/)) {
    const cleaned = piece.replace(/"/g, "").trim();
    if (cleaned.length > 0) {
      terms.push(`"${cleaned}" *`);
    }
  }
  if (terms.length === 0) {
    return "";
  }
  return terms.join(" OR ");
}
140
730
  function rowToRecord(row) {
141
731
  return {
@@ -155,8 +745,11 @@ function rowToRecord(row) {
155
745
  }
156
746
  var SqliteMemoryBackend = class {
157
747
  db;
748
+ embedder;
749
+ /** Record ids whose vector is missing/stale, awaiting a batched embed. */
750
+ pendingEmbed = /* @__PURE__ */ new Set();
158
751
  closed = false;
159
- constructor(dbPath) {
752
+ constructor(dbPath, opts = {}) {
160
753
  if (dbPath !== ":memory:") {
161
754
  mkdirSync(dirname(dbPath), { recursive: true });
162
755
  }
@@ -164,7 +757,24 @@ var SqliteMemoryBackend = class {
164
757
  this.db.pragma("journal_mode = WAL");
165
758
  this.db.exec(SCHEMA);
166
759
  this.db.exec(FTS_SCHEMA);
760
+ this.db.exec(VEC_SCHEMA);
167
761
  this.db.exec(`INSERT INTO agent_memory_fts (agent_memory_fts) VALUES ('rebuild')`);
762
+ this.embedder = opts.embedder;
763
+ if (this.embedder !== void 0)
764
+ this.queueUnembedded();
765
+ }
766
+ /**
767
+ * Queue every record lacking a stored vector for a (re)embed. Run at
768
+ * open so a DB first built keyword-only — or one whose vectors were
769
+ * wiped after an embedding-space change — lazily catches up on the next
770
+ * `search` instead of needing a manual reindex.
771
+ */
772
+ queueUnembedded() {
773
+ const rows = this.db.prepare(`SELECT m.id AS id FROM agent_memory m
774
+ LEFT JOIN agent_memory_vec v ON v.record_id = m.id
775
+ WHERE v.record_id IS NULL`).all();
776
+ for (const r of rows)
777
+ this.pendingEmbed.add(r.id);
168
778
  }
169
779
  audit(action, fields, detail) {
170
780
  this.db.prepare(`INSERT INTO audit_log (ts, action, scope, scope_id, type, key, detail)
@@ -177,6 +787,8 @@ var SqliteMemoryBackend = class {
177
787
  this.db.prepare(`UPDATE agent_memory SET value = ?, confidence = ?, source = ?, tags = ?, updated_at = ?
178
788
  WHERE id = ?`).run(input.value, input.confidence ?? null, input.source ?? existing.source, input.tags ? JSON.stringify(input.tags) : existing.tags, now, existing.id);
179
789
  this.audit("write.update", input);
790
+ if (this.embedder !== void 0)
791
+ this.pendingEmbed.add(existing.id);
180
792
  return this.byId(existing.id);
181
793
  }
182
794
  const id = randomUUID();
@@ -184,6 +796,8 @@ var SqliteMemoryBackend = class {
184
796
  (id, project_id, scope, scope_id, type, key, value, confidence, source, tags, use_count, created_at, updated_at)
185
797
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)`).run(id, input.projectId, input.scope, input.scopeId, input.type, input.key, input.value, input.confidence ?? null, input.source ?? null, input.tags ? JSON.stringify(input.tags) : null, now, now);
186
798
  this.audit("write.insert", input);
799
+ if (this.embedder !== void 0)
800
+ this.pendingEmbed.add(id);
187
801
  return this.byId(id);
188
802
  }
189
803
  byId(id) {
@@ -234,9 +848,40 @@ var SqliteMemoryBackend = class {
234
848
  const rows = this.db.prepare(sql).all(...params);
235
849
  return rows.map(rowToRecord);
236
850
  }
851
+ /**
852
+ * Hybrid search: FTS5 BM25 (keyword) ⊕ vector cosine (semantic), fused
853
+ * via RRF. The vector channel is best-effort — when no embedder is
854
+ * configured, or it is unreachable (offline / no key / proxy error),
855
+ * the method degrades to pure keyword search, byte-for-byte the Phase-1
856
+ * behaviour. This is the local-first guarantee: semantic recall is an
857
+ * enhancement, never a hard dependency.
858
+ */
237
859
  async search(input) {
238
860
  if (input.chain.length === 0)
239
861
  return [];
862
+ const finalLimit = input.limit ?? 20;
863
+ const poolLimit = Math.max(finalLimit * 4, 40);
864
+ const ftsHits = this.ftsSearch(input, poolLimit);
865
+ let vecHits = [];
866
+ if (this.embedder !== void 0) {
867
+ try {
868
+ vecHits = await this.vectorSearch(input, poolLimit);
869
+ } catch {
870
+ vecHits = [];
871
+ }
872
+ }
873
+ if (vecHits.length === 0)
874
+ return ftsHits.slice(0, finalLimit);
875
+ if (ftsHits.length === 0)
876
+ return vecHits.slice(0, finalLimit);
877
+ const fused = reciprocalRankFusion([
878
+ { id: "fts", items: ftsHits.map((h) => ({ key: h.record.id, payload: h })) },
879
+ { id: "vec", items: vecHits.map((h) => ({ key: h.record.id, payload: h })) }
880
+ ], { limit: finalLimit });
881
+ return fused.map((f) => f.payload);
882
+ }
883
+ /** FTS5 BM25 keyword channel → ranked hits (best first). */
884
+ ftsSearch(input, limit) {
240
885
  const match = toFtsQuery(input.query);
241
886
  if (match === "")
242
887
  return [];
@@ -253,12 +898,131 @@ var SqliteMemoryBackend = class {
253
898
  params.push(...input.types);
254
899
  }
255
900
  sql += ` ORDER BY rank LIMIT ?`;
256
- params.push(input.limit ?? 20);
901
+ params.push(limit);
257
902
  const rows = this.db.prepare(sql).all(...params);
258
- return rows.map((row) => ({
259
- record: rowToRecord(row),
260
- snippet: row.snip
261
- }));
903
+ return rows.map((row) => ({ record: rowToRecord(row), snippet: row.snip }));
904
+ }
905
+ /**
906
+ * Vector channel: brute-force cosine of the query vector against every
907
+ * in-scope stored vector (memory sets are tiny — no ANN index needed).
908
+ * Flushes pending embeds first so freshly written records are searchable.
909
+ * May throw on an embed failure; the caller (`search`) catches it.
910
+ */
911
+ async vectorSearch(input, limit) {
912
+ if (this.embedder === void 0)
913
+ return [];
914
+ await this.flushEmbeddings();
915
+ const scopePairs = input.chain.map(() => `(m.scope = ? AND m.scope_id = ?)`).join(" OR ");
916
+ const params = [];
917
+ params.push(...input.chain.flatMap((l) => [l.scope, l.scopeId]));
918
+ let sql = `
919
+ SELECT m.*, v.vector AS vec FROM agent_memory_vec v
920
+ JOIN agent_memory m ON m.id = v.record_id
921
+ WHERE (${scopePairs})`;
922
+ if (input.types && input.types.length > 0) {
923
+ sql += ` AND m.type IN (${input.types.map(() => "?").join(", ")})`;
924
+ params.push(...input.types);
925
+ }
926
+ const rows = this.db.prepare(sql).all(...params);
927
+ if (rows.length === 0)
928
+ return [];
929
+ const embedded = await this.embedder.embed([input.query], { inputType: "query" });
930
+ const queryVec = embedded[0];
931
+ if (queryVec === void 0)
932
+ return [];
933
+ const scored = [];
934
+ for (const row of rows) {
935
+ const vec = blobToVector(row.vec);
936
+ if (vec.length !== queryVec.length)
937
+ continue;
938
+ const score = cosine(queryVec, vec);
939
+ if (!(score > 0))
940
+ continue;
941
+ const record = rowToRecord(row);
942
+ scored.push({
943
+ hit: { record, snippet: fallbackSnippet(record.value) },
944
+ score
945
+ });
946
+ }
947
+ scored.sort((a, b) => b.score - a.score);
948
+ return scored.slice(0, limit).map((s) => s.hit);
949
+ }
950
+ /** Read the pinned embedding-space identity, if any. */
951
+ getEmbeddingMeta() {
952
+ const row = this.db.prepare(`SELECT fingerprint, dim FROM embedding_meta WHERE id = 1`).get();
953
+ return row ?? null;
954
+ }
955
+ /** Pin (or re-pin) the embedding-space fingerprint + dimension. */
956
+ setEmbeddingMeta(fingerprint, dim) {
957
+ this.db.prepare(`INSERT INTO embedding_meta (id, fingerprint, dim) VALUES (1, ?, ?)
958
+ ON CONFLICT(id) DO UPDATE SET fingerprint = excluded.fingerprint, dim = excluded.dim`).run(fingerprint, dim);
959
+ }
960
+ /** Drop every stored vector + the pinned space; re-queue all records. */
961
+ resetVectorSpace() {
962
+ this.db.exec(`DELETE FROM agent_memory_vec; DELETE FROM embedding_meta;`);
963
+ this.pendingEmbed.clear();
964
+ this.queueUnembedded();
965
+ }
966
+ /**
967
+ * Batch-embed every pending record's `value` and store the vectors.
968
+ * Lazy (only called from `vectorSearch`) and best-effort: a network/key
969
+ * failure throws and leaves rows pending (FTS-only fallback); a proxy
970
+ * embedding-space change — drift mid-run, or a cross-run fingerprint
971
+ * mismatch — wipes stale vectors and re-embeds in the new space so the
972
+ * store is never a mix of vector spaces.
973
+ */
974
+ async flushEmbeddings() {
975
+ if (this.embedder === void 0 || this.pendingEmbed.size === 0)
976
+ return;
977
+ const load = (ids) => {
978
+ if (ids.length === 0)
979
+ return [];
980
+ const ph = ids.map(() => "?").join(", ");
981
+ return this.db.prepare(`SELECT id, value FROM agent_memory WHERE id IN (${ph})`).all(...ids);
982
+ };
983
+ let rows = load([...this.pendingEmbed]);
984
+ const live = new Set(rows.map((r) => r.id));
985
+ for (const id of [...this.pendingEmbed])
986
+ if (!live.has(id))
987
+ this.pendingEmbed.delete(id);
988
+ if (rows.length === 0)
989
+ return;
990
+ let vectors;
991
+ try {
992
+ vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
993
+ } catch (err) {
994
+ if (err.code === "EMBEDDING_DRIFT") {
995
+ this.resetVectorSpace();
996
+ rows = load([...this.pendingEmbed]);
997
+ if (rows.length === 0)
998
+ return;
999
+ vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
1000
+ } else {
1001
+ throw err;
1002
+ }
1003
+ }
1004
+ const fingerprint = this.embedder.model;
1005
+ const dim = this.embedder.dimension;
1006
+ const meta = this.getEmbeddingMeta();
1007
+ if (meta !== null && meta.fingerprint !== fingerprint) {
1008
+ this.db.exec(`DELETE FROM agent_memory_vec;`);
1009
+ this.setEmbeddingMeta(fingerprint, dim);
1010
+ this.pendingEmbed.clear();
1011
+ this.queueUnembedded();
1012
+ for (const r of rows)
1013
+ this.pendingEmbed.delete(r.id);
1014
+ } else if (meta === null) {
1015
+ this.setEmbeddingMeta(fingerprint, dim);
1016
+ }
1017
+ const upsert = this.db.prepare(`INSERT INTO agent_memory_vec (record_id, vector, dim) VALUES (?, ?, ?)
1018
+ ON CONFLICT(record_id) DO UPDATE SET vector = excluded.vector, dim = excluded.dim`);
1019
+ const store = this.db.transaction((items) => {
1020
+ for (const it of items)
1021
+ upsert.run(it.id, vectorToBlob(it.vec), dim);
1022
+ });
1023
+ store(rows.map((r, i) => ({ id: r.id, vec: vectors[i] })));
1024
+ for (const r of rows)
1025
+ this.pendingEmbed.delete(r.id);
262
1026
  }
263
1027
  async delete(input) {
264
1028
  const result = this.db.prepare(`DELETE FROM agent_memory
@@ -328,21 +1092,79 @@ function projectIdFor(workspaceRoot) {
328
1092
  function defaultMemoryDbPath() {
329
1093
  return join(homedir(), ".prometheus", "memory.db");
330
1094
  }
1095
/**
 * Read an integer setting from an environment-style object.
 *
 * The value must be a whole base-10 integer (optional sign, surrounding
 * whitespace allowed). Anything else falls back to the default. This includes
 * forms that `Number.parseInt` would silently truncate, such as "9e4" (9),
 * "90_000" (90) or "12abc" (12).
 *
 * @param env Object mapping variable names to string values (e.g. `process.env`).
 * @param name Variable to read.
 * @param def Value returned when the variable is unset, empty or malformed.
 * @returns The parsed integer, or `def`.
 */
function intEnv(env, name, def) {
  const raw = env[name];
  if (raw === void 0)
    return def;
  const text = String(raw).trim();
  // Whole-string match; also rejects the empty / whitespace-only case.
  if (!/^[+-]?\d+$/.test(text))
    return def;
  const n = Number.parseInt(text, 10);
  return Number.isFinite(n) ? n : def;
}
1102
/**
 * Build the Voyage embedding provider from environment settings.
 *
 * Every setting is optional. An unset or empty variable uses the built-in
 * default, matching how the integer settings are read through `intEnv`.
 *
 * @param env Object mapping variable names to string values (e.g. `process.env`).
 *   Reads VOYAGE_MODEL, VOYAGE_DIM, VOYAGE_BASE_URL, VOYAGE_MAX_BATCH_TOKENS,
 *   VOYAGE_CHARS_PER_TOKEN, VOYAGE_MAX_RETRIES and VOYAGE_RETRY_BASE_MS.
 * @param apiKey Voyage API key handed to the provider.
 * @returns A new `OpenAICompatEmbeddingProvider` configured for Voyage.
 */
function buildVoyageEmbedder(env, apiKey) {
  // An empty string is never a usable model name or URL; treat it as unset.
  const textEnv = (name, def) => {
    const raw = env[name];
    return raw === void 0 || raw === "" ? def : raw;
  };
  return new OpenAICompatEmbeddingProvider({
    name: "voyage",
    model: textEnv("VOYAGE_MODEL", "voyage-3-large"),
    dimension: intEnv(env, "VOYAGE_DIM", 1024),
    region: "us",
    baseUrl: textEnv("VOYAGE_BASE_URL", "https://api.voyageai.com/v1"),
    apiKey,
    omitEncodingFormat: true,
    // Voyage caps a single request's summed input tokens; estimate-batch to
    // stay safely under it (same knobs/rationale as context-mcp).
    maxBatchTokens: intEnv(env, "VOYAGE_MAX_BATCH_TOKENS", 9e4),
    charsPerToken: intEnv(env, "VOYAGE_CHARS_PER_TOKEN", 2),
    // Voyage free tier rate-limits at 3 RPM → longer backoff than the default.
    maxRetries: intEnv(env, "VOYAGE_MAX_RETRIES", 6),
    retryBaseMs: intEnv(env, "VOYAGE_RETRY_BASE_MS", 2e3)
  });
}
1120
/**
 * Decide which embedding provider (if any) backs semantic search.
 *
 * `PROMETHEUS_MEMORY_EMBED` selects the mode (case-insensitive, trimmed):
 *  - "off": no embedder;
 *  - "voyage": Voyage, and a missing or blank VOYAGE_API_KEY is an error;
 *  - "prometheus": the Prometheus proxy, with the key obtained through
 *    `requireApiKey`;
 *  - anything else, including unset ("auto"): Voyage when VOYAGE_API_KEY is
 *    present, otherwise the Prometheus proxy when its key variable is
 *    present, otherwise no embedder.
 *
 * Keys are trimmed before the presence check, so a blank or whitespace-only
 * VOYAGE_API_KEY counts as absent and cannot shadow a usable Prometheus key.
 *
 * @param env Object mapping variable names to string values (e.g. `process.env`).
 * @returns `{ id, embedder }` where `id` is "voyage", "prometheus" or "none"
 *   and `embedder` is `undefined` for "none".
 * @throws Error in "voyage" mode without a key; whatever `requireApiKey`
 *   throws when the Prometheus proxy is selected.
 */
function discoverMemoryEmbedder(env) {
  const mode = (env.PROMETHEUS_MEMORY_EMBED ?? "auto").trim().toLowerCase();
  const baseUrl = env.PROMETHEUS_API_URL;
  const viaProxy = () => ({
    id: "prometheus",
    embedder: new PrometheusEmbeddingProvider({
      apiKey: requireApiKey(env),
      ...(baseUrl !== void 0 && baseUrl !== "" ? { baseUrl } : {})
    })
  });
  const viaVoyage = (key) => ({ id: "voyage", embedder: buildVoyageEmbedder(env, key) });
  const none = { id: "none", embedder: void 0 };
  const voyageKey = (env.VOYAGE_API_KEY ?? "").trim();

  if (mode === "off")
    return none;
  if (mode === "voyage") {
    if (voyageKey === "") {
      throw new Error('PROMETHEUS_MEMORY_EMBED="voyage" requires VOYAGE_API_KEY to be set.');
    }
    return viaVoyage(voyageKey);
  }
  if (mode === "prometheus")
    return viaProxy();

  // Auto: prefer the direct Voyage key, then the Prometheus proxy key.
  if (voyageKey !== "")
    return viaVoyage(voyageKey);
  const promKey = env[API_KEY_ENV]?.trim();
  if (promKey !== void 0 && promKey !== "")
    return viaProxy();
  return none;
}
331
1151
  function composeFromEnv(opts) {
332
1152
  const env = opts.env;
333
- requireApiKey(env);
334
1153
  const workspaceRoot = resolve(env.PROMETHEUS_WORKSPACE_ROOT ?? process.cwd());
335
1154
  const projectId = projectIdFor(workspaceRoot);
336
1155
  const projectName = basename(workspaceRoot) || workspaceRoot;
337
1156
  const rawDbPath = env.PROMETHEUS_MEMORY_DB_PATH;
338
1157
  const dbPath = rawDbPath !== void 0 && rawDbPath !== "" ? rawDbPath : defaultMemoryDbPath();
339
- const backend = new SqliteMemoryBackend(dbPath);
1158
+ const { id: embedderId, embedder } = discoverMemoryEmbedder(env);
1159
+ const backend = new SqliteMemoryBackend(dbPath, embedder !== void 0 ? { embedder } : {});
340
1160
  return {
341
1161
  backend,
342
1162
  workspaceRoot,
343
1163
  projectId,
344
1164
  projectName,
345
1165
  dbPath,
1166
+ embeddingsEnabled: embedder !== void 0,
1167
+ embedderId,
346
1168
  close: () => backend.close()
347
1169
  };
348
1170
  }
@@ -743,7 +1565,7 @@ var setupInput = {
743
1565
  };
744
1566
  function registerTools(server, deps) {
745
1567
  const { backend, workspaceRoot, projectId, projectName, dbPath } = deps;
746
- server.registerTool("memory_read", {
1568
+ server.registerTool("read", {
747
1569
  title: "Recall agent memory",
748
1570
  description: "Read agent memory for this project along the scope chain (project \u2192 workspace \u2192 tenant \u2192 system; narrowest scope wins). Syncs `.prometheus/memories/*.md` first, then returns the resolved records plus a prompt-ready `woven` markdown block (token-capped). Call this at the START of a session or task to recall what earlier sessions learned.",
749
1571
  inputSchema: readInput
@@ -763,7 +1585,7 @@ function registerTools(server, deps) {
763
1585
  records: records.map(recordToJson)
764
1586
  });
765
1587
  });
766
- server.registerTool("memory_write", {
1588
+ server.registerTool("write", {
767
1589
  title: "Store agent memory",
768
1590
  description: "Upsert one memory record (identity: scope+type+key). Use type `semantic` for durable facts, `procedural` for how-to knowledge, `episodic` for session events, `working` for short-lived notes. Default scope `project` also mirrors the value to `.prometheus/memories/<key>.md` (git-versioned, human-editable). Values matching the secret deny-list are rejected. Call this whenever the user states a durable preference, decision, or correction worth remembering.",
769
1591
  inputSchema: writeInput
@@ -790,7 +1612,7 @@ ${args.value}`);
790
1612
  }
791
1613
  return textResult({ record: recordToJson(record), projectFile });
792
1614
  });
793
- server.registerTool("memory_capture", {
1615
+ server.registerTool("capture", {
794
1616
  title: "Consolidate session learnings",
795
1617
  description: "Session-end consolidation: `plan`/`outcome` become one episodic record (key = sessionId), `facts` become semantic upserts, `procedures` become procedural upserts. Secret-bearing payloads are rejected. Call this at the END of a session to persist what was learned.",
796
1618
  inputSchema: captureInput
@@ -818,7 +1640,7 @@ ${p.value}`)
818
1640
  });
819
1641
  return textResult({ written: written.map(recordToJson) });
820
1642
  });
821
- server.registerTool("memory_search", {
1643
+ server.registerTool("search", {
822
1644
  title: "Search agent memory",
823
1645
  description: "Full-text search (FTS5) over memory keys and values within this project's scope chain, ranked by relevance. Returns matching records plus a highlighted snippet per hit. Use this when memory_read's recall is not specific enough. Does not bump useCount.",
824
1646
  inputSchema: searchInput
@@ -840,7 +1662,7 @@ ${p.value}`)
840
1662
  }))
841
1663
  });
842
1664
  });
843
- server.registerTool("memory_list", {
1665
+ server.registerTool("list", {
844
1666
  title: "List stored memory (admin)",
845
1667
  description: "Flat listing of this project's memory records without scope resolution \u2014 inspection/debug surface. Optional filters: scope, type, keyContains (case-insensitive substring).",
846
1668
  inputSchema: listInput
@@ -860,7 +1682,7 @@ ${p.value}`)
860
1682
  records: records.map(recordToJson)
861
1683
  });
862
1684
  });
863
- server.registerTool("memory_delete", {
1685
+ server.registerTool("delete", {
864
1686
  title: "Delete stored memory",
865
1687
  description: "Delete one memory record by identity (scope+type+key). For project-scoped semantic records the mirrored `.prometheus/memories/<key>.md` file is removed as well. Returns whether a record/file was actually removed.",
866
1688
  inputSchema: deleteInput
@@ -879,7 +1701,7 @@ ${p.value}`)
879
1701
  }
880
1702
  return textResult({ removed, fileRemoved });
881
1703
  });
882
- server.registerTool("memory_setup", {
1704
+ server.registerTool("setup", {
883
1705
  title: "Install memory rules into runtime configs",
884
1706
  description: "Idempotently install the Prometheus memory-protocol rule block into agent runtime configs in this workspace: CLAUDE.md (claude-code), .cursor/rules/prometheus-memory.mdc (cursor), .augment/rules/prometheus-memory.md (augment), AGENTS.md (agents). Without `runtimes` it auto-detects which runtimes are present (fallback: agents). Only the marked block is written \u2014 existing content is never touched. Re-running updates the block in place.",
885
1707
  inputSchema: setupInput
@@ -914,7 +1736,7 @@ function createServer(deps, options = {}) {
914
1736
  // dist/bin.js
915
1737
  async function main() {
916
1738
  const composed = composeFromEnv({ env: process.env });
917
- process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath}
1739
+ process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath} embed=${composed.embedderId}${composed.embeddingsEnabled ? "" : " (keyword-only)"}
918
1740
  `);
919
1741
  const server = createServer(composed);
920
1742
  const transport = new StdioServerTransport();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prom.codes/memory-mcp",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Prometheus Agent Memory — persistent, local-first agent memory as an MCP server.",
5
5
  "type": "module",
6
6
  "bin": {