npm - @prom.codes/memory-mcp - Versions diffs - 0.1.0 → 0.2.0 - Mend

@prom.codes/memory-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -10,28 +10,42 @@ as git-versioned markdown under `.prometheus/memories/` in your repo.
 ## Quick start
 ```jsonc
-// Claude Desktop / Cursor MCP config
+// Claude Desktop / Cursor MCP config — dock under the server name `memory`
+// so the tools resolve to memory_read / memory_write / … (no double prefix).
 {
   "mcpServers": {
-    "prometheus-memory": {
+    "memory": {
       "command": "npx",
       "args": ["-y", "@prom.codes/memory-mcp@latest"],
       "env": {
-        "PROMETHEUS_API_KEY": "prom_live_…",
-        "PROMETHEUS_WORKSPACE_ROOT": "/absolute/path/to/your/repo"
+        "PROMETHEUS_WORKSPACE_ROOT": "/absolute/path/to/your/repo",
+        "VOYAGE_API_KEY": "pa-…" // optional: enables semantic recall (search)
       }
     }
   }
 }
 ```
+No API key is required to start: the server boots in keyword mode out of the
+box. Add a `VOYAGE_API_KEY` for semantic search (embedding requests go straight
+from your machine to Voyage — only the memory text being embedded is sent), or
+a `prom_live_…` `PROMETHEUS_API_KEY` to route embeddings through the metered
+Prometheus proxy.
 Then ask your agent to run `memory_setup` once per workspace — it installs
 the memory protocol into your runtime rule files (CLAUDE.md, .cursor/rules,
 .augment/rules or AGENTS.md) so the agent reads memory at session start and
 captures learnings at session end.
-Tools: `memory_read`, `memory_write`, `memory_capture`, `memory_search`,
-`memory_list`, `memory_delete`, `memory_setup`. Secrets are rejected on
-every write. Your memories never leave your machine.
+Tools (docked as `memory`): `memory_read`, `memory_write`, `memory_capture`,
+`memory_search`, `memory_list`, `memory_delete`, `memory_setup`. Secrets are
+rejected on every write. Your memory store never leaves your machine; when
+semantic search is enabled, only the text being embedded is sent to the
+embedding provider.
+## Native modules
+Uses `better-sqlite3` (native). Prebuilt binaries are fetched automatically on
+common platforms (macOS x64/arm64, Linux x64/arm64, Windows x64) — no compiler
+needed. On an unsupported platform/Node ABI, install C/C++ build tools so the
+module can compile (Windows: `npm i -g windows-build-tools` or VS Build Tools).
+Requires Node ≥ 20.10.
 Docs: https://prom.codes/docs

package/dist/bin.js CHANGED Viewed

@@ -8,6 +8,503 @@ import { createHash } from "node:crypto";
 import { homedir } from "node:os";
 import { basename, join, resolve } from "node:path";
+// ../embeddings-openai-compat/dist/index.js
// Fallbacks applied by OpenAICompatEmbeddingProvider when the matching
// constructor option is omitted.
var DEFAULT_BATCH = 96; // batchSize: max texts per request
var DEFAULT_RETRIES = 4; // maxRetries
var DEFAULT_BACKOFF = 250; // retryBaseMs
var DEFAULT_RETRY_MAX = 60000; // retryMaxMs: upper bound on a single backoff
var DEFAULT_CONCURRENCY = 1; // maxConcurrency
var DEFAULT_MAX_BATCH_TOKENS = 0; // maxBatchTokens: 0 disables the token budget
var DEFAULT_CHARS_PER_TOKEN = 4; // charsPerToken: divisor for the token estimate
/**
 * Parse a `Retry-After` header value into a delay in milliseconds.
 *
 * Accepts the two HTTP forms: a non-negative number of seconds
 * (fractions allowed) or a date string. A date in the past yields 0.
 *
 * @param {string | null | undefined} value Raw header value.
 * @param {number} [now] Reference time in epoch ms, used for the date form.
 * @returns {number | null} Delay in ms, or `null` when the value is missing,
 *   not a string, blank, or unparseable.
 */
function parseRetryAfterMs(value, now = Date.now()) {
  // Missing header (null/undefined) or a non-string: nothing to parse.
  if (typeof value !== "string")
    return null;
  const trimmed = value.trim();
  if (trimmed === "")
    return null;
  if (/^[0-9]+(\.[0-9]+)?$/.test(trimmed)) {
    const secs = Number(trimmed);
    // An absurdly long digit string overflows to Infinity.
    if (!Number.isFinite(secs) || secs < 0)
      return null;
    return Math.round(secs * 1e3);
  }
  // Only attempt date parsing when letters are present, so inputs such as
  // "-3" or "2015-10-21" are rejected instead of being read as dates.
  if (!/[A-Za-z]/.test(trimmed))
    return null;
  const ts = Date.parse(trimmed);
  if (!Number.isFinite(ts))
    return null;
  return Math.max(0, ts - now);
}
/**
 * Resolve after `ms` milliseconds, or reject early when `signal` aborts.
 *
 * The rejection is an `Error` with message "aborted" and `name` set to
 * "AbortError", so retry loops that test `err?.name === "AbortError"` treat an
 * abort during backoff as a cancellation instead of a retryable failure.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>}
 */
function sleep(ms, signal) {
  const abortError = () => {
    const err = new Error("aborted");
    err.name = "AbortError";
    return err;
  };
  return new Promise((resolve2, reject) => {
    if (signal?.aborted === true) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate them.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
/**
 * Embedding client for endpoints that accept `POST {baseUrl}/embeddings` with
 * an `{ input, model }` JSON body and answer `{ data: [{ index, embedding }] }`.
 *
 * Inputs are split into ordered batches (item count and, optionally, an
 * estimated token budget), sent with bounded concurrency, and retried on
 * HTTP 429 / 5xx and on thrown fetch errors. `embed()` returns one
 * `Float32Array` per input text, in input order.
 */
var OpenAICompatEmbeddingProvider = class {
  name;
  model;
  dimension;
  region;
  #baseUrl;
  #apiKey;
  #sendDimensions;
  #omitEncodingFormat;
  #batchSize;
  #maxBatchTokens;
  #charsPerToken;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #maxConcurrency;
  #fetch;
  /**
   * @param {object} opts Provider options. `name`, `model`, `dimension`,
   *   `region` and `baseUrl` are read unconditionally; every other option
   *   falls back to a module default when omitted.
   * @throws {Error} When `dimension`, `maxConcurrency`, `maxBatchTokens` or
   *   `charsPerToken` is out of range.
   */
  constructor(opts) {
    if (!Number.isInteger(opts.dimension) || opts.dimension <= 0) {
      throw new Error(`OpenAICompatEmbeddingProvider: dimension must be a positive integer, got ${opts.dimension}`);
    }
    if (opts.maxConcurrency !== void 0 && (!Number.isInteger(opts.maxConcurrency) || opts.maxConcurrency <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxConcurrency must be a positive integer, got ${opts.maxConcurrency}`);
    }
    if (opts.maxBatchTokens !== void 0 && (!Number.isFinite(opts.maxBatchTokens) || opts.maxBatchTokens < 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: maxBatchTokens must be a non-negative number, got ${opts.maxBatchTokens}`);
    }
    if (opts.charsPerToken !== void 0 && (!Number.isFinite(opts.charsPerToken) || opts.charsPerToken <= 0)) {
      throw new Error(`OpenAICompatEmbeddingProvider: charsPerToken must be a positive number, got ${opts.charsPerToken}`);
    }
    this.name = opts.name;
    this.model = opts.model;
    this.dimension = opts.dimension;
    this.region = opts.region;
    // Strip trailing slashes so `${baseUrl}/embeddings` never doubles them.
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#sendDimensions = opts.sendDimensions ?? false;
    this.#omitEncodingFormat = opts.omitEncodingFormat ?? false;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH;
    this.#maxBatchTokens = opts.maxBatchTokens ?? DEFAULT_MAX_BATCH_TOKENS;
    this.#charsPerToken = opts.charsPerToken ?? DEFAULT_CHARS_PER_TOKEN;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX;
    this.#maxConcurrency = opts.maxConcurrency ?? DEFAULT_CONCURRENCY;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Embed `texts` and return the vectors in the same order.
   *
   * @param {string[]} texts Inputs to embed; an empty array returns `[]`.
   * @param {{ signal?: AbortSignal, onProgress?: Function }} [opts]
   *   `onProgress({ done, total, batchSize })` is called after each batch;
   *   anything it throws is ignored.
   * @returns {Promise<Float32Array[]>}
   */
  async embed(texts, opts) {
    if (texts.length === 0)
      return [];
    const total = texts.length;
    const out = new Array(total);
    const onProgress = opts?.onProgress;
    const batches = this.#planBatches(texts);
    let doneCount = 0;
    const emit = (batchSize) => {
      if (onProgress === void 0)
        return;
      try {
        onProgress({ done: doneCount, total, batchSize });
      } catch {
        // A faulty progress callback must not fail the embed.
      }
    };
    const runOne = async (range) => {
      const batch = texts.slice(range.start, range.start + range.count);
      const vectors = await this.#embedBatch(batch, opts?.signal);
      for (let i = 0; i < vectors.length; i++)
        out[range.start + i] = vectors[i];
      doneCount += batch.length;
      emit(batch.length);
    };
    if (this.#maxConcurrency <= 1) {
      for (const range of batches)
        await runOne(range);
    } else {
      // Each lane pulls the next unclaimed batch index until none remain.
      let next = 0;
      const worker = async () => {
        while (true) {
          const idx = next++;
          if (idx >= batches.length)
            return;
          await runOne(batches[idx]);
        }
      };
      const workers = [];
      const lanes = Math.min(this.#maxConcurrency, batches.length);
      for (let i = 0; i < lanes; i++)
        workers.push(worker());
      await Promise.all(workers);
    }
    return out;
  }
  /**
   * Partition `texts` into ordered `[start, count)` ranges. Each range is
   * bounded by `#batchSize` (item count) and, when `#maxBatchTokens > 0`,
   * by an *estimated* token budget (text length / `#charsPerToken`). A
   * single text whose own estimate already exceeds the budget still gets
   * its own one-item batch (the provider truncates it server-side) so the
   * planner always makes forward progress.
   */
  #planBatches(texts) {
    const total = texts.length;
    const batches = [];
    let start = 0;
    while (start < total) {
      let count = 0;
      let tokens = 0;
      while (start + count < total && count < this.#batchSize) {
        const est = this.#maxBatchTokens > 0 ? Math.ceil(texts[start + count].length / this.#charsPerToken) : 0;
        if (this.#maxBatchTokens > 0 && count > 0 && tokens + est > this.#maxBatchTokens) {
          break;
        }
        tokens += est;
        count += 1;
      }
      // Guarantees progress even when `#batchSize` is not positive.
      if (count === 0)
        count = 1;
      batches.push({ start, count });
      start += count;
    }
    return batches;
  }
  /**
   * Send one batch, retrying on HTTP 429 / 5xx and on thrown fetch errors up
   * to `#maxRetries` times. Other non-OK statuses, and errors flagged
   * `nonRetryable`, fail immediately. Once `signal` has aborted, the error in
   * hand is rethrown as-is so the abort/timeout reason reaches the caller.
   */
  async #embedBatch(batch, signal) {
    const body = {
      input: batch,
      model: this.model
    };
    if (!this.#omitEncodingFormat)
      body.encoding_format = "float";
    if (this.#sendDimensions)
      body.dimensions = this.dimension;
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0 && this.#apiKey !== "") {
      headers.authorization = `Bearer ${this.#apiKey}`;
    }
    const init = {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    };
    if (signal !== void 0)
      init.signal = signal;
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/embeddings`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          // The body of a retried response is never read; release it.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          const err = new Error(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
          err.nonRetryable = true;
          throw err;
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // Cancellation is terminal. Checking `signal.aborted` also covers
        // rejections that are not named "AbortError" (e.g. a timeout reason).
        if (err?.name === "AbortError" || signal?.aborted === true)
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful to do if the stream cannot be cancelled.
    }
  }
  /**
   * Compute the per-attempt backoff. Exponential growth starts from
   * `retryBaseMs` and doubles per attempt; a `Retry-After` header value
   * (if any, parsed by {@link parseRetryAfterMs}) raises the floor so we
   * never undercut a server-advertised wait; the result is capped at
   * `retryMaxMs` to prevent unbounded stalls from misbehaving servers.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate the response shape and convert it to `Float32Array`s ordered by
   * each row's `index`. A malformed payload (including `null`) is reported as
   * a non-retryable error.
   */
  #decode(payload, expected) {
    const data = payload?.data;
    if (!Array.isArray(data) || data.length !== expected) {
      throw nonRetryable(`${this.name}: expected ${expected} embeddings, got ${data?.length ?? 0}`);
    }
    const sorted = [...data].sort((a, b) => a.index - b.index);
    return sorted.map((row) => {
      if (!Array.isArray(row.embedding) || row.embedding.length !== this.dimension) {
        throw nonRetryable(`${this.name}: embedding length ${row.embedding?.length ?? 0} does not match declared dimension ${this.dimension}`);
      }
      return Float32Array.from(row.embedding);
    });
  }
};
/**
 * Build an `Error` flagged with `nonRetryable = true`, the marker the retry
 * loop checks to rethrow immediately instead of backing off.
 *
 * @param {string} message Error message.
 * @returns {Error}
 */
function nonRetryable(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
+// ../embeddings-prometheus/dist/index.js
/**
 * Raised when an embed response reports a different embedding-space
 * fingerprint than the one resolved earlier.
 */
var PrometheusEmbeddingDriftError = class extends Error {
  /**
   * Stable string code consumers match on (`err.code === "EMBEDDING_DRIFT"`)
   * rather than importing this class, which keeps the indexer free of a
   * package dependency on this adapter.
   */
  code = "EMBEDDING_DRIFT";
  /** Fingerprint that was expected. */
  expected;
  /** Fingerprint that was actually received. */
  actual;
  constructor(expected, actual) {
    const change = `(fingerprint ${expected} -> ${actual})`;
    super(`prometheus-embed: embedding space changed upstream ${change} \u2014 a full re-index is required`);
    Object.assign(this, { name: "PrometheusEmbeddingDriftError", expected, actual });
  }
};
// Fallbacks applied by PrometheusEmbeddingProvider when the matching
// constructor option is omitted.
var DEFAULT_BASE = "https://api.prom.codes"; // baseUrl ("/embed" is appended)
var DEFAULT_BATCH2 = 128; // batchSize: max texts per request
var DEFAULT_BATCH_CHARS = 400000; // maxBatchChars: summed text length per request
var DEFAULT_RETRIES2 = 4; // maxRetries
var DEFAULT_BACKOFF2 = 250; // retryBaseMs
/**
 * Wait `ms` milliseconds unless `signal` aborts first.
 *
 * On abort the promise rejects with an `Error` whose message is "aborted" and
 * whose `name` is "AbortError"; the retry loop in this module recognises
 * cancellations by that name.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>}
 */
function sleep2(ms, signal) {
  return new Promise((resolve2, reject) => {
    const rejectAborted = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      reject(err);
    };
    if (signal?.aborted === true) {
      rejectAborted();
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      rejectAborted();
    };
    const timer = setTimeout(() => {
      // Drop the listener once the wait completes normally.
      signal?.removeEventListener("abort", onAbort);
      resolve2();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
/**
 * Create an `Error` carrying `nonRetryable = true` so the request loop fails
 * fast on it instead of retrying.
 *
 * @param {string} message Error message.
 * @returns {Error}
 */
function nonRetryable2(message) {
  const failure = new Error(message);
  return Object.assign(failure, { nonRetryable: true });
}
/**
 * Embedding client for the Prometheus proxy's `{baseUrl}/embed` endpoint.
 *
 * A GET on the endpoint resolves the embedding-space identity (fingerprint +
 * dimension); POSTs embed batches. `model` and `dimension` are only readable
 * once that identity has been resolved.
 */
var PrometheusEmbeddingProvider = class {
  name;
  region;
  #apiKey;
  #url;
  #batchSize;
  #maxBatchChars;
  #maxRetries;
  #retryBaseMs;
  #fetch;
  #identity = null;
  #identityPromise = null;
  #creditsUsed = 0;
  /**
   * @param {object} opts Provider options; `apiKey` is mandatory, everything
   *   else falls back to a module default.
   * @throws {Error} When `apiKey` is missing, not a string, or empty.
   */
  constructor(opts) {
    // A missing key would otherwise be sent as "Bearer undefined".
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("PrometheusEmbeddingProvider: apiKey is required");
    }
    this.name = opts.name ?? "prometheus";
    this.region = opts.region ?? "eu";
    this.#apiKey = opts.apiKey;
    this.#url = `${(opts.baseUrl ?? DEFAULT_BASE).replace(/\/+$/, "")}/embed`;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH2;
    this.#maxBatchChars = opts.maxBatchChars ?? DEFAULT_BATCH_CHARS;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES2;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * The abstract fingerprint stands in for the (hidden) upstream model
   * id. Throws until the identity has been resolved — call
   * {@link resolveIdentity} (or `embed()`) first.
   */
  get model() {
    return this.#requireIdentity().fingerprint;
  }
  /** Vector dimension. Throws until the identity has been resolved. */
  get dimension() {
    return this.#requireIdentity().dimension;
  }
  /** Cumulative credits charged across all embed calls of this instance. */
  get creditsUsed() {
    return this.#creditsUsed;
  }
  /**
   * Resolve (and cache) the proxy's embedding-space identity via the
   * free GET. Safe to call concurrently; a failed resolution is not
   * cached, so callers may retry.
   */
  async resolveIdentity(signal) {
    if (this.#identity !== null)
      return this.#identity;
    if (this.#identityPromise === null) {
      this.#identityPromise = this.#fetchIdentity(signal);
      // Clear the shared promise on failure so the next call starts afresh.
      this.#identityPromise.catch(() => {
        this.#identityPromise = null;
      });
    }
    return this.#identityPromise;
  }
  /**
   * Embed `texts` and return one `Float32Array` per text, in input order.
   *
   * @param {string[]} texts Inputs; an empty array returns `[]`.
   * @param {{ signal?: AbortSignal, inputType?: string, onProgress?: Function }} [opts]
   *   `inputType` defaults to "document". `onProgress({ done, total,
   *   batchSize })` runs after each batch; anything it throws is ignored.
   * @returns {Promise<Float32Array[]>}
   */
  async embed(texts, opts) {
    if (texts.length === 0)
      return [];
    const identity = await this.resolveIdentity(opts?.signal);
    const out = new Array(texts.length);
    let done = 0;
    let start = 0;
    while (start < texts.length) {
      const end = this.#batchEnd(texts, start);
      const batch = texts.slice(start, end);
      const vectors = await this.#embedBatch(batch, identity, opts?.inputType ?? "document", opts?.signal);
      for (let i = 0; i < vectors.length; i++)
        out[start + i] = vectors[i];
      done += batch.length;
      if (opts?.onProgress !== void 0) {
        try {
          opts.onProgress({ done, total: texts.length, batchSize: batch.length });
        } catch {
          // A faulty progress callback must not fail the embed.
        }
      }
      start = end;
    }
    return out;
  }
  #requireIdentity() {
    if (this.#identity === null) {
      throw new Error("PrometheusEmbeddingProvider: identity not resolved yet \u2014 await resolveIdentity() (or a first embed()) before reading model/dimension");
    }
    return this.#identity;
  }
  /** GET the identity, validate its shape and cache it on the instance. */
  async #fetchIdentity(signal) {
    const init = {
      method: "GET",
      headers: { authorization: `Bearer ${this.#apiKey}` }
    };
    if (signal !== void 0)
      init.signal = signal;
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || typeof payload.fingerprint !== "string" || payload.fingerprint === "" || !Number.isInteger(payload.dimension) || payload.dimension <= 0) {
      throw nonRetryable2("prometheus-embed: malformed identity response");
    }
    const identity = {
      fingerprint: payload.fingerprint,
      dimension: payload.dimension
    };
    this.#identity = identity;
    return identity;
  }
  /** Greedy batch cut respecting both the item cap and the char budget. */
  #batchEnd(texts, start) {
    let chars = 0;
    let end = start;
    while (end < texts.length && end - start < this.#batchSize) {
      const len = texts[end].length;
      // `end > start` lets an oversized single text through as its own batch.
      if (end > start && chars + len > this.#maxBatchChars)
        break;
      chars += len;
      end += 1;
    }
    return end;
  }
  /**
   * POST one batch and decode it. A fingerprint differing from `identity`
   * clears the cached identity and throws `PrometheusEmbeddingDriftError`.
   * Credits reported in `usage.credits` are added to the running total.
   */
  async #embedBatch(batch, identity, inputType, signal) {
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify({ input: batch, inputType })
    };
    if (signal !== void 0)
      init.signal = signal;
    const payload = await this.#requestJson(init, signal);
    if (payload?.ok !== true || !Array.isArray(payload.embeddings)) {
      throw nonRetryable2("prometheus-embed: malformed embed response");
    }
    if (typeof payload.fingerprint === "string" && payload.fingerprint !== identity.fingerprint) {
      this.#identity = null;
      this.#identityPromise = null;
      throw new PrometheusEmbeddingDriftError(identity.fingerprint, payload.fingerprint);
    }
    if (payload.embeddings.length !== batch.length) {
      throw nonRetryable2(`prometheus-embed: expected ${batch.length} embeddings, got ${payload.embeddings.length}`);
    }
    const sorted = [...payload.embeddings].sort((a, b) => a.index - b.index);
    const vectors = sorted.map((row) => {
      if (!Array.isArray(row.vector) || row.vector.length !== identity.dimension) {
        throw nonRetryable2(`prometheus-embed: embedding length ${row.vector?.length ?? 0} does not match resolved dimension ${identity.dimension}`);
      }
      return Float32Array.from(row.vector);
    });
    const credits = payload.usage?.credits;
    if (typeof credits === "number" && Number.isFinite(credits)) {
      this.#creditsUsed += credits;
    }
    return vectors;
  }
  /**
   * Fetch with retry. 5xx and network errors back off exponentially;
   * everything else (401 invalid key, 413 oversized input, 429 quota
   * exhausted — a *monthly* limit, retrying cannot help) fails fast
   * with the proxy's error code in the message. Once `signal` has aborted,
   * the error in hand is rethrown as-is so the abort/timeout reason is kept.
   */
  async #requestJson(init, signal) {
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(this.#url, init);
        if (res.status >= 500 && res.status < 600) {
          lastError = new Error(`prometheus-embed: HTTP ${res.status}`);
          // The body of a retried response is never read; release it.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
          continue;
        }
        if (!res.ok) {
          const body = await res.json().catch(() => null);
          const detail = typeof body?.code === "string" ? `${body.code}${typeof body.error === "string" ? ` \u2014 ${body.error}` : ""}` : res.statusText;
          throw nonRetryable2(`prometheus-embed: HTTP ${res.status} ${detail}`);
        }
        return await res.json();
      } catch (err) {
        // Cancellation is terminal. Checking `signal.aborted` also covers
        // rejections that are not named "AbortError" (e.g. a timeout reason).
        if (err?.name === "AbortError" || signal?.aborted === true)
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep2(this.#retryBaseMs * 2 ** (attempt - 1), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`prometheus-embed: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful to do if the stream cannot be cancelled.
    }
  }
};
 // dist/api-key.js
 var KEY_PATTERN = /^prom_(live|test)_[A-Za-z0-9]{10,}$/;
 var API_KEY_ENV = "PROMETHEUS_API_KEY";
@@ -28,6 +525,56 @@ import { mkdirSync } from "node:fs";
 import { dirname } from "node:path";
 import Database from "better-sqlite3";
+// dist/rrf.js
/**
 * Merge several ranked lists with Reciprocal Rank Fusion.
 *
 * Each list item adds `weight / (k + rank)` to its key's score, where `rank`
 * is the 1-based position among the list's distinct keys (repeats of a key
 * inside one list are ignored). The payload kept for a key is the one from
 * its first appearance. Results are ordered by score, highest first; equal
 * scores keep first-appearance order.
 *
 * @param {Array<{ id: string, weight?: number, items: Array<{ key: *, payload: * }> }>} lists
 * @param {{ k?: number, limit?: number }} [options] `k` defaults to 60; a
 *   non-negative `limit` truncates the result.
 * @returns {Array<{ key: *, score: number, contribs: object, payload: * }>}
 * @throws {Error} When `k` is not a finite number greater than 0.
 */
function reciprocalRankFusion(lists, options = {}) {
  const k = options.k ?? 60;
  if (!Number.isFinite(k) || k <= 0) {
    throw new Error(`reciprocalRankFusion: k must be > 0, got ${k}`);
  }
  // One accumulator per key; Map iteration order doubles as first-seen order.
  const byKey = /* @__PURE__ */ new Map();
  for (const { id, weight = 1, items } of lists) {
    const seenInList = /* @__PURE__ */ new Set();
    for (const { key, payload } of items) {
      if (seenInList.has(key))
        continue;
      seenInList.add(key);
      const delta = weight / (k + seenInList.size);
      let entry = byKey.get(key);
      if (entry === void 0) {
        entry = { key, score: 0, contribs: {}, payload, order: byKey.size };
        byKey.set(key, entry);
      }
      entry.score += delta;
      entry.contribs[id] = (entry.contribs[id] ?? 0) + delta;
    }
  }
  const ranked = [...byKey.values()].sort((a, b) => b.score !== a.score ? b.score - a.score : a.order - b.order);
  const merged = ranked.map(({ key, score, contribs, payload }) => ({ key, score, contribs, payload }));
  const { limit } = options;
  return limit !== void 0 && limit >= 0 ? merged.slice(0, limit) : merged;
}
 // dist/types.js
 var MEMORY_SCOPES = [
   "system",
@@ -131,11 +678,54 @@ CREATE TRIGGER IF NOT EXISTS agent_memory_au AFTER UPDATE ON agent_memory BEGIN
   VALUES (new.rowid, new.key, new.value);
 END;
 `;
// DDL for the vector side of the store:
//  - agent_memory_vec: one vector blob (plus its dimension) per record id.
//  - embedding_meta: a single row (CHECK id = 1) holding the embedding-space
//    fingerprint and dimension.
//  - agent_memory_vec_ad: removes a record's vector when the record is
//    deleted from agent_memory.
var VEC_SCHEMA = `
CREATE TABLE IF NOT EXISTS agent_memory_vec (
  record_id TEXT PRIMARY KEY,
  vector    BLOB NOT NULL,
  dim       INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS embedding_meta (
  id          INTEGER PRIMARY KEY CHECK (id = 1),
  fingerprint TEXT NOT NULL,
  dim         INTEGER NOT NULL
);
CREATE TRIGGER IF NOT EXISTS agent_memory_vec_ad AFTER DELETE ON agent_memory BEGIN
  DELETE FROM agent_memory_vec WHERE record_id = old.id;
END;
`;
/**
 * Serialise a vector as consecutive little-endian float32 values.
 *
 * The bytes are written explicitly as little-endian so they always match
 * `blobToVector`, which reads little-endian, regardless of host byte order.
 * The returned Buffer is a copy, not a view over the caller's memory.
 *
 * @param {ArrayLike<number>} vector Values to store (e.g. a `Float32Array`).
 * @returns {Buffer} `vector.length * 4` bytes.
 */
function vectorToBlob(vector) {
  const blob = Buffer.allocUnsafe(vector.length * 4);
  for (let i = 0; i < vector.length; i++)
    blob.writeFloatLE(vector[i], i * 4);
  return blob;
}
/**
 * Decode a blob of little-endian float32 values into a `Float32Array`.
 *
 * Works on any byte view (`Buffer` or plain `Uint8Array`) by reading through
 * a `DataView`. Trailing bytes that do not form a whole float are ignored.
 *
 * @param {Uint8Array} blob Raw bytes.
 * @returns {Float32Array} `floor(blob.byteLength / 4)` values.
 */
function blobToVector(blob) {
  const count = Math.floor(blob.byteLength / 4);
  // Honour byteOffset: the view may sit inside a larger (pooled) ArrayBuffer.
  const view = new DataView(blob.buffer, blob.byteOffset, count * 4);
  const out = new Float32Array(count);
  for (let i = 0; i < count; i++)
    out[i] = view.getFloat32(i * 4, true);
  return out;
}
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} b Second vector.
 * @returns {number} Value in [-1, 1]; 0 when either vector has zero
 *   magnitude or when the lengths differ (vectors of different dimension
 *   are not comparable, and reading past the shorter one would produce
 *   `NaN` or a silently truncated result).
 */
function cosine(a, b) {
  if (a.length !== b.length)
    return 0;
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    na += x * x;
    nb += y * y;
  }
  if (na === 0 || nb === 0)
    return 0;
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}
/**
 * Build a one-line preview of `value`: whitespace runs collapse to a single
 * space, the result is trimmed, and anything longer than `cap` UTF-16 units
 * is cut and suffixed with " …".
 *
 * The cut never leaves half of a surrogate pair behind: if it would land
 * between the two halves of an astral character (e.g. an emoji), that
 * character is dropped entirely.
 *
 * @param {string} value Source text.
 * @param {number} [cap=200] Maximum length before truncation.
 * @returns {string}
 */
function fallbackSnippet(value, cap = 200) {
  const flat = value.replace(/\s+/g, " ").trim();
  if (flat.length <= cap)
    return flat;
  let head = flat.slice(0, cap);
  const last = head.charCodeAt(head.length - 1);
  // High surrogate at the cut point: its low half was sliced away.
  if (last >= 0xd800 && last <= 0xdbff)
    head = head.slice(0, -1);
  return `${head} \u2026`;
}
 function toFtsQuery(query) {
   const tokens = query.split(/\s+/).map((t) => t.replace(/"/g, "").trim()).filter((t) => t.length > 0);
   if (tokens.length === 0)
     return "";
-  return tokens.map((t) => `"${t}" *`).join(" AND ");
+  return tokens.map((t) => `"${t}" *`).join(" OR ");
 }
 function rowToRecord(row) {
   return {
@@ -155,8 +745,11 @@ function rowToRecord(row) {
 }
 var SqliteMemoryBackend = class {
   db;
+  embedder;
+  /** Record ids whose vector is missing/stale, awaiting a batched embed. */
+  pendingEmbed = /* @__PURE__ */ new Set();
   closed = false;
-  constructor(dbPath) {
+  constructor(dbPath, opts = {}) {
     if (dbPath !== ":memory:") {
       mkdirSync(dirname(dbPath), { recursive: true });
     }
@@ -164,7 +757,24 @@ var SqliteMemoryBackend = class {
     this.db.pragma("journal_mode = WAL");
     this.db.exec(SCHEMA);
     this.db.exec(FTS_SCHEMA);
+    this.db.exec(VEC_SCHEMA);
     this.db.exec(`INSERT INTO agent_memory_fts (agent_memory_fts) VALUES ('rebuild')`);
+    this.embedder = opts.embedder;
+    if (this.embedder !== void 0)
+      this.queueUnembedded();
+  }
+  /**
+   * Queue every record lacking a stored vector for a (re)embed. Run at
+   * open so a DB first built keyword-only — or one whose vectors were
+   * wiped after an embedding-space change — lazily catches up on the next
+   * `search` instead of needing a manual reindex.
+   */
+  queueUnembedded() {
+    const rows = this.db.prepare(`SELECT m.id AS id FROM agent_memory m
+         LEFT JOIN agent_memory_vec v ON v.record_id = m.id
+         WHERE v.record_id IS NULL`).all();
+    for (const r of rows)
+      this.pendingEmbed.add(r.id);
   }
   audit(action, fields, detail) {
     this.db.prepare(`INSERT INTO audit_log (ts, action, scope, scope_id, type, key, detail)
@@ -177,6 +787,8 @@ var SqliteMemoryBackend = class {
       this.db.prepare(`UPDATE agent_memory SET value = ?, confidence = ?, source = ?, tags = ?, updated_at = ?
            WHERE id = ?`).run(input.value, input.confidence ?? null, input.source ?? existing.source, input.tags ? JSON.stringify(input.tags) : existing.tags, now, existing.id);
       this.audit("write.update", input);
+      if (this.embedder !== void 0)
+        this.pendingEmbed.add(existing.id);
       return this.byId(existing.id);
     }
     const id = randomUUID();
@@ -184,6 +796,8 @@ var SqliteMemoryBackend = class {
            (id, project_id, scope, scope_id, type, key, value, confidence, source, tags, use_count, created_at, updated_at)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?)`).run(id, input.projectId, input.scope, input.scopeId, input.type, input.key, input.value, input.confidence ?? null, input.source ?? null, input.tags ? JSON.stringify(input.tags) : null, now, now);
     this.audit("write.insert", input);
+    if (this.embedder !== void 0)
+      this.pendingEmbed.add(id);
     return this.byId(id);
   }
   byId(id) {
@@ -234,9 +848,40 @@ var SqliteMemoryBackend = class {
     const rows = this.db.prepare(sql).all(...params);
     return rows.map(rowToRecord);
   }
+  /**
+   * Hybrid search: FTS5 BM25 (keyword) ⊕ vector cosine (semantic), fused
+   * via RRF. The vector channel is best-effort — when no embedder is
+   * configured, or it is unreachable (offline / no key / proxy error),
+   * the method degrades to pure keyword search, byte-for-byte the Phase-1
+   * behaviour. This is the local-first guarantee: semantic recall is an
+   * enhancement, never a hard dependency.
+   */
   async search(input) {
     if (input.chain.length === 0)
       return [];
+    const finalLimit = input.limit ?? 20;
+    const poolLimit = Math.max(finalLimit * 4, 40);
+    const ftsHits = this.ftsSearch(input, poolLimit);
+    let vecHits = [];
+    if (this.embedder !== void 0) {
+      try {
+        vecHits = await this.vectorSearch(input, poolLimit);
+      } catch {
+        vecHits = [];
+      }
+    }
+    if (vecHits.length === 0)
+      return ftsHits.slice(0, finalLimit);
+    if (ftsHits.length === 0)
+      return vecHits.slice(0, finalLimit);
+    const fused = reciprocalRankFusion([
+      { id: "fts", items: ftsHits.map((h) => ({ key: h.record.id, payload: h })) },
+      { id: "vec", items: vecHits.map((h) => ({ key: h.record.id, payload: h })) }
+    ], { limit: finalLimit });
+    return fused.map((f) => f.payload);
+  }
+  /** FTS5 BM25 keyword channel → ranked hits (best first). */
+  ftsSearch(input, limit) {
     const match = toFtsQuery(input.query);
     if (match === "")
       return [];
@@ -253,12 +898,131 @@ var SqliteMemoryBackend = class {
       params.push(...input.types);
     }
     sql += ` ORDER BY rank LIMIT ?`;
-    params.push(input.limit ?? 20);
+    params.push(limit);
     const rows = this.db.prepare(sql).all(...params);
-    return rows.map((row) => ({
-      record: rowToRecord(row),
-      snippet: row.snip
-    }));
+    return rows.map((row) => ({ record: rowToRecord(row), snippet: row.snip }));
+  }
+  /**
+   * Vector channel: brute-force cosine of the query vector against every
+   * in-scope stored vector (memory sets are tiny — no ANN index needed).
+   * Flushes pending embeds first so freshly written records are searchable.
+   * May throw on an embed failure; the caller (`search`) catches it.
+   */
+  async vectorSearch(input, limit) {
+    if (this.embedder === void 0)
+      return [];
+    await this.flushEmbeddings();
+    const scopePairs = input.chain.map(() => `(m.scope = ? AND m.scope_id = ?)`).join(" OR ");
+    const params = [];
+    params.push(...input.chain.flatMap((l) => [l.scope, l.scopeId]));
+    let sql = `
+      SELECT m.*, v.vector AS vec FROM agent_memory_vec v
+      JOIN agent_memory m ON m.id = v.record_id
+      WHERE (${scopePairs})`;
+    if (input.types && input.types.length > 0) {
+      sql += ` AND m.type IN (${input.types.map(() => "?").join(", ")})`;
+      params.push(...input.types);
+    }
+    const rows = this.db.prepare(sql).all(...params);
+    if (rows.length === 0)
+      return [];
+    const embedded = await this.embedder.embed([input.query], { inputType: "query" });
+    const queryVec = embedded[0];
+    if (queryVec === void 0)
+      return [];
+    const scored = [];
+    for (const row of rows) {
+      const vec = blobToVector(row.vec);
+      if (vec.length !== queryVec.length)
+        continue;
+      const score = cosine(queryVec, vec);
+      if (!(score > 0))
+        continue;
+      const record = rowToRecord(row);
+      scored.push({
+        hit: { record, snippet: fallbackSnippet(record.value) },
+        score
+      });
+    }
+    scored.sort((a, b) => b.score - a.score);
+    return scored.slice(0, limit).map((s) => s.hit);
+  }
+  /** Read the pinned embedding-space identity, if any. */
+  getEmbeddingMeta() {
+    const row = this.db.prepare(`SELECT fingerprint, dim FROM embedding_meta WHERE id = 1`).get();
+    return row ?? null;
+  }
+  /** Pin (or re-pin) the embedding-space fingerprint + dimension. */
+  setEmbeddingMeta(fingerprint, dim) {
+    this.db.prepare(`INSERT INTO embedding_meta (id, fingerprint, dim) VALUES (1, ?, ?)
+         ON CONFLICT(id) DO UPDATE SET fingerprint = excluded.fingerprint, dim = excluded.dim`).run(fingerprint, dim);
+  }
+  /** Drop every stored vector + the pinned space; re-queue all records. */
+  resetVectorSpace() {
+    this.db.exec(`DELETE FROM agent_memory_vec; DELETE FROM embedding_meta;`);
+    this.pendingEmbed.clear();
+    this.queueUnembedded();
+  }
+  /**
+   * Batch-embed every pending record's `value` and store the vectors.
+   * Lazy (only called from `vectorSearch`) and best-effort: a network/key
+   * failure throws and leaves rows pending (FTS-only fallback); a proxy
+   * embedding-space change — drift mid-run, or a cross-run fingerprint
+   * mismatch — wipes stale vectors and re-embeds in the new space so the
+   * store is never a mix of vector spaces.
+   */
+  async flushEmbeddings() {
+    if (this.embedder === void 0 || this.pendingEmbed.size === 0)
+      return;
+    const load = (ids) => {
+      if (ids.length === 0)
+        return [];
+      const ph = ids.map(() => "?").join(", ");
+      return this.db.prepare(`SELECT id, value FROM agent_memory WHERE id IN (${ph})`).all(...ids);
+    };
+    let rows = load([...this.pendingEmbed]);
+    const live = new Set(rows.map((r) => r.id));
+    for (const id of [...this.pendingEmbed])
+      if (!live.has(id))
+        this.pendingEmbed.delete(id);
+    if (rows.length === 0)
+      return;
+    let vectors;
+    try {
+      vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
+    } catch (err) {
+      if (err.code === "EMBEDDING_DRIFT") {
+        this.resetVectorSpace();
+        rows = load([...this.pendingEmbed]);
+        if (rows.length === 0)
+          return;
+        vectors = await this.embedder.embed(rows.map((r) => r.value), { inputType: "document" });
+      } else {
+        throw err;
+      }
+    }
+    const fingerprint = this.embedder.model;
+    const dim = this.embedder.dimension;
+    const meta = this.getEmbeddingMeta();
+    if (meta !== null && meta.fingerprint !== fingerprint) {
+      this.db.exec(`DELETE FROM agent_memory_vec;`);
+      this.setEmbeddingMeta(fingerprint, dim);
+      this.pendingEmbed.clear();
+      this.queueUnembedded();
+      for (const r of rows)
+        this.pendingEmbed.delete(r.id);
+    } else if (meta === null) {
+      this.setEmbeddingMeta(fingerprint, dim);
+    }
+    const upsert = this.db.prepare(`INSERT INTO agent_memory_vec (record_id, vector, dim) VALUES (?, ?, ?)
+       ON CONFLICT(record_id) DO UPDATE SET vector = excluded.vector, dim = excluded.dim`);
+    const store = this.db.transaction((items) => {
+      for (const it of items)
+        upsert.run(it.id, vectorToBlob(it.vec), dim);
+    });
+    store(rows.map((r, i) => ({ id: r.id, vec: vectors[i] })));
+    for (const r of rows)
+      this.pendingEmbed.delete(r.id);
   }
   async delete(input) {
     const result = this.db.prepare(`DELETE FROM agent_memory
@@ -328,21 +1092,79 @@ function projectIdFor(workspaceRoot) {
 /** Default location of the memory database: `<home>/.prometheus/memory.db`. */
 function defaultMemoryDbPath() {
   const prometheusDir = join(homedir(), ".prometheus");
   return join(prometheusDir, "memory.db");
 }
+/**
+ * Read an integer setting from an environment-style object.
+ *
+ * The whole value must be an integer: surrounding whitespace is ignored and
+ * exponent notation is honoured ("9e4" -> 90000), while partial matches such
+ * as "12abc" and fractional values such as "1.5" fall back to the default.
+ * (`Number.parseInt` would silently truncate those to 12 and 1, and would
+ * read "9e4" as 9.)
+ *
+ * @param {Record<string, string | undefined>} env - Variable map, e.g. `process.env`.
+ * @param {string} name - Name of the variable to look up.
+ * @param {number} def - Value returned when the variable is unset, blank or not an integer.
+ * @returns {number} The parsed integer, or `def`.
+ */
+function intEnv(env, name, def) {
+  const raw = env[name];
+  if (raw === void 0)
+    return def;
+  const text = String(raw).trim();
+  // Number("") is 0, so a blank value has to be rejected before converting.
+  if (text === "")
+    return def;
+  const n = Number(text);
+  return Number.isSafeInteger(n) ? n : def;
+}
+/**
+ * Create the embedding provider configured for the Voyage API.
+ *
+ * Every setting can be overridden through the environment; the defaults are
+ * model `voyage-3-large`, dimension 1024 and base URL
+ * `https://api.voyageai.com/v1`.
+ *
+ * @param {Record<string, string | undefined>} env - Variable map, e.g. `process.env`.
+ * @param {string} apiKey - Voyage API key handed to the provider.
+ * @returns {OpenAICompatEmbeddingProvider} The configured provider instance.
+ */
+function buildVoyageEmbedder(env, apiKey) {
+  // A variable that is present but empty counts as unset, matching how intEnv
+  // and the other environment lookups in this file treat "". With a plain `??`
+  // an empty VOYAGE_MODEL / VOYAGE_BASE_URL would be passed through verbatim.
+  const strEnv = (name, def) => {
+    const raw = env[name];
+    return raw === void 0 || raw === "" ? def : raw;
+  };
+  return new OpenAICompatEmbeddingProvider({
+    name: "voyage",
+    model: strEnv("VOYAGE_MODEL", "voyage-3-large"),
+    dimension: intEnv(env, "VOYAGE_DIM", 1024),
+    region: "us",
+    baseUrl: strEnv("VOYAGE_BASE_URL", "https://api.voyageai.com/v1"),
+    apiKey,
+    omitEncodingFormat: true,
+    // Voyage caps a single request's summed input tokens; estimate-batch to
+    // stay safely under it (same knobs/rationale as context-mcp).
+    maxBatchTokens: intEnv(env, "VOYAGE_MAX_BATCH_TOKENS", 9e4),
+    charsPerToken: intEnv(env, "VOYAGE_CHARS_PER_TOKEN", 2),
+    // Voyage free tier rate-limits at 3 RPM → longer backoff than the default.
+    maxRetries: intEnv(env, "VOYAGE_MAX_RETRIES", 6),
+    retryBaseMs: intEnv(env, "VOYAGE_RETRY_BASE_MS", 2e3)
+  });
+}
+/**
+ * Decide which embedding provider (if any) the memory backend should use.
+ *
+ * `PROMETHEUS_MEMORY_EMBED` selects the mode (case-insensitive, surrounding
+ * whitespace ignored):
+ *   - "off"        -> no embedder.
+ *   - "voyage"     -> Voyage provider; throws when VOYAGE_API_KEY is missing or blank.
+ *   - "prometheus" -> Prometheus provider; the key is obtained via requireApiKey(env).
+ *   - anything else, including unset ("auto") -> Voyage when VOYAGE_API_KEY is
+ *     set, otherwise Prometheus when its API key variable is set, otherwise none.
+ *
+ * @param {Record<string, string | undefined>} env - Variable map, e.g. `process.env`.
+ * @returns {{ id: "none" | "voyage" | "prometheus", embedder: object | undefined }}
+ *   The chosen provider id and instance; `embedder` is undefined for "none".
+ * @throws {Error} When mode is "voyage" and VOYAGE_API_KEY is not set.
+ */
+function discoverMemoryEmbedder(env) {
+  // Normalise an optional string: blank or missing becomes undefined.
+  const nonBlank = (value) => {
+    const text = value?.trim();
+    return text === void 0 || text === "" ? void 0 : text;
+  };
+  // Trim before comparing so a stray space ("off ") cannot fall through to
+  // auto-discovery and switch embeddings on against the user's intent.
+  const mode = (env.PROMETHEUS_MEMORY_EMBED ?? "auto").trim().toLowerCase();
+  const none = () => ({ id: "none", embedder: void 0 });
+  const viaVoyage = (key) => ({ id: "voyage", embedder: buildVoyageEmbedder(env, key) });
+  const viaPrometheus = () => {
+    const apiKey = requireApiKey(env);
+    const baseUrl = env.PROMETHEUS_API_URL;
+    const embedder = new PrometheusEmbeddingProvider({
+      apiKey,
+      // Only forward a base URL that was actually configured.
+      ...(baseUrl !== void 0 && baseUrl !== "" ? { baseUrl } : {})
+    });
+    return { id: "prometheus", embedder };
+  };
+  if (mode === "off")
+    return none();
+  // The Voyage key is trimmed like the Prometheus key below, so a
+  // whitespace-only value is treated as "not set" instead of being used.
+  const voyageKey = nonBlank(env.VOYAGE_API_KEY);
+  if (mode === "voyage") {
+    if (voyageKey === void 0) {
+      throw new Error('PROMETHEUS_MEMORY_EMBED="voyage" requires VOYAGE_API_KEY to be set.');
+    }
+    return viaVoyage(voyageKey);
+  }
+  if (mode === "prometheus")
+    return viaPrometheus();
+  // Auto-discovery: prefer Voyage, then the Prometheus proxy, else keyword-only.
+  if (voyageKey !== void 0)
+    return viaVoyage(voyageKey);
+  if (nonBlank(env[API_KEY_ENV]) !== void 0)
+    return viaPrometheus();
+  return none();
+}
+/**
+ * Assemble the server's runtime dependencies from environment variables.
+ *
+ * Reads `opts.env` and returns the memory backend together with the resolved
+ * workspace root, project id/name, database path, the selected embedder id,
+ * whether embeddings are enabled, and a `close()` that closes the backend.
+ * As of this change no API key is demanded up front: the embedder (or none)
+ * is chosen by discoverMemoryEmbedder.
+ */
 function composeFromEnv(opts) {
   const env = opts.env;
-  requireApiKey(env);
+  // Workspace root: PROMETHEUS_WORKSPACE_ROOT if set, else the current working directory.
   const workspaceRoot = resolve(env.PROMETHEUS_WORKSPACE_ROOT ?? process.cwd());
   const projectId = projectIdFor(workspaceRoot);
+  // Fall back to the full path when the basename is empty.
   const projectName = basename(workspaceRoot) || workspaceRoot;
   const rawDbPath = env.PROMETHEUS_MEMORY_DB_PATH;
+  // An unset or empty PROMETHEUS_MEMORY_DB_PATH selects the default database path.
   const dbPath = rawDbPath !== void 0 && rawDbPath !== "" ? rawDbPath : defaultMemoryDbPath();
-  const backend = new SqliteMemoryBackend(dbPath);
+  const { id: embedderId, embedder } = discoverMemoryEmbedder(env);
+  // Pass the embedder option only when one was selected; otherwise pass empty options.
+  const backend = new SqliteMemoryBackend(dbPath, embedder !== void 0 ? { embedder } : {});
   return {
     backend,
     workspaceRoot,
     projectId,
     projectName,
     dbPath,
+    embeddingsEnabled: embedder !== void 0,
+    embedderId,
     close: () => backend.close()
   };
 }
@@ -743,7 +1565,7 @@ var setupInput = {
 };
 function registerTools(server, deps) {
   const { backend, workspaceRoot, projectId, projectName, dbPath } = deps;
-  server.registerTool("memory_read", {
+  server.registerTool("read", {
     title: "Recall agent memory",
     description: "Read agent memory for this project along the scope chain (project \u2192 workspace \u2192 tenant \u2192 system; narrowest scope wins). Syncs `.prometheus/memories/*.md` first, then returns the resolved records plus a prompt-ready `woven` markdown block (token-capped). Call this at the START of a session or task to recall what earlier sessions learned.",
     inputSchema: readInput
@@ -763,7 +1585,7 @@ function registerTools(server, deps) {
       records: records.map(recordToJson)
     });
   });
-  server.registerTool("memory_write", {
+  server.registerTool("write", {
     title: "Store agent memory",
     description: "Upsert one memory record (identity: scope+type+key). Use type `semantic` for durable facts, `procedural` for how-to knowledge, `episodic` for session events, `working` for short-lived notes. Default scope `project` also mirrors the value to `.prometheus/memories/<key>.md` (git-versioned, human-editable). Values matching the secret deny-list are rejected. Call this whenever the user states a durable preference, decision, or correction worth remembering.",
     inputSchema: writeInput
@@ -790,7 +1612,7 @@ ${args.value}`);
     }
     return textResult({ record: recordToJson(record), projectFile });
   });
-  server.registerTool("memory_capture", {
+  server.registerTool("capture", {
     title: "Consolidate session learnings",
     description: "Session-end consolidation: `plan`/`outcome` become one episodic record (key = sessionId), `facts` become semantic upserts, `procedures` become procedural upserts. Secret-bearing payloads are rejected. Call this at the END of a session to persist what was learned.",
     inputSchema: captureInput
@@ -818,7 +1640,7 @@ ${p.value}`)
     });
     return textResult({ written: written.map(recordToJson) });
   });
-  server.registerTool("memory_search", {
+  server.registerTool("search", {
     title: "Search agent memory",
     description: "Full-text search (FTS5) over memory keys and values within this project's scope chain, ranked by relevance. Returns matching records plus a highlighted snippet per hit. Use this when memory_read's recall is not specific enough. Does not bump useCount.",
     inputSchema: searchInput
@@ -840,7 +1662,7 @@ ${p.value}`)
       }))
     });
   });
-  server.registerTool("memory_list", {
+  server.registerTool("list", {
     title: "List stored memory (admin)",
     description: "Flat listing of this project's memory records without scope resolution \u2014 inspection/debug surface. Optional filters: scope, type, keyContains (case-insensitive substring).",
     inputSchema: listInput
@@ -860,7 +1682,7 @@ ${p.value}`)
       records: records.map(recordToJson)
     });
   });
-  server.registerTool("memory_delete", {
+  server.registerTool("delete", {
     title: "Delete stored memory",
     description: "Delete one memory record by identity (scope+type+key). For project-scoped semantic records the mirrored `.prometheus/memories/<key>.md` file is removed as well. Returns whether a record/file was actually removed.",
     inputSchema: deleteInput
@@ -879,7 +1701,7 @@ ${p.value}`)
     }
     return textResult({ removed, fileRemoved });
   });
-  server.registerTool("memory_setup", {
+  server.registerTool("setup", {
     title: "Install memory rules into runtime configs",
     description: "Idempotently install the Prometheus memory-protocol rule block into agent runtime configs in this workspace: CLAUDE.md (claude-code), .cursor/rules/prometheus-memory.mdc (cursor), .augment/rules/prometheus-memory.md (augment), AGENTS.md (agents). Without `runtimes` it auto-detects which runtimes are present (fallback: agents). Only the marked block is written \u2014 existing content is never touched. Re-running updates the block in place.",
     inputSchema: setupInput
@@ -914,7 +1736,7 @@ function createServer(deps, options = {}) {
 // dist/bin.js
 async function main() {
   const composed = composeFromEnv({ env: process.env });
-  process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath}
+  process.stderr.write(`prometheus-memory-mcp: workspace=${composed.workspaceRoot} project=${composed.projectName} (${composed.projectId}) db=${composed.dbPath} embed=${composed.embedderId}${composed.embeddingsEnabled ? "" : " (keyword-only)"}
 `);
   const server = createServer(composed);
   const transport = new StdioServerTransport();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@prom.codes/memory-mcp",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Prometheus Agent Memory — persistent, local-first agent memory as an MCP server.",
   "type": "module",
   "bin": {