npm - @betterdb/memory - Versions diffs - 0.1.2 → 0.4.0 - Mend

@betterdb/memory 0.1.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +101 -10
package/package.json +3 -1
package/scripts/aging-worker.ts +4 -1
package/scripts/docker-valkey.sh +101 -0
package/scripts/register-hooks.ts +94 -0
package/scripts/setup-index.ts +10 -3
package/scripts/unregister-hooks.ts +79 -0
package/src/client/memory-store.ts +406 -0
package/src/client/model.ts +10 -10
package/src/client/providers/local.ts +58 -0
package/src/client/valkey.ts +9 -0
package/src/config.ts +38 -6
package/src/hooks/post-tool.ts +2 -0
package/src/hooks/pre-tool.ts +12 -11
package/src/hooks/session-end.ts +14 -4
package/src/hooks/session-start.ts +33 -8
package/src/index.ts +379 -21
package/src/mcp/server.ts +82 -42
package/src/memory/aging.ts +78 -196
package/src/memory/recall.ts +169 -0
package/src/memory/retrieval.ts +73 -70

package/src/memory/recall.ts ADDED Viewed

@@ -0,0 +1,169 @@
+import { config } from "../config.js";
+import type { PluginMemoryStore, ScoredMemory } from "../client/memory-store.js";
+// Over-fetch → gate → narrow, mirroring the LongMemEval harness. Dense recall
+// is already ~95%; the gain is a real candidate set plus an honest gate, so
+// "found nothing" means "nothing cleared the gate" rather than an empty KNN.
+//
+// The gate is RELATIVE, not an absolute similarity threshold. Embed models
+// compress cosine similarity into different, narrow bands (mxbai-embed-large
+// packs everything into ~0.7–0.88; all-MiniLM differs), so a fixed tau doesn't
+// transfer across models. Instead: loosen the store's own distance gate to a
+// generous floor, drop genuine noise below that floor, then keep only the hits
+// within `margin` of the top match. Confidence comes from the top-vs-next gap,
+// which is scale-independent.
+/**
+ * Outcome of {@link escalatingRecall}: the hits that cleared the gate, plus
+ * which rung of the ladder produced them and how confident the gate was.
+ */
+export interface RecallResult {
+  /** Hits that cleared the gate, highest relevance first; empty on a miss. */
+  hits: ScoredMemory[];
+  /**
+   * Scope of the rung that answered. On a miss it is "all" only when the
+   * cross-project rung was both requested and enabled, otherwise "project".
+   */
+  scope: "project" | "all";
+  /** 1: project+branch (or project) · 2: project · 3: cross-project · 0: nothing. */
+  rung: 0 | 1 | 2 | 3;
+  /** Gate verdict for the returned hits; always "none" on a miss. */
+  confidence: "high" | "low" | "none";
+  /**
+   * True on a miss when the caller asked to widen (`crossProjectRequested`) but
+   * `BETTERDB_ALLOW_CROSS_PROJECT` is off, so the cross-project rung never ran.
+   * Lets the formatter say "cross-project is disabled" instead of falsely
+   * offering a scope="all" retry the config would also refuse.
+   */
+  crossProjectBlocked: boolean;
+}
+/** Scoping for {@link escalatingRecall}. */
+export interface RecallQuery {
+  /** Project that rungs 1 and 2 are restricted to (rung 3 omits it). */
+  project: string;
+  /** Git branch (native thread scope). Rung 1 narrows to it when present. */
+  branch?: string;
+  /** Content-type filter (e.g. `["decision"]`) applied at every rung. */
+  tags?: string[];
+  /**
+   * Whether the caller wants to widen past the project (user consent / an
+   * explicit scope="all"). The cross-project rung *also* requires
+   * `BETTERDB_ALLOW_CROSS_PROJECT`; when requested but globally disabled the
+   * result is flagged {@link RecallResult.crossProjectBlocked}.
+   */
+  crossProjectRequested: boolean;
+}
+/** What `gate` returns for one candidate pool. */
+interface Gated {
+  /** Candidates that survived the gate, highest relevance first. */
+  hits: ScoredMemory[];
+  /** "none" when nothing cleared the floor; otherwise "high" or "low". */
+  confidence: "high" | "low" | "none";
+}
+/**
+ * Relative relevance gate. Discards candidates below `config.recall.floor`,
+ * ranks the survivors best-first, and keeps those within `margin` of the best.
+ * Confidence is "high" when the best hit stands alone or leads the runner-up
+ * by at least `separation`, "low" when the top is bunched, and "none" when
+ * nothing clears the floor.
+ */
+function gate(pool: ScoredMemory[]): Gated {
+  const { floor, margin, separation } = config.recall;
+  const ranked = pool
+    .filter((candidate) => candidate.relevance >= floor)
+    .sort((x, y) => y.relevance - x.relevance);
+  const [best, runnerUp] = ranked;
+  if (best === undefined) {
+    return { hits: [], confidence: "none" };
+  }
+  const cutoff = best.relevance - margin;
+  const kept = ranked.filter((candidate) => candidate.relevance >= cutoff);
+  // A lone survivor has no rival; otherwise a wide lead over the runner-up
+  // marks a clear peak, while a bunched cluster (e.g. many near-duplicate
+  // file-history entries) is honestly reported as low confidence.
+  const lead = best.relevance - (runnerUp?.relevance ?? -Infinity);
+  if (ranked.length === 1 || lead >= separation) {
+    return { hits: kept, confidence: "high" };
+  }
+  return { hits: kept, confidence: "low" };
+}
+/**
+ * Distance threshold to pass to the store so that its strict default (0.25)
+ * doesn't pre-filter the pool: a similarity `floor` corresponds to the
+ * distance `2·(1 − floor)`.
+ */
+function storeThreshold(): number {
+  const { floor } = config.recall;
+  return 2 * (1 - floor);
+}
+/**
+ * Escalating recall: walk a ladder of scopes from narrow to wide and return
+ * the first rung whose pool yields gated hits.
+ *   rung 1 — project + `branch` (when given), pool size `poolK`. Without a
+ *            branch this is plain project scope.
+ *   rung 2 — project, any branch, wider pool `poolKWide`.
+ *   rung 3 — cross-project probe, added only when the caller requested
+ *            widening AND `BETTERDB_ALLOW_CROSS_PROJECT` is on, since another
+ *            project's memory is often noise or privacy-sensitive.
+ * Every rung is a speculative over-fetch, so each recall passes
+ * `reinforce: false`: the store reinforces its whole returned pool, but the
+ * gate then drops most of it, so reinforcing pre-gate would bump access counts
+ * on candidates the user never sees (and on entire pools of a miss), skewing
+ * composite ranking. The `tags` filter, when present, applies at every rung.
+ *
+ * @param store Memory store to query.
+ * @param query Free-text query handed to the store.
+ * @param q Scoping: project, optional branch and tags, cross-project consent.
+ * @returns The first rung's gated hits, or a rung-0 miss whose
+ *   `crossProjectBlocked` flag is set when widening was requested but is
+ *   disabled by configuration.
+ */
+export async function escalatingRecall(
+  store: PluginMemoryStore,
+  query: string,
+  q: RecallQuery,
+): Promise<RecallResult> {
+  const { project, branch, tags, crossProjectRequested } = q;
+  const { poolK, poolKWide } = config.recall;
+  const threshold = storeThreshold();
+  const crossProjectEnabled =
+    crossProjectRequested && config.recall.allowCrossProject;
+  // Each step fetches lazily so a wider rung only hits the store after every
+  // narrower rung has come back empty from the gate.
+  type Step = {
+    rung: 1 | 2 | 3;
+    scope: RecallResult["scope"];
+    fetch: () => ReturnType<PluginMemoryStore["recall"]>;
+  };
+  const ladder: Step[] = [
+    {
+      // rung 1 — project + branch (most specific).
+      rung: 1,
+      scope: "project",
+      fetch: () =>
+        store.recall(query, {
+          project,
+          ...(branch !== undefined ? { branch } : {}),
+          tags,
+          k: poolK,
+          threshold,
+          reinforce: false,
+        }),
+    },
+    {
+      // rung 2 — project, any branch, wider pool.
+      rung: 2,
+      scope: "project",
+      fetch: () =>
+        store.recall(query, {
+          project,
+          tags,
+          k: poolKWide,
+          threshold,
+          reinforce: false,
+        }),
+    },
+  ];
+  if (crossProjectEnabled) {
+    ladder.push({
+      // rung 3 — cross-project probe.
+      rung: 3,
+      scope: "all",
+      fetch: () =>
+        store.recall(query, {
+          tags,
+          k: poolKWide,
+          threshold,
+          reinforce: false,
+        }),
+    });
+  }
+  for (const step of ladder) {
+    const gated = gate(await step.fetch());
+    if (gated.hits.length > 0) {
+      return {
+        hits: gated.hits,
+        scope: step.scope,
+        rung: step.rung,
+        confidence: gated.confidence,
+        crossProjectBlocked: false,
+      };
+    }
+  }
+  // Nothing cleared the gate on any rung that was allowed to run.
+  return {
+    hits: [],
+    scope: crossProjectEnabled ? "all" : "project",
+    rung: 0,
+    confidence: "none",
+    crossProjectBlocked: crossProjectRequested && !config.recall.allowCrossProject,
+  };
+}

package/src/memory/retrieval.ts CHANGED Viewed

@@ -1,75 +1,9 @@
-import { config } from "../config.js";
-import type { ModelClient } from "../client/model.js";
-import type { ValkeyClient } from "../client/valkey.js";
 import type { EpisodicMemory } from "./schema.js";
-import { AgingPipeline } from "./aging.js";
+import type { RecallResult } from "./recall.js";
-// --- Memory Retriever ---
-/**
- * Retrieves episodic memories for a project: a vector search over the stored
- * memories, re-ranked by importance and recency, with an aging pass triggered
- * first when one is due.
- */
-export class MemoryRetriever {
-  private valkeyClient: ValkeyClient;
-  private modelClient: ModelClient;
-  private agingPipeline: AgingPipeline;
-  /** Stores both clients and builds an AgingPipeline from the same pair. */
-  constructor(valkeyClient: ValkeyClient, modelClient: ModelClient) {
-    this.valkeyClient = valkeyClient;
-    this.modelClient = modelClient;
-    this.agingPipeline = new AgingPipeline(valkeyClient, modelClient);
-  }
-  /**
-   * Return up to `config.memory.maxContextMemories` memories for `project`,
-   * ranked by `importanceScore × decayRate^(days since last access)`.
-   *
-   * @param queryContext Text that is embedded and used as the search vector.
-   * @param project Project passed to the memory search and the aging check.
-   * @returns The selected memories, best score first.
-   */
-  async retrieve(
-    queryContext: string,
-    project: string,
-  ): Promise<EpisodicMemory[]> {
-    await this.maybeRunAging(project);
-    const embedding = await this.modelClient.embed(queryContext);
-    // Over-fetch twice the final cap so the re-rank below has room to choose.
-    const topK = config.memory.maxContextMemories * 2;
-    const candidates = await this.valkeyClient.searchMemories(
-      embedding,
-      project,
-      topK,
-    );
-    const now = Date.now();
-    const scored = candidates
-      // Candidates with an importance score below 0.1 are dropped outright.
-      .filter((m) => m.importanceScore >= 0.1)
-      .map((m) => {
-        const daysSince =
-          (now - new Date(m.lastAccessed).getTime()) / (1000 * 60 * 60 * 24);
-        // Clamp at 0 so a lastAccessed in the future cannot raise the score.
-        const recencyFactor = Math.pow(
-          config.memory.decayRate,
-          Math.max(daysSince, 0),
-        );
-        return {
-          memory: m,
-          score: m.importanceScore * recencyFactor,
-        };
-      })
-      .sort((a, b) => b.score - a.score)
-      .slice(0, config.memory.maxContextMemories);
-    // Fire-and-forget access increments: not awaited, and any rejection is
-    // deliberately swallowed so it cannot fail the retrieval.
-    for (const { memory } of scored) {
-      this.valkeyClient.incrementAccess(memory.memoryId).catch(() => {});
-    }
-    return scored.map((s) => s.memory);
-  }
-  /**
-   * Run the decay pass for `project` when no aging run has been recorded or
-   * the last one is at least `config.memory.agingIntervalHours` old, then
-   * record the current time as the last run.
-   */
-  async maybeRunAging(project: string): Promise<void> {
-    const lastRun = await this.valkeyClient.getLastAgingRun();
-    // No recorded run counts as infinitely long ago, so aging always runs.
-    const hoursAgo = lastRun
-      ? (Date.now() - lastRun.getTime()) / (1000 * 60 * 60)
-      : Infinity;
-    if (hoursAgo >= config.memory.agingIntervalHours) {
-      await this.agingPipeline.runDecay(project);
-      await this.valkeyClient.setLastAgingRun(new Date());
-    }
-  }
-}
+// Recall (KNN + composite recency/importance scoring + access reinforcement)
+// now lives in @betterdb/agent-memory's MemoryStore, reached via
+// PluginMemoryStore.recall. This module keeps only the formatters.
 // --- Format for Injection ---
@@ -89,6 +23,9 @@ export function formatForInjection(memories: EpisodicMemory[]): string {
     for (const d of m.summary.decisions) {
       sections.push(`  - Decision: ${d}`);
     }
+    for (const pat of m.summary.patterns) {
+      sections.push(`  - Pattern: ${pat}`);
+    }
     for (const p of m.summary.problemsSolved) {
       sections.push(`  - Solved: ${p.problem} → ${p.resolution}`);
     }
@@ -112,3 +49,69 @@ export function formatForInjection(memories: EpisodicMemory[]): string {
   return sections.join("\n");
 }
+// --- Format search_context result (reader contract) ---
+/**
+ * Render one memory's summary as indented bullet lines, in a fixed order:
+ * decisions, patterns, solved problems, then open threads.
+ */
+function detailLines(m: EpisodicMemory): string[] {
+  const { decisions, patterns, problemsSolved, openThreads } = m.summary;
+  return [
+    ...Array.from(decisions, (decision) => `    - Decision: ${decision}`),
+    ...Array.from(patterns, (pattern) => `    - Pattern: ${pattern}`),
+    ...Array.from(
+      problemsSolved,
+      (solved) => `    - Solved: ${solved.problem} → ${solved.resolution}`,
+    ),
+    ...Array.from(openThreads, (thread) => `    - Open: ${thread}`),
+  ];
+}
+/**
+ * Render an escalating-recall result as the text returned by the
+ * search_context tool. The text instructs its reader: a miss tells the model
+ * to admit it found nothing and not to fabricate (mirroring the LongMemEval
+ * reader prompt); a hit tells the model to answer only from the excerpts.
+ *
+ * @param query The search text, echoed in the heading.
+ * @param result Outcome of the escalating recall.
+ * @param topK Upper bound on how many hits are listed.
+ * @returns Newline-joined report for the model.
+ */
+export function formatSearchResult(
+  query: string,
+  result: RecallResult,
+  topK: number,
+): string {
+  const heading = `# Memory search: "${query}"`;
+  if (result.hits.length === 0) {
+    let searched = "this project";
+    if (result.scope === "all") {
+      searched = "this project AND all other projects";
+    }
+    // When cross-project was asked for but config disables it, say so plainly
+    // rather than offering a scope="all" retry the config would also refuse.
+    let offer = "";
+    if (result.crossProjectBlocked) {
+      offer = ` Cross-project search is disabled by configuration (BETTERDB_ALLOW_CROSS_PROJECT=false), so widening is not available.`;
+    } else if (result.scope === "project") {
+      offer = ` You may offer to search across ALL projects — call search_context again with scope="all".`;
+    }
+    const missReport = [
+      heading,
+      `Searched: ${searched}.`,
+      `NO memories cleared the relevance threshold.`,
+      `Tell the user you found nothing in memory about this. Do NOT fabricate an ` +
+        `answer, and do NOT substitute a codebase search as if it were recall.${offer}`,
+    ];
+    return missReport.join("\n");
+  }
+  const shown = result.hits.slice(0, topK);
+  // One numbered headline per hit, followed by that memory's detail bullets.
+  const entries = shown.flatMap((hit, index) => {
+    const { memory, relevance } = hit;
+    const [date] = memory.timestamp.split("T");
+    return [
+      `[${index + 1}] (rel ${relevance.toFixed(2)}, ${date}) ${memory.summary.oneLineSummary}`,
+      ...detailLines(memory),
+    ];
+  });
+  const hitReport = [
+    heading,
+    `Scope: ${result.scope} · confidence: ${result.confidence} · ${shown.length} match(es)`,
+    ``,
+    ...entries,
+    ``,
+    `Answer the user ONLY from these excerpts. If they do not contain the answer, ` +
+      `say so plainly — do not invent.`,
+  ];
+  return hitReport.join("\n");
+}