@betterdb/memory 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,7 +54,7 @@ docker run -d --name betterdb-valkey -p 6379:6379 -v betterdb-valkey-data:/data
54
54
  ### MCP Tools
55
55
 
56
56
  Claude can use these mid-conversation:
57
- - `search_context` — Semantic search over past sessions
57
+ - `search_context` — Semantic search over past sessions. Escalates project+branch → project → cross-project, and takes an optional `tags` filter (`decision`, `pattern`, `problem`, `open-thread`)
58
58
  - `store_insight` — Save a decision, pattern, or warning
59
59
  - `list_open_threads` — Show unresolved items
60
60
  - `forget` — Delete a specific memory
@@ -63,9 +63,11 @@ Claude can use these mid-conversation:
63
63
 
64
64
  ```bash
65
65
  bunx @betterdb/memory install # Set up hooks + MCP server
66
- bunx @betterdb/memory status # Check health
66
+ bunx @betterdb/memory status # Check health + recall scoring config
67
67
  bunx @betterdb/memory uninstall # Remove everything
68
68
  bunx @betterdb/memory maintain # Run aging/compression manually
69
+ bunx @betterdb/memory forget # Bulk-delete by scope (dry run; --apply to delete)
70
+ # --project <name> | --all-projects --branch <b> --tags <a,b>
69
71
  bunx @betterdb/memory docker-valkey # Manage Docker Valkey container
70
72
  ```
71
73
 
@@ -80,23 +82,62 @@ Copy `.env.example` to `.env` and fill in your values before running `bunx @bett
80
82
  | `BETTERDB_VALKEY_URL` | `redis://localhost:6379` | Valkey connection URL |
81
83
  | `BETTERDB_VALKEY_INDEX_NAME` | `betterdb-memory-index` | Valkey search index name |
82
84
  | `BETTERDB_EMBED_DIM` | `1024` | Embedding dimensions |
83
- | `BETTERDB_MAX_CONTEXT_MEMORIES` | `5` | Memories injected per session |
85
+ | `BETTERDB_MAX_CONTEXT_MEMORIES` | `5` | Max memories injected per session (after gating) |
84
86
  | `BETTERDB_CONTEXT_FILE` | `.betterdb_context.md` | Context injection file |
85
87
  | `BETTERDB_ALLOW_REMOTE_FALLBACK` | `true` | Fall back to remote APIs if local models unavailable |
86
88
 
89
+ #### Recall Gating
90
+
91
+ Recall over-fetches a candidate pool, gates it by relevance, and escalates on a
92
+ miss (project+branch → project → cross-project). Memories are stored with their
93
+ git branch as a native thread scope and content-type tags, so recall can narrow
94
+ to the current branch first and filter by type. `search_context` returns nothing
95
+ only when nothing clears the bar — so a miss is honest, not a silent drop.
96
+
97
+ The gate is **relative**, not an absolute similarity threshold: embed models
98
+ compress cosine similarity into different, narrow bands (mxbai-embed-large packs
99
+ everything into ~0.7–0.88), so a fixed threshold doesn't transfer across models.
100
+ Instead, `floor` drops genuine noise, and hits within `margin` of the top match
101
+ are kept; confidence comes from the scale-independent top-vs-next gap.
102
+
103
+ | Variable | Default | Description |
104
+ |----------|---------|-------------|
105
+ | `BETTERDB_RECALL_FLOOR` | `0.5` | Similarity floor — drops noise and loosens the store's own distance gate |
106
+ | `BETTERDB_RECALL_MARGIN` | `0.05` | Keep hits within this similarity of the top match |
107
+ | `BETTERDB_RECALL_SEPARATION` | `0.04` | Top-vs-next gap above which a match is "high" confidence |
108
+ | `BETTERDB_RECALL_POOL_K` | `10` | Rung-1 over-fetch pool (project+branch) |
109
+ | `BETTERDB_RECALL_POOL_K_WIDE` | `20` | Rung-2/3 over-fetch pool (wider / cross-project) |
110
+ | `BETTERDB_ALLOW_CROSS_PROJECT` | `true` | Allow escalation / `scope="all"` to search across projects |
111
+
112
+ Ranking within the gated pool uses a composite score (similarity + recency +
113
+ importance), owned by `@betterdb/agent-memory`. Recency is the system's single
114
+ time-decay — a half-life applied at query time, not a stored per-memory aging
115
+ pass. These knobs tune it; defaults match the store's.
116
+
117
+ | Variable | Default | Description |
118
+ |----------|---------|-------------|
119
+ | `BETTERDB_RECALL_HALF_LIFE_DAYS` | `7` | Age at which a memory's recency term halves |
120
+ | `BETTERDB_RECALL_WEIGHT_SIMILARITY` | `0.6` | Weight of semantic similarity in the composite score |
121
+ | `BETTERDB_RECALL_WEIGHT_RECENCY` | `0.25` | Weight of recency |
122
+ | `BETTERDB_RECALL_WEIGHT_IMPORTANCE` | `0.15` | Weight of stored importance |
123
+
87
124
  #### Model Providers
88
125
 
89
126
  | Variable | Default | Description |
90
127
  |----------|---------|-------------|
91
- | `BETTERDB_EMBED_PROVIDER` | auto-detect | Force embed provider: `ollama`, `voyage`, `openai`, `groq`, `together` |
128
+ | `BETTERDB_EMBED_PROVIDER` | auto-detect | Force embed provider: `local`, `ollama`, `voyage`, `openai`, `groq`, `together` |
92
129
  | `BETTERDB_SUMMARIZE_PROVIDER` | auto-detect | Force summarize provider: `ollama`, `anthropic`, `openai`, `groq`, `together` |
93
130
  | `BETTERDB_EMBED_MODEL` | `mxbai-embed-large` | Ollama embedding model name |
94
131
  | `BETTERDB_SUMMARIZE_MODEL` | `mistral:7b` | Ollama summarization model name |
95
132
  | `BETTERDB_OLLAMA_URL` | `http://localhost:11434` | Ollama API URL |
96
133
 
134
+ #### Embeddings work with zero config
135
+
136
+ If no embedding provider is detected (no Ollama models, no API keys), BetterDB falls back to **on-device embeddings** via `@xenova/transformers` (`all-MiniLM-L6-v2`, 384-dim, Apache-2.0). No API key, no running service — the model weights download once on first use and are cached thereafter. Auto-detected providers (Ollama, then API keys) take priority when available.
137
+
97
138
  #### API Keys
98
139
 
99
- At least one embedding provider and one summarization provider must be available. Ollama is free and local; the others require API keys.
140
+ Embeddings always work (on-device fallback above). A summarization provider is still required: Ollama is free and local; the others require API keys.
100
141
 
101
142
  | Variable | Provider | Used for |
102
143
  |----------|----------|----------|
@@ -110,7 +151,6 @@ At least one embedding provider and one summarization provider must be available
110
151
 
111
152
  | Variable | Default | Description |
112
153
  |----------|---------|-------------|
113
- | `BETTERDB_DECAY_RATE` | `0.95` | Memory importance decay per day |
114
154
  | `BETTERDB_COMPRESS_THRESHOLD` | `0.3` | Importance threshold for compression |
115
155
  | `BETTERDB_DISTILL_MIN_SESSIONS` | `5` | Min sessions before knowledge distillation |
116
156
  | `BETTERDB_AGING_INTERVAL_HOURS` | `6` | Hours between automatic aging runs |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@betterdb/memory",
3
- "version": "0.2.0",
3
+ "version": "0.4.0",
4
4
  "description": "BetterDB Memory for Claude Code — Valkey-powered persistent memory across sessions",
5
5
  "license": "MIT",
6
6
  "author": "BetterDB Inc. <hello@betterdb.com>",
@@ -46,9 +46,11 @@
46
46
  "typecheck": "tsc --noEmit"
47
47
  },
48
48
  "dependencies": {
49
+ "@betterdb/agent-memory": "^0.2.1",
49
50
  "iovalkey": "^0.2.1",
50
51
  "ollama": "^0.5.14",
51
52
  "@modelcontextprotocol/sdk": "^1.12.1",
53
+ "@xenova/transformers": "^2.17.2",
52
54
  "zod": "^3.24.4",
53
55
  "zod-to-json-schema": "^3.24.5",
54
56
  "@anthropic-ai/sdk": "latest"
@@ -7,16 +7,19 @@
7
7
  * bun run scripts/aging-worker.ts
8
8
  */
9
9
  import { getValkeyClient } from "../src/client/valkey.js";
10
+ import { getPluginMemoryStore } from "../src/client/memory-store.js";
10
11
  import { createModelClient } from "../src/client/model.js";
11
12
  import { AgingPipeline } from "../src/memory/aging.js";
12
13
 
13
14
  try {
14
15
  const valkeyClient = await getValkeyClient();
15
16
  const modelClient = await createModelClient();
17
+ const store = await getPluginMemoryStore((t) => modelClient.embed(t));
16
18
 
17
- const pipeline = new AgingPipeline(valkeyClient, modelClient);
19
+ const pipeline = new AgingPipeline(valkeyClient, store, modelClient);
18
20
  await pipeline.runFullPipeline();
19
21
 
22
+ await store.close();
20
23
  await valkeyClient.quit();
21
24
  } catch (err) {
22
25
  console.error("[betterdb] Aging worker failed:", err);
@@ -1,14 +1,21 @@
1
1
  #!/usr/bin/env bun
2
2
  import { getValkeyClient } from "../src/client/valkey.js";
3
+ import { getPluginMemoryStore } from "../src/client/memory-store.js";
3
4
  import { createModelClient } from "../src/client/model.js";
4
- import { config } from "../src/config.js";
5
5
 
6
6
  const client = await getValkeyClient();
7
7
  const modelClient = await createModelClient();
8
8
 
9
- await client.ensureIndex(modelClient.embedDim, modelClient.preset.embedModel);
10
- console.log("Index ready:", config.valkey.indexName);
9
+ // Create the episodic vector index that MemoryStore reads/writes
10
+ // (betterdb:mem:idx) — the same one `install` builds. Record the active
11
+ // provider/dimension first so a later provider swap is caught.
12
+ await client.assertEmbedDim(modelClient.embedDim, modelClient.preset.embedModel);
13
+ const store = await getPluginMemoryStore((t) => modelClient.embed(t));
14
+ await store.ensureIndex();
15
+
16
+ console.log("Index ready: betterdb:mem:idx");
11
17
  console.log("Embedding dimension:", modelClient.embedDim);
12
18
  console.log("Preset:", modelClient.preset.embedModel, "/", modelClient.preset.summarizeModel);
13
19
 
20
+ await store.close();
14
21
  await client.quit();
@@ -0,0 +1,406 @@
1
+ import {
2
+ MemoryStore,
3
+ similarityFromDistance,
4
+ type ConsolidateOptions,
5
+ type ConsolidateResult,
6
+ type EmbedFn,
7
+ type MemoryItem,
8
+ type MemoryScope,
9
+ type MemoryStats,
10
+ type MemoryStoreClient,
11
+ } from "@betterdb/agent-memory";
12
+ import {
13
+ EpisodicMemorySchema,
14
+ type EpisodicMemory,
15
+ } from "../memory/schema.js";
16
+ import { getValkeyClient } from "./valkey.js";
17
+ import { config } from "../config.js";
18
+
19
+ const SECONDS_PER_DAY = 86400;
20
+
21
+ // Store name fixes the index (`betterdb:mem:idx`) and key prefix
22
+ // (`betterdb:mem:{id}`) that @betterdb/agent-memory derives internally.
23
+ const STORE_NAME = "betterdb";
24
+
25
+ // --- EpisodicMemory <-> MemoryItem mapping ---
26
+ //
27
+ // agent-memory's MemoryItem is flat (content + importance + tags + scope),
28
+ // while the plugin's EpisodicMemory carries a structured `summary` plus
29
+ // `branch` and an original `timestamp`. We embed `summary.oneLineSummary`
30
+ // (so recall quality matches the current implementation, which embeds the
31
+ // same string) and stash everything MemoryItem can't hold natively in the
32
+ // free-form `source` field. The remaining fields map directly:
33
+ // project -> namespace
34
+ // importanceScore -> importance
35
+ // accessCount -> accessCount (tracked natively, bumped on recall)
36
+ // lastAccessed -> lastAccessedAt (tracked natively)
37
+
38
+ interface SourcePayload {
39
+ summary: EpisodicMemory["summary"];
40
+ branch: string;
41
+ timestamp: string;
42
+ }
43
+
44
+ /** A recalled memory carrying its relevance and composite score for gating. */
45
+ export interface ScoredMemory {
46
+ memory: EpisodicMemory;
47
+ /** Cosine similarity to the query, 0..1 (higher = more relevant). */
48
+ relevance: number;
49
+ /** Composite recall score (similarity + recency + importance). */
50
+ score: number;
51
+ }
52
+
53
+ export function episodicToSource(memory: EpisodicMemory): string {
54
+ const payload: SourcePayload = {
55
+ summary: memory.summary,
56
+ branch: memory.branch,
57
+ timestamp: memory.timestamp,
58
+ };
59
+ return JSON.stringify(payload);
60
+ }
61
+
62
+ /**
63
+ * Content-type tags for a memory, derived from which summary sections it fills.
64
+ * Stored natively (not in the opaque `source` blob) so recall can filter on
65
+ * them — e.g. surface only decisions, or only unresolved open threads.
66
+ */
67
+ export function memoryTags(memory: EpisodicMemory): string[] {
68
+ const tags: string[] = [];
69
+ if (memory.summary.decisions.length > 0) tags.push("decision");
70
+ if (memory.summary.patterns.length > 0) tags.push("pattern");
71
+ if (memory.summary.problemsSolved.length > 0) tags.push("problem");
72
+ if (memory.summary.openThreads.length > 0) tags.push("open-thread");
73
+ return tags;
74
+ }
75
+
76
+ /**
77
+ * The text embedded for a memory. Previously only `oneLineSummary` was
78
+ * embedded, so recall could never see the structured detail (decisions,
79
+ * patterns, problems, open threads) — the single biggest recall-quality limit.
80
+ * We fold those into the vector here. `filesChanged` is deliberately omitted:
81
+ * bare file paths are generic and dominate the similarity band with noise.
82
+ */
83
+ export function buildEmbedText(memory: EpisodicMemory): string {
84
+ const s = memory.summary;
85
+ const parts: string[] = [s.oneLineSummary];
86
+ if (s.decisions.length > 0) parts.push(`Decisions: ${s.decisions.join("; ")}`);
87
+ if (s.patterns.length > 0) parts.push(`Patterns: ${s.patterns.join("; ")}`);
88
+ if (s.problemsSolved.length > 0) {
89
+ const solved = s.problemsSolved
90
+ .map((p) => `${p.problem} → ${p.resolution}`)
91
+ .join("; ");
92
+ parts.push(`Problems solved: ${solved}`);
93
+ }
94
+ if (s.openThreads.length > 0) {
95
+ parts.push(`Open threads: ${s.openThreads.join("; ")}`);
96
+ }
97
+ return parts.join("\n");
98
+ }
99
+
100
+ export function itemToEpisodic(item: MemoryItem): EpisodicMemory | null {
101
+ let summary: EpisodicMemory["summary"];
102
+ let branch: string;
103
+ let timestamp: string;
104
+
105
+ const payload = parseSourcePayload(item.source);
106
+ if (payload) {
107
+ summary = payload.summary;
108
+ branch = payload.branch;
109
+ timestamp = payload.timestamp;
110
+ } else {
111
+ // A flat item with no SourcePayload — e.g. a memory produced by
112
+ // MemoryStore.consolidate(), whose `source` is its own marker, not our
113
+ // JSON. Synthesize a minimal episodic memory from the content so merged
114
+ // summaries stay first-class for recall, listing, and injection.
115
+ summary = {
116
+ decisions: [],
117
+ patterns: [],
118
+ problemsSolved: [],
119
+ openThreads: [],
120
+ filesChanged: [],
121
+ oneLineSummary: item.content,
122
+ };
123
+ branch = "consolidated";
124
+ timestamp = new Date(item.createdAt).toISOString();
125
+ }
126
+
127
+ const parsed = EpisodicMemorySchema.safeParse({
128
+ memoryId: item.id,
129
+ project: item.namespace ?? "unknown",
130
+ branch,
131
+ timestamp,
132
+ summary,
133
+ importanceScore: item.importance,
134
+ accessCount: item.accessCount,
135
+ lastAccessed: new Date(item.lastAccessedAt).toISOString(),
136
+ });
137
+
138
+ return parsed.success ? parsed.data : null;
139
+ }
140
+
141
+ function parseSourcePayload(source: string | undefined): SourcePayload | null {
142
+ if (!source) return null;
143
+ try {
144
+ const parsed = JSON.parse(source) as Partial<SourcePayload>;
145
+ if (parsed && typeof parsed === "object" && parsed.summary) {
146
+ return parsed as SourcePayload;
147
+ }
148
+ return null;
149
+ } catch {
150
+ return null;
151
+ }
152
+ }
153
+
154
+ // --- Adapter ---
155
+ //
156
+ // Drop-in replacement for the episodic-vector subset of ValkeyClient, backed
157
+ // by @betterdb/agent-memory's MemoryStore. Knowledge entries and work queues
158
+ // stay on the existing ValkeyClient — they have no MemoryStore analog.
159
+ export class PluginMemoryStore {
160
+ private readonly store: MemoryStore;
161
+
162
+ constructor(client: MemoryStoreClient, embed?: EmbedFn) {
163
+ this.store = new MemoryStore({
164
+ client,
165
+ name: STORE_NAME,
166
+ embedFn: embed,
167
+ // Composite-score decay/blend from plugin config. This is the single
168
+ // time-decay in the system (recency, applied at query time) — there is
169
+ // no separate importance-aging pass. configRefresh:false keeps these
170
+ // values fixed rather than letting a Valkey config key override them.
171
+ halfLifeSeconds: config.recall.halfLifeDays * SECONDS_PER_DAY,
172
+ weights: {
173
+ similarity: config.recall.weightSimilarity,
174
+ recency: config.recall.weightRecency,
175
+ importance: config.recall.weightImportance,
176
+ },
177
+ // The plugin owns its own analytics/discovery story; keep the store quiet
178
+ // and offline so it pulls in no posthog/otel network behavior.
179
+ discovery: false,
180
+ configRefresh: false,
181
+ analytics: { disabled: true },
182
+ });
183
+ }
184
+
185
+ /** Create the `betterdb:mem:idx` vector index if absent (idempotent). */
186
+ ensureIndex(): Promise<void> {
187
+ return this.store.ensureIndex();
188
+ }
189
+
190
+ /**
191
+ * Store an episodic memory and return its generated id. The vector is derived
192
+ * from {@link buildEmbedText} (summary + structured detail) inside
193
+ * MemoryStore — callers no longer precompute an embedding. The full episodic
194
+ * memory is preserved in `source` for reconstruction; the embed text only
195
+ * shapes the vector.
196
+ */
197
+ storeMemory(memory: EpisodicMemory): Promise<string> {
198
+ return this.store.remember(buildEmbedText(memory), {
199
+ importance: memory.importanceScore,
200
+ namespace: memory.project,
201
+ // Branch as the native thread scope; content-type tags for filtered
202
+ // recall. Both are queryable, unlike the free-form `source` payload.
203
+ threadId: memory.branch,
204
+ tags: memoryTags(memory),
205
+ source: episodicToSource(memory),
206
+ });
207
+ }
208
+
209
+ /**
210
+ * KNN recall ranked by MemoryStore's composite score. Unlike the raw store,
211
+ * this returns each memory *with* its relevance so callers can gate on it —
212
+ * `relevance` is cosine similarity (0..1, higher = closer) derived from the
213
+ * hit's raw distance; `score` is the composite (similarity + recency +
214
+ * importance). Omit `project` to search across all namespaces; pass `branch`
215
+ * to scope to a git branch (native thread) and `tags` to filter by
216
+ * content type.
217
+ */
218
+ async recall(
219
+ query: string,
220
+ opts: {
221
+ project?: string;
222
+ branch?: string;
223
+ tags?: string[];
224
+ k: number;
225
+ threshold?: number;
226
+ reinforce?: boolean;
227
+ },
228
+ ): Promise<ScoredMemory[]> {
229
+ const hits = await this.store.recall(query, {
230
+ ...(opts.project !== undefined ? { namespace: opts.project } : {}),
231
+ ...(opts.branch !== undefined ? { threadId: opts.branch } : {}),
232
+ ...(opts.tags !== undefined && opts.tags.length > 0
233
+ ? { tags: opts.tags }
234
+ : {}),
235
+ k: opts.k,
236
+ ...(opts.threshold !== undefined ? { threshold: opts.threshold } : {}),
237
+ reinforce: opts.reinforce ?? true,
238
+ });
239
+ const out: ScoredMemory[] = [];
240
+ for (const hit of hits) {
241
+ const memory = itemToEpisodic(hit.item);
242
+ if (memory) {
243
+ out.push({
244
+ memory,
245
+ score: hit.score,
246
+ relevance: similarityFromDistance(hit.similarity),
247
+ });
248
+ }
249
+ }
250
+ return out;
251
+ }
252
+
253
+ /** KNN recall from a precomputed embedding (see {@link recall}). */
254
+ async searchMemories(
255
+ embedding: number[],
256
+ project: string,
257
+ topK: number,
258
+ ): Promise<EpisodicMemory[]> {
259
+ const hits = await this.store.recallByVector(embedding, {
260
+ namespace: project,
261
+ k: topK,
262
+ });
263
+ return hits
264
+ .map((hit) => itemToEpisodic(hit.item))
265
+ .filter((m): m is EpisodicMemory => m !== null);
266
+ }
267
+
268
+ /**
269
+ * List stored memories, optionally scoped to `project` and filtered by a
270
+ * minimum importance. Paginates through MemoryStore.list so callers that
271
+ * scan all memories (open-thread aggregation, distillation) get the full set.
272
+ * Pass `max` to stop early once that many matches are collected, so callers
273
+ * that only need a bounded slice don't materialize the whole store.
274
+ */
275
+ async listMemories(
276
+ project?: string,
277
+ minImportance?: number,
278
+ max?: number,
279
+ ): Promise<EpisodicMemory[]> {
280
+ const out: EpisodicMemory[] = [];
281
+ const limit = 100;
282
+ let offset = 0;
283
+
284
+ for (;;) {
285
+ const { items, total } = await this.store.list({
286
+ namespace: project,
287
+ limit,
288
+ offset,
289
+ });
290
+ if (items.length === 0) break;
291
+
292
+ for (const item of items) {
293
+ const memory = itemToEpisodic(item);
294
+ if (!memory) continue;
295
+ if (minImportance !== undefined && memory.importanceScore < minImportance) {
296
+ continue;
297
+ }
298
+ out.push(memory);
299
+ if (max !== undefined && out.length >= max) return out;
300
+ }
301
+
302
+ offset += items.length;
303
+ if (offset >= total) break;
304
+ }
305
+
306
+ return out;
307
+ }
308
+
309
+ /**
310
+ * List memories matching a scope (project namespace, branch thread, and/or
311
+ * content-type tags) using the SAME native index filter as
312
+ * {@link forgetByScope} — so a `listByScope` preview is exactly the set a
313
+ * `forgetByScope` with the same scope would delete. Unlike {@link listMemories}
314
+ * (which filters summary-derived tags in memory), this queries native tags,
315
+ * so memories stored before native tagging are matched identically by both.
316
+ */
317
+ async listByScope(scope: {
318
+ project?: string;
319
+ branch?: string;
320
+ tags?: string[];
321
+ }): Promise<EpisodicMemory[]> {
322
+ const out: EpisodicMemory[] = [];
323
+ const limit = 100;
324
+ let offset = 0;
325
+
326
+ for (;;) {
327
+ const { items, total } = await this.store.list({
328
+ ...(scope.project !== undefined ? { namespace: scope.project } : {}),
329
+ ...(scope.branch !== undefined ? { threadId: scope.branch } : {}),
330
+ ...(scope.tags !== undefined && scope.tags.length > 0
331
+ ? { tags: scope.tags }
332
+ : {}),
333
+ limit,
334
+ offset,
335
+ });
336
+ if (items.length === 0) break;
337
+
338
+ for (const item of items) {
339
+ const memory = itemToEpisodic(item);
340
+ if (memory) out.push(memory);
341
+ }
342
+
343
+ offset += items.length;
344
+ if (offset >= total) break;
345
+ }
346
+
347
+ return out;
348
+ }
349
+
350
+ /**
351
+ * Merge a selection of memories into one summary memory (and delete the
352
+ * sources). Selection criteria — scope, age, or max importance — are passed
353
+ * through to MemoryStore.consolidate.
354
+ */
355
+ consolidate(options: ConsolidateOptions): Promise<ConsolidateResult> {
356
+ return this.store.consolidate(options);
357
+ }
358
+
359
+ async getMemory(memoryId: string): Promise<EpisodicMemory | null> {
360
+ const item = await this.store.get(memoryId);
361
+ return item ? itemToEpisodic(item) : null;
362
+ }
363
+
364
+ async deleteMemory(memoryId: string): Promise<void> {
365
+ await this.store.forget(memoryId);
366
+ }
367
+
368
+ /**
369
+ * Bulk-delete every memory matching a scope (project namespace, branch
370
+ * thread, and/or tags). Returns the number deleted. At least one scope field
371
+ * should be set — an empty scope would match the whole store.
372
+ */
373
+ forgetByScope(scope: {
374
+ project?: string;
375
+ branch?: string;
376
+ tags?: string[];
377
+ }): Promise<number> {
378
+ const s: MemoryScope & { tags?: string[] } = {};
379
+ if (scope.project !== undefined) s.namespace = scope.project;
380
+ if (scope.branch !== undefined) s.threadId = scope.branch;
381
+ if (scope.tags !== undefined && scope.tags.length > 0) s.tags = scope.tags;
382
+ return this.store.forgetByScope(s);
383
+ }
384
+
385
+ /** Live store stats: item count, evictions, and active composite config. */
386
+ stats(): Promise<MemoryStats> {
387
+ return this.store.stats();
388
+ }
389
+
390
+ close(): Promise<void> {
391
+ return this.store.close();
392
+ }
393
+ }
394
+
395
+ /**
396
+ * Shared accessor for the episodic-vector store. Reuses the singleton
397
+ * ValkeyClient's connection (its `.call()` satisfies MemoryStoreClient) so the
398
+ * whole plugin runs on one iovalkey socket. Pass `embed` when the caller will
399
+ * remember/recall/ensureIndex; read-only callers (list/get/delete) may omit it.
400
+ */
401
+ export async function getPluginMemoryStore(
402
+ embed?: EmbedFn,
403
+ ): Promise<PluginMemoryStore> {
404
+ const valkey = await getValkeyClient();
405
+ return new PluginMemoryStore(valkey.redis, embed);
406
+ }
@@ -74,6 +74,7 @@ export { AnthropicSummarizeClient } from "./providers/anthropic.js";
74
74
  export { VoyageEmbedClient } from "./providers/voyage.js";
75
75
  export { GroqEmbedClient, GroqSummarizeClient } from "./providers/groq.js";
76
76
  export { TogetherEmbedClient, TogetherSummarizeClient } from "./providers/together.js";
77
+ export { LocalEmbedClient } from "./providers/local.js";
77
78
  export { buildSummarizePrompt } from "./providers/_prompt.js";
78
79
 
79
80
  // --- Provider Detection ---
@@ -146,15 +147,10 @@ async function resolveEmbedProvider(
146
147
  return new TogetherEmbedClient(p.togetherKey);
147
148
  }
148
149
 
149
- throw new Error(
150
- `No embedding provider available. Options:\n` +
151
- ` 1. Install Ollama and run: ollama pull mxbai-embed-large\n` +
152
- ` 2. Set VOYAGE_API_KEY for Voyage AI (voyage-3, dim=1024)\n` +
153
- ` 3. Set OPENAI_API_KEY for OpenAI (text-embedding-3-small, dim=1536)\n` +
154
- ` 4. Set GROQ_API_KEY for Groq (nomic-embed-text-v1_5, dim=768)\n` +
155
- ` 5. Set TOGETHER_API_KEY for Together AI (m2-bert-80M-8k-retrieval, dim=768)\n\n` +
156
- `Note: ANTHROPIC_API_KEY does not provide embeddings — pair it with another embed provider.`,
157
- );
150
+ // On-device fallback: zero-config, no API key, no service. Ensures a fresh
151
+ // install produces embeddings even with nothing else installed.
152
+ const { LocalEmbedClient } = await import("./providers/local.js");
153
+ return new LocalEmbedClient();
158
154
  }
159
155
 
160
156
  async function resolveSummarizeProvider(
@@ -217,6 +213,10 @@ function createExplicitEmbedProvider(
217
213
  p: typeof config.providers,
218
214
  ): ModelClient {
219
215
  switch (name) {
216
+ case "local": {
217
+ const { LocalEmbedClient } = require("./providers/local.js");
218
+ return new LocalEmbedClient();
219
+ }
220
220
  case "ollama": {
221
221
  const { OllamaModelClient } = require("./providers/ollama.js");
222
222
  return new OllamaModelClient(PRESET_CLEAN, config.ollama.url);
@@ -242,7 +242,7 @@ function createExplicitEmbedProvider(
242
242
  return new TogetherEmbedClient(p.togetherKey);
243
243
  }
244
244
  default:
245
- throw new Error(`Unknown embed provider: ${name}. Valid: ollama, openai, voyage, groq, together`);
245
+ throw new Error(`Unknown embed provider: ${name}. Valid: local, ollama, openai, voyage, groq, together`);
246
246
  }
247
247
  }
248
248
 
@@ -0,0 +1,58 @@
1
+ import type { SessionSummary } from "../../memory/schema.js";
2
+ import type { ModelClient, ModelPreset } from "../model.js";
3
+
4
+ // On-device embeddings via @xenova/transformers — no API key, no running
5
+ // service. Weights (all-MiniLM-L6-v2, Apache-2.0, 384-dim) download once on
6
+ // first use and are cached under the transformers cache dir thereafter.
7
+
8
+ const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
9
+ const EMBED_DIM = 384;
10
+
11
+ type FeatureExtractor = (
12
+ text: string,
13
+ options: { pooling: "mean"; normalize: boolean },
14
+ ) => Promise<{ data: Float32Array }>;
15
+
16
+ interface TransformersModule {
17
+ pipeline(
18
+ task: "feature-extraction",
19
+ model: string,
20
+ ): Promise<FeatureExtractor>;
21
+ }
22
+
23
+ // Lazy singleton: the model loads once and is reused across embed calls, and
24
+ // @xenova/transformers is only imported when local embeddings are actually used.
25
+ let extractorPromise: Promise<FeatureExtractor> | null = null;
26
+
27
+ function getExtractor(): Promise<FeatureExtractor> {
28
+ if (!extractorPromise) {
29
+ extractorPromise = import("@xenova/transformers").then((mod) =>
30
+ (mod as unknown as TransformersModule).pipeline(
31
+ "feature-extraction",
32
+ MODEL_ID,
33
+ ),
34
+ );
35
+ }
36
+ return extractorPromise;
37
+ }
38
+
39
+ export class LocalEmbedClient implements ModelClient {
40
+ readonly embedDim = EMBED_DIM;
41
+ readonly preset: ModelPreset = {
42
+ embedModel: MODEL_ID,
43
+ summarizeModel: "n/a",
44
+ embedDim: EMBED_DIM,
45
+ };
46
+
47
+ async embed(text: string): Promise<number[]> {
48
+ const extract = await getExtractor();
49
+ const output = await extract(text, { pooling: "mean", normalize: true });
50
+ return Array.from(output.data);
51
+ }
52
+
53
+ async summarize(_transcript: string): Promise<SessionSummary> {
54
+ throw new Error(
55
+ "Local embeddings provider does not summarize — configure a summarize provider (Ollama, Anthropic, OpenAI, Groq, or Together)",
56
+ );
57
+ }
58
+ }