opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Repository — the single source of truth for memory CRUD at runtime.
3
+ *
4
+ * Holds the in-memory working set, the inverted index, and a debounced
5
+ * write-behind layer over a SQLite store. All public operations keep
6
+ * the index in sync; the index is never accessed directly from
7
+ * outside. The public API is identical to the JSON-era repository —
8
+ * the storage swap is entirely behind this class.
9
+ *
10
+ * Storage model:
11
+ *
12
+ * - SQLite (`bun:sqlite`, see sqlite-store.ts) is the durable
13
+ * backing. It is written incrementally — only changed rows, in one
14
+ * transaction per flush — replacing the JSON store's whole-file
15
+ * rewrite. It is read exactly once, at `load()`.
16
+ *
17
+ * - `byId` is an in-memory cache that fully mirrors the store. It
18
+ * stays a full mirror because the inverted index needs every doc
19
+ * in memory anyway (IDF, avgdl, the co-change graph), so caching
20
+ * the Memory objects alongside costs little and keeps every read
21
+ * O(1) — recall never touches SQLite.
22
+ *
23
+ * - Writes are write-behind: `insert` / `upsertBySubject` /
24
+ * `removeMemory` / useCount bumps mutate the cache + index
25
+ * immediately and record the changed id in a pending buffer
26
+ * (`pendingDirty` / `pendingDeleted`). The debounced `flush`
27
+ * drains that buffer into SQLite in a single transaction. This
28
+ * batching is what makes SQLite a win rather than a wash — per-row
29
+ * transactions during ingestion would be slower than the JSON
30
+ * file; one transaction per debounce is far faster.
31
+ *
32
+ * Performance notes:
33
+ *
34
+ * - The cache is a `Map<id, Memory>` — insert / delete / lookup are
35
+ * all O(1). `removeMemory` and `applyEviction` work off the cache.
36
+ *
37
+ * - `insertIfMissing` consults `dedupKeys`, an in-memory Map keyed by
38
+ * compact composite keys, for O(1) idempotency. (SQLite is not
39
+ * consulted: a not-yet-flushed insert is in the buffer, not the
40
+ * DB, so the in-memory view is the authoritative one.)
41
+ *
42
+ * - `totalBytes` is a running counter; `countsByCategory` reads the
43
+ * inverted index's `byCategory` map — both O(1) / O(categories).
44
+ *
45
+ * - BM25 search is O(query terms × candidates); eviction sorts once
46
+ * per *batch*.
47
+ */
48
+ import type { Memory, RecallHit, Category, ResolvedConfig } from "../types.js";
49
+ import { type SearchOptions } from "../search/bm25.js";
50
+ import type { VectorStore } from "./vector-store.js";
51
+ export declare class MemoryRepository {
52
+ /** In-memory cache — a full mirror of the SQLite store, insertion-ordered. */
53
+ private byId;
54
+ /** Store metadata (ingest timestamps, last eviction). */
55
+ private meta;
56
+ private index; // Inverted index over the cached memories; every mutation keeps it in sync.
57
+ private root; // Project root this repository was loaded for.
58
+ private sqlite; // Durable SQLite backing store (see sqlite-store.ts).
59
+ private flushTimer; // Timer of the pending debounced flush, if one is scheduled.
60
+ /** O(1) idempotency: composite key → memory id (in-memory, mirrors the cache). */
61
+ private dedupKeys;
62
+ /** Running sum of `sizeBytes` over all memories (+ fixed overhead). */
63
+ private bytesTotal;
64
+ /** Ids changed (inserted/updated) since the last flush. */
65
+ private pendingDirty;
66
+ /** Ids deleted since the last flush. */
67
+ private pendingDeleted;
68
+ /** Whether `meta` changed since the last flush. */
69
+ private metaDirty;
70
+ /**
71
+ * Optional semantic-search index. Attached by the plugin only when
72
+ * `enableSemanticSearch` is on; `undefined` otherwise, in which case
73
+ * every recall takes the unchanged pure-lexical path.
74
+ */
75
+ private vectorStore?;
76
+ private constructor(); // Not callable from outside — instances come from the static `load()`.
77
+ /**
78
+ * Open the store for a project root and build the repository.
79
+ * Stays async to preserve the historical API even though
80
+ * `bun:sqlite` is synchronous — callers `await` this.
81
+ *
82
+ * If the legacy JSON-to-SQLite migration fails (rare, but possible
83
+ * when another process is touching the database during startup),
84
+ * `onMigrationError` is invoked with the cause and the plugin
85
+ * continues with an empty fresh database — see SqliteStore.open
86
+ * and migrateFromJson for why "keep starting, log loudly" beats
87
+ * "fail to start".
88
+ */
89
+ static load(root: string, onMigrationError?: (e: unknown) => void): Promise<MemoryRepository>;
90
+ /** Total number of stored memories. O(1). */
91
+ size(): number;
92
+ /** Total disk-bytes estimate — O(1), maintained incrementally. */
93
+ totalBytes(): number;
94
+ /** Per-category counts — O(categories), read straight from the index. */
95
+ countsByCategory(): Map<Category, number>;
96
+ insert(opts: { // Always inserts a new memory (no duplicate check) and returns it.
97
+ category: Category;
98
+ subject: string;
99
+ content: string;
100
+ tags?: string[];
101
+ source: string;
102
+ pinned?: boolean;
103
+ }): Memory;
104
+ /** Insert many in one shot; faster than insert-loop on large batches. */
105
+ insertMany(items: Array<{
106
+ category: Category;
107
+ subject: string;
108
+ content: string;
109
+ tags?: string[];
110
+ source: string;
111
+ pinned?: boolean;
112
+ }>): Memory[];
113
+ /**
114
+ * Insert one memory only if the (category, subject, content) tuple
115
+ * doesn't already exist. Returns the existing entry if so. This is
116
+ * how ingestion stays idempotent across plugin restarts.
117
+ *
118
+ * O(1) via the in-memory `dedupKeys` index. SQLite is intentionally
119
+ * not consulted — a not-yet-flushed insert lives in the write-behind
120
+ * buffer, so the in-memory view is the authoritative one.
121
+ */
122
+ insertIfMissing(opts: {
123
+ category: Category;
124
+ subject: string;
125
+ content: string;
126
+ tags?: string[];
127
+ source: string;
128
+ pinned?: boolean;
129
+ }): Memory;
130
+ /**
131
+ * Insert a memory, first removing any existing memories that share
132
+ * its (category, subject). For "live" single-valued facts — a
133
+ * file's current LSP diagnostics, a file's current signature map —
134
+ * re-reporting must REPLACE prior state, not accumulate stale
135
+ * copies.
136
+ *
137
+ * O(memories sharing that subject) via the inverted index.
138
+ */
139
+ upsertBySubject(opts: {
140
+ category: Category;
141
+ subject: string;
142
+ content: string;
143
+ tags?: string[];
144
+ source: string;
145
+ pinned?: boolean;
146
+ }): Memory;
147
+ /** Remove a single memory, keeping every index + counter + buffer consistent. O(1). */
148
+ private removeMemory;
149
+ /**
150
+ * Attach a semantic vector index. Once attached, a recall that
151
+ * carries a `queryVector` fuses vector similarity with the lexical
152
+ * ranking; a recall without one, or before this is called, is
153
+ * unaffected. Idempotent.
154
+ */
155
+ attachVectorStore(vs: VectorStore): void;
156
+ /**
157
+ * Rank candidates for a recall.
158
+ *
159
+ * With no vector store attached, or no `queryVector` supplied, this
160
+ * is exactly the historical lexical path — `search()` and nothing
161
+ * else. That keeps the default (semantic-search-off) configuration
162
+ * byte-for-byte unchanged.
163
+ *
164
+ * With both present, it fuses two rankings via reciprocal-rank
165
+ * fusion: the BM25 lexical ranking and a vector-similarity ranking.
166
+ * A larger candidate pool is drawn from each side so a hit that is
167
+ * strong in only one ranking can still surface, then the fused list
168
+ * is trimmed back to `limit`. Vector candidates are filtered to the
169
+ * same category/subject scope as the lexical query.
170
+ */
171
+ private rankCandidates;
172
+ /**
173
+ * Budget-aware recall. `search()` ranks (and count-limits via
174
+ * `opts.limit`); if `opts.tokenBudget` is set *and* a `format`
175
+ * function is supplied, the ranked hits are then packed to that
176
+ * token ceiling. `useCount`/`usedAt` are bumped only for the hits
177
+ * actually KEPT.
178
+ */
179
+ recallDetailed(opts: SearchOptions, format?: (h: RecallHit) => string): {
180
+ hits: RecallHit[];
181
+ omitted: number;
182
+ };
183
+ /** Count-limited recall (no token budgeting). Stable convenience API. */
184
+ recall(opts: SearchOptions): RecallHit[];
185
+ /**
186
+ * All memories, insertion-ordered. Materialised from the cache on
187
+ * each call — O(n), but only the infrequent readers (outline,
188
+ * mining, snapshot scan) use it; the frequent mutating paths stay
189
+ * O(1).
190
+ */
191
+ allMemories(): readonly Memory[];
192
+ setIngestedAt(category: Category, ts: number): void; // Records the ingest timestamp for a category in the store metadata.
193
+ getIngestedAt(category: Category): number | undefined; // Ingest timestamp recorded for the category; undefined when none was set.
194
+ applyEviction(config: ResolvedConfig): { // Runs eviction against `config.maxMemoryBytes`; reports how many memories were removed.
195
+ removed: number;
196
+ };
197
+ /** Record `id` as changed; schedule a flush. */
198
+ private markDirty;
199
+ /** Record `id` as deleted; schedule a flush. */
200
+ private markDeleted;
201
+ /** Record that `meta` changed; schedule a flush. */
202
+ private markMetaDirty;
203
+ private scheduleFlush; // Starts the debounce timer for a flush unless one is already pending.
204
+ /**
205
+ * Drain the write-behind buffer into SQLite in one transaction.
206
+ * Stays async to preserve the historical API; the work itself is
207
+ * synchronous (`bun:sqlite`). The pending sets are cleared only
208
+ * after the transaction succeeds — if it throws, they are retained
209
+ * and retried.
210
+ */
211
+ flush(): Promise<void>;
212
+ /** Cancel any pending debounced flush and flush immediately — used by tools and tests. */
213
+ forceFlush(): Promise<void>;
214
+ /**
215
+ * Flush and close the underlying database handle. Not part of the
216
+ * historical API, but tests create many short-lived repositories and
217
+ * should release their handles; in the plugin's own lifecycle the
218
+ * repository lives for the whole session and the OS reclaims the
219
+ * handle at process exit.
220
+ */
221
+ close(): Promise<void>;
222
+ }
@@ -0,0 +1,420 @@
1
+ /**
2
+ * Repository — the single source of truth for memory CRUD at runtime.
3
+ *
4
+ * Holds the in-memory working set, the inverted index, and a debounced
5
+ * write-behind layer over a SQLite store. All public operations keep
6
+ * the index in sync; the index is never accessed directly from
7
+ * outside. The public API is identical to the JSON-era repository —
8
+ * the storage swap is entirely behind this class.
9
+ *
10
+ * Storage model:
11
+ *
12
+ * - SQLite (`bun:sqlite`, see sqlite-store.ts) is the durable
13
+ * backing. It is written incrementally — only changed rows, in one
14
+ * transaction per flush — replacing the JSON store's whole-file
15
+ * rewrite. It is read exactly once, at `load()`.
16
+ *
17
+ * - `byId` is an in-memory cache that fully mirrors the store. It
18
+ * stays a full mirror because the inverted index needs every doc
19
+ * in memory anyway (IDF, avgdl, the co-change graph), so caching
20
+ * the Memory objects alongside costs little and keeps every read
21
+ * O(1) — recall never touches SQLite.
22
+ *
23
+ * - Writes are write-behind: `insert` / `upsertBySubject` /
24
+ * `removeMemory` / useCount bumps mutate the cache + index
25
+ * immediately and record the changed id in a pending buffer
26
+ * (`pendingDirty` / `pendingDeleted`). The debounced `flush`
27
+ * drains that buffer into SQLite in a single transaction. This
28
+ * batching is what makes SQLite a win rather than a wash — per-row
29
+ * transactions during ingestion would be slower than the JSON
30
+ * file; one transaction per debounce is far faster.
31
+ *
32
+ * Performance notes:
33
+ *
34
+ * - The cache is a `Map<id, Memory>` — insert / delete / lookup are
35
+ * all O(1). `removeMemory` and `applyEviction` work off the cache.
36
+ *
37
+ * - `insertIfMissing` consults `dedupKeys`, an in-memory Map keyed by
38
+ * compact composite keys, for O(1) idempotency. (SQLite is not
39
+ * consulted: a not-yet-flushed insert is in the buffer, not the
40
+ * DB, so the in-memory view is the authoritative one.)
41
+ *
42
+ * - `totalBytes` is a running counter; `countsByCategory` reads the
43
+ * inverted index's `byCategory` map — both O(1) / O(categories).
44
+ *
45
+ * - BM25 search is O(query terms × candidates); eviction sorts once
46
+ * per *batch*.
47
+ */
48
+ import { InvertedIndex } from "../search/inverted-index.js";
49
+ import { search, packToTokenBudget } from "../search/bm25.js";
50
+ import { reciprocalRankFusion } from "../search/embedder.js";
51
+ import { evictIfOverBudget } from "./eviction.js";
52
+ import { SqliteStore } from "./sqlite-store.js";
53
+ const PERSIST_DEBOUNCE_MS = 400; // Delay (ms) between the first buffered mutation and the flush that persists it.
54
+ const STORE_OVERHEAD_BYTES = 64; // Fixed base that the running byte total of the store starts from.
55
/** Monotonic per-process sequence number; guarantees uniqueness within one process. */
let idCounter = 0;
/**
 * Random tag chosen once per process. The counter restarts at 0 in every
 * process, so without this tag two processes writing to the same store
 * could mint the same `mem_<time>_<counter>` id within one millisecond.
 */
const ID_PROCESS_TAG = Math.random().toString(36).slice(2, 8) || "0";
/**
 * Mint a new memory id of the form `mem_<time36>_<counter36>_<processTag>`.
 *
 * The counter makes ids collision-free within a process; the random
 * process tag makes a collision across concurrently running processes
 * improbable. Ids are opaque — nothing should parse their segments.
 *
 * @returns {string} A fresh memory id.
 */
function newId() {
    idCounter += 1;
    return `mem_${Date.now().toString(36)}_${idCounter.toString(36)}_${ID_PROCESS_TAG}`;
}
61
+ /**
62
+ * Fast, allocation-light string hash (djb2). Keeps dedup keys compact
63
+ * even when `content` is long.
64
+ */
65
function hash32(s) {
    // djb2: seed with 5381, then fold in every UTF-16 code unit as
    // acc * 33 + unit, wrapping to a signed 32-bit integer each step.
    let acc = 5381;
    let pos = 0;
    while (pos < s.length) {
        acc = (Math.imul(acc, 33) + s.charCodeAt(pos)) | 0;
        pos += 1;
    }
    // Reinterpret as unsigned so the base-36 text never carries a minus sign.
    const unsigned = acc >>> 0;
    return unsigned.toString(36);
}
72
+ /** Composite key used for O(1) idempotent-insert checks. */
73
function dedupKey(category, subject, content) {
    // Only the content is hashed; category and subject stay literal, and
    // NUL separators keep the three parts from running into each other.
    const contentDigest = hash32(content);
    return "".concat(category, "\u0000", subject, "\u0000", contentDigest);
}
76
/**
 * Runtime repository for memories.
 *
 * Keeps every memory in an in-memory `Map` next to an inverted index and
 * persists changes to SQLite through a write-behind buffer that `flush()`
 * drains. Every mutation below updates cache, index, dedup index, byte
 * counter and pending buffer together.
 */
export class MemoryRepository {
    /** In-memory cache — a full mirror of the SQLite store, insertion-ordered. */
    byId = new Map();
    /** Store metadata (ingest timestamps, last eviction). */
    meta;
    /** Inverted index over the cached memories; every mutation keeps it in sync. */
    index = new InvertedIndex();
    /** Project root this repository was loaded for. */
    root;
    /** Durable backing store; after construction only `flush()` and `close()` touch it. */
    sqlite;
    /** Timer of the pending debounced flush, or `null` when none is scheduled. */
    flushTimer = null;
    /**
     * Idempotency index: composite dedup key → memory id. The key only
     * carries a 32-bit hash of the content, so a hit is a hint that must be
     * confirmed against the cached memory before it is trusted.
     */
    dedupKeys = new Map();
    /** Running sum of `sizeBytes` over all memories (+ fixed overhead). */
    bytesTotal = STORE_OVERHEAD_BYTES;
    // ── write-behind buffer ───────────────────────────────────────────
    /** Ids changed (inserted/updated) since the last flush. */
    pendingDirty = new Set();
    /** Ids deleted since the last flush. */
    pendingDeleted = new Set();
    /** Whether `meta` changed since the last flush. */
    metaDirty = false;
    /**
     * Optional semantic-search index. `undefined` until
     * `attachVectorStore` is called, in which case every recall takes the
     * pure-lexical path.
     */
    vectorStore;
    /**
     * Build a repository over an already-opened store.
     *
     * @param root Project root the store belongs to.
     * @param sqlite Opened store used for flushing and closing.
     * @param loaded Initial `{ meta, memories }` read from the store.
     */
    constructor(root, sqlite, loaded) {
        this.root = root;
        this.sqlite = sqlite;
        this.meta = loaded.meta;
        for (const m of loaded.memories) {
            this.byId.set(m.id, m);
            this.dedupKeys.set(dedupKey(m.category, m.subject, m.content), m.id);
            this.bytesTotal += m.sizeBytes;
        }
        this.index.rebuildFromAll(loaded.memories);
    }
    /**
     * Open the store for a project root and build the repository.
     * Async only to preserve the historical API — callers `await` this.
     *
     * @param root Project root to open the store for.
     * @param onMigrationError Optional callback forwarded to
     *   `SqliteStore.open`.
     * @returns The loaded repository.
     */
    static async load(root, onMigrationError) {
        const { store, loaded } = SqliteStore.open(root, undefined, onMigrationError);
        return new MemoryRepository(root, store, loaded);
    }
    /** Total number of stored memories. O(1). */
    size() {
        return this.byId.size;
    }
    /** Total disk-bytes estimate — O(1), maintained incrementally. */
    totalBytes() {
        return this.bytesTotal;
    }
    /** Per-category counts (empty categories omitted), read straight from the index. */
    countsByCategory() {
        const counts = new Map();
        for (const [cat, ids] of this.index.byCategory) {
            if (ids.size > 0)
                counts.set(cat, ids.size);
        }
        return counts;
    }
    /**
     * Unconditionally insert a new memory (no duplicate check).
     *
     * @param opts Category, subject, content, source and optional tags / pinned flag.
     * @returns The stored memory, with a fresh id and `useCount` 0.
     */
    insert(opts) {
        const now = Date.now();
        // Copy the tags: a later mutation of the caller's array must not
        // silently change a stored memory behind the index and `sizeBytes`.
        const tags = [...(opts.tags ?? [])];
        const sizeBytes = Buffer.byteLength(opts.content + opts.subject + tags.join(","), "utf-8");
        const mem = {
            id: newId(),
            category: opts.category,
            subject: opts.subject,
            content: opts.content,
            tags,
            source: opts.source,
            createdAt: now,
            usedAt: now,
            useCount: 0,
            sizeBytes,
            pinned: opts.pinned,
        };
        this.byId.set(mem.id, mem);
        this.dedupKeys.set(dedupKey(mem.category, mem.subject, mem.content), mem.id);
        this.bytesTotal += sizeBytes;
        this.index.add(mem);
        this.markDirty(mem.id);
        return mem;
    }
    /** Insert every item in order via `insert`; returns the stored memories. */
    insertMany(items) {
        const result = [];
        for (const item of items)
            result.push(this.insert(item));
        return result;
    }
    /**
     * Insert one memory only if the exact (category, subject, content)
     * tuple isn't already stored; otherwise return the stored entry.
     *
     * The common paths are O(1) via `dedupKeys`. Because that key only
     * hashes the content, a hit is verified against the real content; on
     * a mismatch (hash collision) or a stale slot, the memories sharing
     * the subject are scanned for an exact match before inserting, so a
     * collision can neither drop new content nor return an unrelated
     * memory. SQLite is never consulted — the in-memory view is the
     * authoritative one.
     */
    insertIfMissing(opts) {
        const existingId = this.dedupKeys.get(dedupKey(opts.category, opts.subject, opts.content));
        if (existingId === undefined)
            return this.insert(opts);
        const candidate = this.byId.get(existingId);
        if (candidate !== undefined &&
            candidate.category === opts.category &&
            candidate.subject === opts.subject &&
            candidate.content === opts.content) {
            return candidate;
        }
        // Slot is stale or belongs to colliding content — look for a true
        // duplicate before deciding to insert.
        return this.findExactDuplicate(opts) ?? this.insert(opts);
    }
    /**
     * Scan the memories indexed under `opts.subject` for one whose
     * category, subject and content all match exactly.
     *
     * @returns The matching memory, or `undefined` when there is none.
     */
    findExactDuplicate(opts) {
        const ids = this.index.bySubject.get(opts.subject);
        if (!ids)
            return undefined;
        for (const id of ids) {
            const mem = this.byId.get(id);
            if (mem !== undefined &&
                mem.category === opts.category &&
                mem.subject === opts.subject &&
                mem.content === opts.content) {
                return mem;
            }
        }
        return undefined;
    }
    /**
     * Insert a memory, first removing every stored memory that shares its
     * (category, subject), so re-reporting a "live" fact replaces the
     * prior state instead of accumulating copies.
     */
    upsertBySubject(opts) {
        const ids = this.index.bySubject.get(opts.subject);
        if (ids) {
            // Iterate over a copy: removeMemory mutates the index while we loop.
            for (const id of [...ids]) {
                const mem = this.byId.get(id);
                if (mem && mem.category === opts.category)
                    this.removeMemory(mem);
            }
        }
        return this.insert(opts);
    }
    /** Remove a single memory, keeping every index + counter + buffer consistent. */
    removeMemory(mem) {
        this.byId.delete(mem.id);
        // Only release the dedup slot when it points at THIS memory. Plain
        // `insert` can store identical tuples; the slot then belongs to
        // the newest one and must survive removal of an older twin.
        const key = dedupKey(mem.category, mem.subject, mem.content);
        if (this.dedupKeys.get(key) === mem.id)
            this.dedupKeys.delete(key);
        this.bytesTotal -= mem.sizeBytes;
        this.index.remove(mem);
        this.markDeleted(mem.id);
    }
    /**
     * Attach a semantic vector index. A recall that carries a
     * `queryVector` then fuses vector similarity with the lexical
     * ranking; other recalls are unaffected.
     */
    attachVectorStore(vs) {
        this.vectorStore = vs;
    }
    /**
     * Rank candidates for a recall.
     *
     * Without a vector store or without `opts.queryVector` this is just
     * `search()`. With both, a pool of `max(limit, 50)` candidates is
     * drawn from the lexical search and from the vector store, vector
     * results are filtered to the requested category/subject, the two id
     * lists are merged by reciprocal-rank fusion, and the fused list is
     * cut to `limit` (default 25).
     */
    rankCandidates(opts) {
        if (!this.vectorStore || !opts.queryVector) {
            return search(this.index, this.byId, opts);
        }
        const limit = opts.limit ?? 25;
        const pool = Math.max(limit, 50);
        const lexical = search(this.index, this.byId, { ...opts, limit: pool });
        const vector = this.vectorStore.search(opts.queryVector, pool).filter((r) => {
            const m = this.byId.get(r.id);
            if (!m)
                return false;
            if (opts.category && m.category !== opts.category)
                return false;
            if (opts.subject && m.subject !== opts.subject)
                return false;
            return true;
        });
        const fused = reciprocalRankFusion([
            lexical.map((h) => h.memory.id),
            vector.map((v) => v.id),
        ]);
        const hits = [];
        for (const f of fused) {
            const m = this.byId.get(f.id);
            if (m)
                hits.push({ memory: m, score: f.score });
            if (hits.length >= limit)
                break;
        }
        return hits;
    }
    /**
     * Budget-aware recall. Ranks via `rankCandidates`; when
     * `opts.tokenBudget` is positive *and* `format` is supplied, the
     * ranked hits are packed to that budget. `useCount` / `usedAt` are
     * bumped only for the hits actually kept.
     *
     * @returns The kept hits and the `omitted` count (0 when no packing ran).
     */
    recallDetailed(opts, format) {
        const ranked = this.rankCandidates(opts);
        let kept = ranked;
        let omitted = 0;
        if (opts.tokenBudget && opts.tokenBudget > 0 && format) {
            const packed = packToTokenBudget(ranked, opts.tokenBudget, format);
            kept = packed.kept;
            omitted = packed.omitted;
        }
        const now = Date.now();
        for (const h of kept) {
            // packToTokenBudget may hand back a shallow clone with trimmed
            // content — always bump the REAL stored memory by id.
            const real = this.byId.get(h.memory.id);
            if (real) {
                real.useCount += 1;
                real.usedAt = now;
                this.markDirty(real.id);
            }
        }
        return { hits: kept, omitted };
    }
    /** Count-limited recall (no token budgeting). Stable convenience API. */
    recall(opts) {
        return this.recallDetailed(opts).hits;
    }
    /** All memories, insertion-ordered; a fresh array is built on each call (O(n)). */
    allMemories() {
        return [...this.byId.values()];
    }
    /** Record the ingest timestamp for `category` and mark the metadata dirty. */
    setIngestedAt(category, ts) {
        this.meta.ingestedAt[category] = ts;
        this.markMetaDirty();
    }
    /** Ingest timestamp recorded for `category`, or `undefined` when none was set. */
    getIngestedAt(category) {
        return this.meta.ingestedAt[category];
    }
    /**
     * Ask `evictIfOverBudget` which memories to drop for
     * `config.maxMemoryBytes` and remove them through `removeMemory`, so
     * eviction follows exactly the same bookkeeping as any other removal.
     *
     * @returns `{ removed }` — the number of evicted memories.
     */
    applyEviction(config) {
        const evicted = evictIfOverBudget([...this.byId.values()], config.maxMemoryBytes, this.bytesTotal);
        if (evicted.length === 0)
            return { removed: 0 };
        for (const mem of evicted)
            this.removeMemory(mem);
        this.meta.lastEvictionAt = Date.now();
        this.markMetaDirty();
        return { removed: evicted.length };
    }
    // ── write-behind buffer bookkeeping ───────────────────────────────
    /** Record `id` as changed (cancelling a pending delete); schedule a flush. */
    markDirty(id) {
        this.pendingDirty.add(id);
        this.pendingDeleted.delete(id);
        this.scheduleFlush();
    }
    /** Record `id` as deleted (cancelling a pending write); schedule a flush. */
    markDeleted(id) {
        this.pendingDeleted.add(id);
        this.pendingDirty.delete(id);
        this.scheduleFlush();
    }
    /** Record that `meta` changed; schedule a flush. */
    markMetaDirty() {
        this.metaDirty = true;
        this.scheduleFlush();
    }
    /** Start the flush timer unless one is already pending. */
    scheduleFlush() {
        if (this.flushTimer)
            return;
        this.flushTimer = setTimeout(() => {
            this.flushTimer = null;
            // The debounced flush runs detached — a write failure must not
            // surface as an unhandled rejection. The pending buffers are
            // only cleared on success, so a failed flush is retried when
            // the next mutation schedules another one.
            void this.flush().catch(() => {
                /* buffers retained → retried on next mutation */
            });
        }, PERSIST_DEBOUNCE_MS);
    }
    /**
     * Drain the write-behind buffer into the store with one
     * `sqlite.flush` call. No-op when nothing is pending. The pending
     * sets are cleared only after that call returns — if it throws, they
     * are retained for the next attempt and the error propagates.
     */
    async flush() {
        if (this.pendingDirty.size === 0 && this.pendingDeleted.size === 0 && !this.metaDirty) {
            return;
        }
        const dirty = [];
        for (const id of this.pendingDirty) {
            const mem = this.byId.get(id);
            if (mem)
                dirty.push(mem);
        }
        const deleted = [...this.pendingDeleted];
        // If this throws, the sets below are NOT cleared — the next
        // mutation reschedules and retries.
        this.sqlite.flush(dirty, deleted, this.meta);
        this.pendingDirty.clear();
        this.pendingDeleted.clear();
        this.metaDirty = false;
    }
    /** Cancel any pending debounced flush and flush immediately — used by tools and tests. */
    async forceFlush() {
        if (this.flushTimer) {
            clearTimeout(this.flushTimer);
            this.flushTimer = null;
        }
        await this.flush();
    }
    /**
     * Flush, then close the underlying database handle. The handle is
     * closed even when the final flush throws, so a failing flush cannot
     * leak it; the flush error still propagates to the caller.
     */
    async close() {
        try {
            await this.forceFlush();
        }
        finally {
            this.sqlite.close();
        }
    }
}