opencode-diane 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +180 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/WIKI.md +1430 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +1632 -0
- package/dist/ingest/adaptive.d.ts +47 -0
- package/dist/ingest/adaptive.js +182 -0
- package/dist/ingest/code-health.d.ts +58 -0
- package/dist/ingest/code-health.js +202 -0
- package/dist/ingest/code-map.d.ts +71 -0
- package/dist/ingest/code-map.js +670 -0
- package/dist/ingest/cross-refs.d.ts +59 -0
- package/dist/ingest/cross-refs.js +1207 -0
- package/dist/ingest/docs.d.ts +49 -0
- package/dist/ingest/docs.js +325 -0
- package/dist/ingest/git.d.ts +77 -0
- package/dist/ingest/git.js +390 -0
- package/dist/ingest/live-session.d.ts +101 -0
- package/dist/ingest/live-session.js +173 -0
- package/dist/ingest/project-notes.d.ts +28 -0
- package/dist/ingest/project-notes.js +102 -0
- package/dist/ingest/project.d.ts +35 -0
- package/dist/ingest/project.js +430 -0
- package/dist/ingest/session-snapshot.d.ts +63 -0
- package/dist/ingest/session-snapshot.js +94 -0
- package/dist/ingest/sessions.d.ts +29 -0
- package/dist/ingest/sessions.js +164 -0
- package/dist/ingest/tables.d.ts +52 -0
- package/dist/ingest/tables.js +360 -0
- package/dist/mining/skill-miner.d.ts +53 -0
- package/dist/mining/skill-miner.js +234 -0
- package/dist/search/bm25.d.ts +81 -0
- package/dist/search/bm25.js +334 -0
- package/dist/search/e5-embedder.d.ts +30 -0
- package/dist/search/e5-embedder.js +91 -0
- package/dist/search/embed-pass.d.ts +26 -0
- package/dist/search/embed-pass.js +43 -0
- package/dist/search/embedder.d.ts +58 -0
- package/dist/search/embedder.js +85 -0
- package/dist/search/inverted-index.d.ts +51 -0
- package/dist/search/inverted-index.js +139 -0
- package/dist/search/ppr.d.ts +44 -0
- package/dist/search/ppr.js +118 -0
- package/dist/search/tokenize.d.ts +26 -0
- package/dist/search/tokenize.js +98 -0
- package/dist/store/eviction.d.ts +16 -0
- package/dist/store/eviction.js +37 -0
- package/dist/store/repository.d.ts +222 -0
- package/dist/store/repository.js +420 -0
- package/dist/store/sqlite-store.d.ts +89 -0
- package/dist/store/sqlite-store.js +252 -0
- package/dist/store/vector-store.d.ts +66 -0
- package/dist/store/vector-store.js +160 -0
- package/dist/types.d.ts +385 -0
- package/dist/types.js +9 -0
- package/dist/utils/file-log.d.ts +87 -0
- package/dist/utils/file-log.js +215 -0
- package/dist/utils/peer-detection.d.ts +45 -0
- package/dist/utils/peer-detection.js +90 -0
- package/dist/utils/shell.d.ts +43 -0
- package/dist/utils/shell.js +110 -0
- package/dist/utils/usage-skill.d.ts +42 -0
- package/dist/utils/usage-skill.js +129 -0
- package/dist/utils/xlsx.d.ts +36 -0
- package/dist/utils/xlsx.js +270 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-css.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-html.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-json.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +80 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repository — the single source of truth for memory CRUD at runtime.
|
|
3
|
+
*
|
|
4
|
+
* Holds the in-memory working set, the inverted index, and a debounced
|
|
5
|
+
* write-behind layer over a SQLite store. All public operations keep
|
|
6
|
+
* the index in sync; the index is never accessed directly from
|
|
7
|
+
* outside. The public API is identical to the JSON-era repository —
|
|
8
|
+
* the storage swap is entirely behind this class.
|
|
9
|
+
*
|
|
10
|
+
* Storage model:
|
|
11
|
+
*
|
|
12
|
+
* - SQLite (`bun:sqlite`, see sqlite-store.ts) is the durable
|
|
13
|
+
* backing. It is written incrementally — only changed rows, in one
|
|
14
|
+
* transaction per flush — replacing the JSON store's whole-file
|
|
15
|
+
* rewrite. It is read exactly once, at `load()`.
|
|
16
|
+
*
|
|
17
|
+
* - `byId` is an in-memory cache that fully mirrors the store. It
|
|
18
|
+
* stays a full mirror because the inverted index needs every doc
|
|
19
|
+
* in memory anyway (IDF, avgdl, the co-change graph), so caching
|
|
20
|
+
* the Memory objects alongside costs little and keeps every read
|
|
21
|
+
* O(1) — recall never touches SQLite.
|
|
22
|
+
*
|
|
23
|
+
* - Writes are write-behind: `insert` / `upsertBySubject` /
|
|
24
|
+
* `removeMemory` / useCount bumps mutate the cache + index
|
|
25
|
+
* immediately and record the changed id in a pending buffer
|
|
26
|
+
* (`pendingDirty` / `pendingDeleted`). The debounced `flush`
|
|
27
|
+
* drains that buffer into SQLite in a single transaction. This
|
|
28
|
+
* batching is what makes SQLite a win rather than a wash — per-row
|
|
29
|
+
* transactions during ingestion would be slower than the JSON
|
|
30
|
+
* file; one transaction per debounce is far faster.
|
|
31
|
+
*
|
|
32
|
+
* Performance notes:
|
|
33
|
+
*
|
|
34
|
+
* - The cache is a `Map<id, Memory>` — insert / delete / lookup are
|
|
35
|
+
* all O(1). `removeMemory` and `applyEviction` work off the cache.
|
|
36
|
+
*
|
|
37
|
+
* - `insertIfMissing` consults `dedupKeys`, an in-memory Map keyed by
|
|
38
|
+
* compact composite keys, for O(1) idempotency. (SQLite is not
|
|
39
|
+
* consulted: a not-yet-flushed insert is in the buffer, not the
|
|
40
|
+
* DB, so the in-memory view is the authoritative one.)
|
|
41
|
+
*
|
|
42
|
+
* - `totalBytes` is a running counter; `countsByCategory` reads the
|
|
43
|
+
* inverted index's `byCategory` map — both O(1) / O(categories).
|
|
44
|
+
*
|
|
45
|
+
* - BM25 search is O(query terms × candidates); eviction sorts once
|
|
46
|
+
* per *batch*.
|
|
47
|
+
*/
|
|
48
|
+
import type { Memory, RecallHit, Category, ResolvedConfig } from "../types.js";
|
|
49
|
+
import { type SearchOptions } from "../search/bm25.js";
|
|
50
|
+
import type { VectorStore } from "./vector-store.js";
|
|
51
|
+
export declare class MemoryRepository {
    /** Insertion-ordered in-memory mirror of every memory held in the SQLite store. */
    private byId;
    /** Store-level metadata: per-category ingest timestamps and the last eviction time. */
    private meta;
    private index;
    private root;
    private sqlite;
    private flushTimer;
    /** Composite dedup key → memory id; backs the O(1) idempotent-insert check. */
    private dedupKeys;
    /** Incrementally maintained sum of every memory's `sizeBytes`, plus a fixed overhead. */
    private bytesTotal;
    /** Ids inserted or updated since the last successful flush. */
    private pendingDirty;
    /** Ids removed since the last successful flush. */
    private pendingDeleted;
    /** True when `meta` has been modified since the last successful flush. */
    private metaDirty;
    /**
     * Semantic-search index, present only after `attachVectorStore` has
     * been called (the plugin does so when `enableSemanticSearch` is
     * on). While it is `undefined`, every recall uses the pure-lexical
     * path.
     */
    private vectorStore?;
    private constructor();
    /**
     * Open the store under `root` and build a repository from its
     * contents. The method is async only to keep the historical API —
     * `bun:sqlite` itself is synchronous — so callers `await` it.
     *
     * Should the legacy JSON-to-SQLite migration fail (rare; it can
     * happen when another process touches the database during
     * startup), `onMigrationError` receives the cause and the plugin
     * carries on with an empty, fresh database. SqliteStore.open and
     * migrateFromJson explain why "keep starting, log loudly" was
     * preferred over "fail to start".
     */
    static load(root: string, onMigrationError?: (e: unknown) => void): Promise<MemoryRepository>;
    /** Number of memories currently stored. O(1). */
    size(): number;
    /** Estimated on-disk size in bytes; an O(1) read of a running counter. */
    totalBytes(): number;
    /** Memory count per category, taken from the index. O(categories). */
    countsByCategory(): Map<Category, number>;
    /** Store one new memory and return it. */
    insert(opts: {
        category: Category;
        subject: string;
        content: string;
        tags?: string[];
        source: string;
        pinned?: boolean;
    }): Memory;
    /** Batch form of `insert`: stores every item and returns the created memories in order. */
    insertMany(items: Array<{
        category: Category;
        subject: string;
        content: string;
        tags?: string[];
        source: string;
        pinned?: boolean;
    }>): Memory[];
    /**
     * Store a memory unless one with the same (category, subject,
     * content) tuple is already present, in which case the existing
     * entry is returned instead. Ingestion relies on this to stay
     * idempotent across plugin restarts.
     *
     * The check goes through the in-memory `dedupKeys` index. SQLite
     * is deliberately not queried: an insert that has not been flushed
     * yet exists only in the write-behind buffer, which makes the
     * in-memory view the authoritative one.
     */
    insertIfMissing(opts: {
        category: Category;
        subject: string;
        content: string;
        tags?: string[];
        source: string;
        pinned?: boolean;
    }): Memory;
    /**
     * Replace-style insert: every existing memory with the same
     * (category, subject) is removed before the new one is stored.
     * Intended for "live" single-valued facts — a file's current LSP
     * diagnostics, a file's current signature map — where a re-report
     * must supersede earlier state rather than pile up stale copies.
     *
     * Cost is O(memories sharing that subject), via the inverted index.
     */
    upsertBySubject(opts: {
        category: Category;
        subject: string;
        content: string;
        tags?: string[];
        source: string;
        pinned?: boolean;
    }): Memory;
    /** Drop one memory while keeping cache, indexes, byte counter and pending buffers in step. */
    private removeMemory;
    /**
     * Attach a semantic vector index. From then on, a recall that
     * supplies a `queryVector` blends vector similarity into the
     * lexical ranking; recalls without one — or issued before this
     * call — behave exactly as before. Idempotent.
     */
    attachVectorStore(vs: VectorStore): void;
    /**
     * Produce the ranked candidate list for a recall.
     *
     * Without an attached vector store, or without a `queryVector`,
     * this is the historical lexical path — a plain `search()` — so
     * the default (semantic-search-off) configuration is unchanged.
     *
     * When both are available, the BM25 lexical ranking and a
     * vector-similarity ranking are merged with reciprocal-rank
     * fusion. Each side contributes a larger candidate pool so that a
     * hit strong in only one ranking can still surface; the fused list
     * is then cut back to `limit`. Vector candidates are restricted to
     * the same category/subject scope as the lexical query.
     */
    private rankCandidates;
    /**
     * Budget-aware recall. Candidates are ranked (and count-limited by
     * `opts.limit`); when `opts.tokenBudget` is set *and* a `format`
     * function is given, the ranked hits are additionally packed to
     * that token ceiling. Only the hits that are actually KEPT have
     * their `useCount`/`usedAt` bumped.
     */
    recallDetailed(opts: SearchOptions, format?: (h: RecallHit) => string): {
        hits: RecallHit[];
        omitted: number;
    };
    /** Count-limited recall with no token budgeting. Stable convenience API. */
    recall(opts: SearchOptions): RecallHit[];
    /**
     * Every memory, in insertion order. The array is built from the
     * cache on each call — O(n) — which is acceptable because only
     * infrequent readers (outline, mining, snapshot scan) use it.
     */
    allMemories(): readonly Memory[];
    /** Record the ingest timestamp for `category`. */
    setIngestedAt(category: Category, ts: number): void;
    /** Ingest timestamp previously recorded for `category`, if any. */
    getIngestedAt(category: Category): number | undefined;
    /** Run eviction against the configured budget and report how many memories were removed. */
    applyEviction(config: ResolvedConfig): {
        removed: number;
    };
    /** Note that `id` was inserted or updated, and schedule a flush. */
    private markDirty;
    /** Note that `id` was deleted, and schedule a flush. */
    private markDeleted;
    /** Note that `meta` was modified, and schedule a flush. */
    private markMetaDirty;
    private scheduleFlush;
    /**
     * Write the pending buffer to SQLite in a single transaction. The
     * method is async only to keep the historical API; the work is
     * synchronous (`bun:sqlite`). The pending sets are emptied only
     * once the transaction has succeeded — on a throw they are kept
     * and retried.
     */
    flush(): Promise<void>;
    /** Flush immediately instead of waiting for the debounce timer — used by tools and tests. */
    forceFlush(): Promise<void>;
    /**
     * Flush, then close the underlying database handle. Not part of
     * the historical API: it exists because tests create many
     * short-lived repositories and should release their handles. In
     * the plugin's own lifecycle the repository lives for the whole
     * session and the OS reclaims the handle at process exit.
     */
    close(): Promise<void>;
}
|
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repository — the single source of truth for memory CRUD at runtime.
|
|
3
|
+
*
|
|
4
|
+
* Holds the in-memory working set, the inverted index, and a debounced
|
|
5
|
+
* write-behind layer over a SQLite store. All public operations keep
|
|
6
|
+
* the index in sync; the index is never accessed directly from
|
|
7
|
+
* outside. The public API is identical to the JSON-era repository —
|
|
8
|
+
* the storage swap is entirely behind this class.
|
|
9
|
+
*
|
|
10
|
+
* Storage model:
|
|
11
|
+
*
|
|
12
|
+
* - SQLite (`bun:sqlite`, see sqlite-store.ts) is the durable
|
|
13
|
+
* backing. It is written incrementally — only changed rows, in one
|
|
14
|
+
* transaction per flush — replacing the JSON store's whole-file
|
|
15
|
+
* rewrite. It is read exactly once, at `load()`.
|
|
16
|
+
*
|
|
17
|
+
* - `byId` is an in-memory cache that fully mirrors the store. It
|
|
18
|
+
* stays a full mirror because the inverted index needs every doc
|
|
19
|
+
* in memory anyway (IDF, avgdl, the co-change graph), so caching
|
|
20
|
+
* the Memory objects alongside costs little and keeps every read
|
|
21
|
+
* O(1) — recall never touches SQLite.
|
|
22
|
+
*
|
|
23
|
+
* - Writes are write-behind: `insert` / `upsertBySubject` /
|
|
24
|
+
* `removeMemory` / useCount bumps mutate the cache + index
|
|
25
|
+
* immediately and record the changed id in a pending buffer
|
|
26
|
+
* (`pendingDirty` / `pendingDeleted`). The debounced `flush`
|
|
27
|
+
* drains that buffer into SQLite in a single transaction. This
|
|
28
|
+
* batching is what makes SQLite a win rather than a wash — per-row
|
|
29
|
+
* transactions during ingestion would be slower than the JSON
|
|
30
|
+
* file; one transaction per debounce is far faster.
|
|
31
|
+
*
|
|
32
|
+
* Performance notes:
|
|
33
|
+
*
|
|
34
|
+
* - The cache is a `Map<id, Memory>` — insert / delete / lookup are
|
|
35
|
+
* all O(1). `removeMemory` and `applyEviction` work off the cache.
|
|
36
|
+
*
|
|
37
|
+
* - `insertIfMissing` consults `dedupKeys`, an in-memory Map keyed by
|
|
38
|
+
* compact composite keys, for O(1) idempotency. (SQLite is not
|
|
39
|
+
* consulted: a not-yet-flushed insert is in the buffer, not the
|
|
40
|
+
* DB, so the in-memory view is the authoritative one.)
|
|
41
|
+
*
|
|
42
|
+
* - `totalBytes` is a running counter; `countsByCategory` reads the
|
|
43
|
+
* inverted index's `byCategory` map — both O(1) / O(categories).
|
|
44
|
+
*
|
|
45
|
+
* - BM25 search is O(query terms × candidates); eviction sorts once
|
|
46
|
+
* per *batch*.
|
|
47
|
+
*/
|
|
48
|
+
import { InvertedIndex } from "../search/inverted-index.js";
|
|
49
|
+
import { search, packToTokenBudget } from "../search/bm25.js";
|
|
50
|
+
import { reciprocalRankFusion } from "../search/embedder.js";
|
|
51
|
+
import { evictIfOverBudget } from "./eviction.js";
|
|
52
|
+
import { SqliteStore } from "./sqlite-store.js";
|
|
53
|
+
// Delay, in milliseconds, handed to setTimeout when a write-behind flush is scheduled.
const PERSIST_DEBOUNCE_MS = 400;
|
|
54
|
+
// Fixed overhead, in bytes, that the repository's running byte counter starts from before any memory is added.
const STORE_OVERHEAD_BYTES = 64;
|
|
55
|
+
/** Per-process sequence number; makes ids unique within a single process. */
let idCounter = 0;
/**
 * Mint a new memory id of the form `mem_<time>_<seq>_<rand>`, each
 * part base-36 encoded.
 *
 * - `<time>` is the current `Date.now()` value.
 * - `<seq>` is a counter incremented on every call, so two ids minted
 *   by the same process can never be equal.
 * - `<rand>` is six random base-36 characters. The counter restarts at
 *   zero in every process, so without this part two processes minting
 *   an id in the same millisecond would produce the same string; the
 *   random suffix makes such a cross-process clash very unlikely.
 *
 * The randomness is not security-sensitive, hence `Math.random`.
 *
 * @returns {string} the new id
 */
function newId() {
    idCounter += 1;
    const time = Date.now().toString(36);
    const seq = idCounter.toString(36);
    // `toString(36)` yields "0.xxxx…"; drop the "0." and pad in case the
    // fraction happens to be short (or the draw was exactly 0).
    const rand = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
    return `mem_${time}_${seq}_${rand}`;
}
|
|
61
|
+
/**
 * djb2 string hash, folded to 32 bits and rendered in base 36.
 * Cheap to compute and allocation-light, it lets dedup keys stay
 * short no matter how long `content` is.
 *
 * @param {string} s text to hash (hashed per UTF-16 code unit)
 * @returns {string} unsigned 32-bit hash as a base-36 string
 */
function hash32(s) {
    let acc = 5381;
    for (let pos = 0, len = s.length; pos < len; pos += 1) {
        // acc * 33 + code, wrapped to a signed 32-bit integer.
        acc = (Math.imul(acc, 33) + s.charCodeAt(pos)) | 0;
    }
    // Reinterpret as unsigned before encoding so the result has no sign.
    return (acc >>> 0).toString(36);
}
|
|
72
|
+
/**
 * Build the composite key behind the idempotent-insert check:
 * category, subject and the hash of `content`, separated by NUL
 * characters.
 */
function dedupKey(category, subject, content) {
    const separator = "\u0000";
    const contentHash = hash32(content);
    return "".concat(category, separator, subject, separator, contentHash);
}
|
|
76
|
+
export class MemoryRepository {
    /** In-memory cache — a full mirror of the SQLite store, insertion-ordered. */
    byId = new Map();
    /** Store metadata (ingest timestamps, last eviction). */
    meta;
    /** Inverted index over the cached memories; every mutation below keeps it in sync. */
    index = new InvertedIndex();
    /** Project root this repository was opened for. */
    root;
    /** Durable backing store; used by `flush()` and `close()`. */
    sqlite;
    /** Handle of the pending debounce timer, or `null` when no flush is scheduled. */
    flushTimer = null;
    /** O(1) idempotency: composite key → memory id (in-memory, mirrors the cache). */
    dedupKeys = new Map();
    /** Running sum of `sizeBytes` over all memories (+ fixed overhead). */
    bytesTotal = STORE_OVERHEAD_BYTES;
    // ── write-behind buffer ───────────────────────────────────────────
    /** Ids changed (inserted/updated) since the last flush. */
    pendingDirty = new Set();
    /** Ids deleted since the last flush. */
    pendingDeleted = new Set();
    /** Whether `meta` changed since the last flush. */
    metaDirty = false;
    /**
     * Optional semantic-search index. Attached by the plugin only when
     * `enableSemanticSearch` is on; `undefined` otherwise, in which case
     * every recall takes the unchanged pure-lexical path.
     */
    vectorStore;
    /**
     * Build a repository from already-loaded store contents.
     *
     * @param root    project root the store belongs to
     * @param sqlite  open store handle used for flushing and closing
     * @param loaded  `{ meta, memories }` as read from the store
     */
    constructor(root, sqlite, loaded) {
        this.root = root;
        this.sqlite = sqlite;
        this.meta = loaded.meta;
        for (const m of loaded.memories) {
            this.byId.set(m.id, m);
            this.dedupKeys.set(dedupKey(m.category, m.subject, m.content), m.id);
            this.bytesTotal += m.sizeBytes;
        }
        this.index.rebuildFromAll(loaded.memories);
    }
    /**
     * Open the store for a project root and build the repository.
     * Stays async to preserve the historical API even though
     * `bun:sqlite` is synchronous — callers `await` this.
     *
     * If the legacy JSON-to-SQLite migration fails (rare, but possible
     * when another process is touching the database during startup),
     * `onMigrationError` is invoked with the cause and the plugin
     * continues with an empty fresh database — see SqliteStore.open
     * and migrateFromJson for why "keep starting, log loudly" beats
     * "fail to start".
     */
    static async load(root, onMigrationError) {
        const { store, loaded } = SqliteStore.open(root, undefined, onMigrationError);
        return new MemoryRepository(root, store, loaded);
    }
    /** Total number of stored memories. O(1). */
    size() {
        return this.byId.size;
    }
    /** Total disk-bytes estimate — O(1), maintained incrementally. */
    totalBytes() {
        return this.bytesTotal;
    }
    /** Per-category counts — O(categories), read straight from the index. */
    countsByCategory() {
        const m = new Map();
        for (const [cat, ids] of this.index.byCategory) {
            // Categories whose id set has emptied out are left out of the result.
            if (ids.size > 0)
                m.set(cat, ids.size);
        }
        return m;
    }
    /**
     * Store a new memory unconditionally (no duplicate check) and
     * return it. Updates the cache, the dedup index, the byte counter
     * and the inverted index, then marks the id dirty for the next
     * flush.
     */
    insert(opts) {
        const now = Date.now();
        // Size estimate: UTF-8 length of content + subject + comma-joined tags.
        const sizeBytes = Buffer.byteLength(opts.content + opts.subject + (opts.tags ?? []).join(","), "utf-8");
        const mem = {
            id: newId(),
            category: opts.category,
            subject: opts.subject,
            content: opts.content,
            tags: opts.tags ?? [],
            source: opts.source,
            createdAt: now,
            usedAt: now,
            useCount: 0,
            sizeBytes,
            pinned: opts.pinned,
        };
        this.byId.set(mem.id, mem);
        // When an identical tuple is inserted twice, the key tracks the newest id.
        this.dedupKeys.set(dedupKey(mem.category, mem.subject, mem.content), mem.id);
        this.bytesTotal += sizeBytes;
        this.index.add(mem);
        this.markDirty(mem.id);
        return mem;
    }
    /** Insert every item via `insert` and return the created memories in order. */
    insertMany(items) {
        const result = [];
        for (const item of items)
            result.push(this.insert(item));
        return result;
    }
    /**
     * Insert one memory only if the (category, subject, content) tuple
     * doesn't already exist. Returns the existing entry if so. This is
     * how ingestion stays idempotent across plugin restarts.
     *
     * The fast path is an O(1) lookup in the in-memory `dedupKeys`
     * index. SQLite is intentionally not consulted — a not-yet-flushed
     * insert lives in the write-behind buffer, so the in-memory view is
     * the authoritative one.
     *
     * The dedup key carries only a 32-bit djb2 hash of `content`, and
     * djb2 collides easily (e.g. "ab" and "bA"), so a key hit is
     * confirmed against the full tuple. When the keyed entry turns out
     * to be different content (or is gone), the memories sharing the
     * subject are scanned for an exact match before a new memory is
     * inserted — so neither distinct content is dropped nor a true
     * duplicate added.
     */
    insertIfMissing(opts) {
        const key = dedupKey(opts.category, opts.subject, opts.content);
        const existingId = this.dedupKeys.get(key);
        if (existingId !== undefined) {
            const sameTuple = (m) => m !== undefined &&
                m.category === opts.category &&
                m.subject === opts.subject &&
                m.content === opts.content;
            const existing = this.byId.get(existingId);
            if (sameTuple(existing))
                return existing;
            // Key hit but wrong (hash collision) or stale entry: fall back to
            // an exact scan. This path is O(memories sharing the subject) and
            // only runs in that rare case.
            const ids = this.index.bySubject.get(opts.subject);
            if (ids) {
                for (const id of ids) {
                    const candidate = this.byId.get(id);
                    if (sameTuple(candidate))
                        return candidate;
                }
            }
        }
        return this.insert(opts);
    }
    /**
     * Insert a memory, first removing any existing memories that share
     * its (category, subject). For "live" single-valued facts — a
     * file's current LSP diagnostics, a file's current signature map —
     * re-reporting must REPLACE prior state, not accumulate stale
     * copies.
     *
     * O(memories sharing that subject) via the inverted index.
     */
    upsertBySubject(opts) {
        const ids = this.index.bySubject.get(opts.subject);
        if (ids) {
            // Iterate over a copy: removeMemory updates the index while we loop.
            for (const id of [...ids]) {
                const mem = this.byId.get(id);
                if (mem && mem.category === opts.category)
                    this.removeMemory(mem);
            }
        }
        return this.insert(opts);
    }
    /**
     * Remove a single memory, keeping every index + counter + buffer
     * consistent.
     *
     * The dedup key is dropped only when it still points at this
     * memory: a later insert of the same tuple re-targets the key at
     * the newer memory, and deleting it then would make
     * `insertIfMissing` forget that the newer memory exists.
     */
    removeMemory(mem) {
        this.byId.delete(mem.id);
        const key = dedupKey(mem.category, mem.subject, mem.content);
        if (this.dedupKeys.get(key) === mem.id)
            this.dedupKeys.delete(key);
        this.bytesTotal -= mem.sizeBytes;
        this.index.remove(mem);
        this.markDeleted(mem.id);
    }
    /**
     * Attach a semantic vector index. Once attached, a recall that
     * carries a `queryVector` fuses vector similarity with the lexical
     * ranking; a recall without one, or before this is called, is
     * unaffected. Idempotent.
     */
    attachVectorStore(vs) {
        this.vectorStore = vs;
    }
    /**
     * Rank candidates for a recall.
     *
     * With no vector store attached, or no `queryVector` supplied, this
     * is exactly the historical lexical path — `search()` and nothing
     * else. That keeps the default (semantic-search-off) configuration
     * byte-for-byte unchanged.
     *
     * With both present, it fuses two rankings via reciprocal-rank
     * fusion: the BM25 lexical ranking and a vector-similarity ranking.
     * A larger candidate pool is drawn from each side so a hit that is
     * strong in only one ranking can still surface, then the fused list
     * is trimmed back to `limit`. Vector candidates are filtered to the
     * same category/subject scope as the lexical query.
     */
    rankCandidates(opts) {
        if (!this.vectorStore || !opts.queryVector) {
            return search(this.index, this.byId, opts);
        }
        const limit = opts.limit ?? 25;
        // Draw at least 50 candidates per side regardless of the requested limit.
        const pool = Math.max(limit, 50);
        const lexical = search(this.index, this.byId, { ...opts, limit: pool });
        const vector = this.vectorStore.search(opts.queryVector, pool).filter((r) => {
            const m = this.byId.get(r.id);
            // Skip vectors whose memory is no longer in the cache.
            if (!m)
                return false;
            if (opts.category && m.category !== opts.category)
                return false;
            if (opts.subject && m.subject !== opts.subject)
                return false;
            return true;
        });
        const fused = reciprocalRankFusion([
            lexical.map((h) => h.memory.id),
            vector.map((v) => v.id),
        ]);
        const hits = [];
        for (const f of fused) {
            const m = this.byId.get(f.id);
            if (m)
                hits.push({ memory: m, score: f.score });
            if (hits.length >= limit)
                break;
        }
        return hits;
    }
    /**
     * Budget-aware recall. `search()` ranks (and count-limits via
     * `opts.limit`); if `opts.tokenBudget` is set *and* a `format`
     * function is supplied, the ranked hits are then packed to that
     * token ceiling. `useCount`/`usedAt` are bumped only for the hits
     * actually KEPT.
     *
     * @returns `{ hits, omitted }` — the kept hits and the number the
     *          token packing left out (0 when no packing took place)
     */
    recallDetailed(opts, format) {
        const ranked = this.rankCandidates(opts);
        let kept = ranked;
        let omitted = 0;
        if (opts.tokenBudget && opts.tokenBudget > 0 && format) {
            const packed = packToTokenBudget(ranked, opts.tokenBudget, format);
            kept = packed.kept;
            omitted = packed.omitted;
        }
        const now = Date.now();
        for (const h of kept) {
            // packToTokenBudget may hand back a shallow clone with trimmed
            // content — always bump the REAL stored memory by id.
            const real = this.byId.get(h.memory.id);
            if (real) {
                real.useCount += 1;
                real.usedAt = now;
                this.markDirty(real.id);
            }
        }
        return { hits: kept, omitted };
    }
    /** Count-limited recall (no token budgeting). Stable convenience API. */
    recall(opts) {
        return this.recallDetailed(opts).hits;
    }
    /**
     * All memories, insertion-ordered. Materialised from the cache on
     * each call — O(n), but only the infrequent readers (outline,
     * mining, snapshot scan) use it; the frequent mutating paths stay
     * O(1).
     */
    allMemories() {
        return [...this.byId.values()];
    }
    /** Record the ingest timestamp for `category` and mark the metadata dirty. */
    setIngestedAt(category, ts) {
        this.meta.ingestedAt[category] = ts;
        this.markMetaDirty();
    }
    /** Ingest timestamp recorded for `category`, or `undefined` if none. */
    getIngestedAt(category) {
        return this.meta.ingestedAt[category];
    }
    /**
     * Ask `evictIfOverBudget` which memories to drop for the configured
     * `maxMemoryBytes`, remove them, and stamp `meta.lastEvictionAt`.
     *
     * @returns `{ removed }` — how many memories were evicted
     */
    applyEviction(config) {
        const removed = evictIfOverBudget([...this.byId.values()], config.maxMemoryBytes, this.bytesTotal);
        if (removed.length === 0)
            return { removed: 0 };
        // Go through removeMemory so eviction shares one bookkeeping path
        // with every other removal.
        for (const mem of removed)
            this.removeMemory(mem);
        this.meta.lastEvictionAt = Date.now();
        this.markMetaDirty();
        return { removed: removed.length };
    }
    // ── write-behind buffer bookkeeping ───────────────────────────────
    /** Record `id` as changed; schedule a flush. */
    markDirty(id) {
        this.pendingDirty.add(id);
        this.pendingDeleted.delete(id);
        this.scheduleFlush();
    }
    /** Record `id` as deleted; schedule a flush. */
    markDeleted(id) {
        this.pendingDeleted.add(id);
        this.pendingDirty.delete(id);
        this.scheduleFlush();
    }
    /** Record that `meta` changed; schedule a flush. */
    markMetaDirty() {
        this.metaDirty = true;
        this.scheduleFlush();
    }
    /** Arm the debounce timer unless one is already pending. */
    scheduleFlush() {
        if (this.flushTimer)
            return;
        this.flushTimer = setTimeout(() => {
            this.flushTimer = null;
            // The debounced flush runs detached — a write failure (project
            // dir removed mid-session, disk full, permissions) must not
            // surface as an unhandled rejection that takes down the host.
            // The pending buffers are only cleared on success, so a failed
            // flush is retried on the next mutation.
            void this.flush().catch(() => {
                /* buffers retained → retried on next mutation */
            });
        }, PERSIST_DEBOUNCE_MS);
    }
    /**
     * Drain the write-behind buffer into SQLite in one transaction.
     * Stays async to preserve the historical API; the work itself is
     * synchronous (`bun:sqlite`). The pending sets are cleared only
     * after the transaction succeeds — if it throws, they are retained
     * and retried.
     */
    async flush() {
        if (this.pendingDirty.size === 0 && this.pendingDeleted.size === 0 && !this.metaDirty) {
            return;
        }
        const dirty = [];
        for (const id of this.pendingDirty) {
            const mem = this.byId.get(id);
            if (mem)
                dirty.push(mem);
        }
        const deleted = [...this.pendingDeleted];
        // If this throws, the sets below are NOT cleared — the next
        // mutation reschedules and retries.
        this.sqlite.flush(dirty, deleted, this.meta);
        this.pendingDirty.clear();
        this.pendingDeleted.clear();
        this.metaDirty = false;
    }
    /** Cancel any pending debounce timer and flush right away — used by tools and tests. */
    async forceFlush() {
        if (this.flushTimer) {
            clearTimeout(this.flushTimer);
            this.flushTimer = null;
        }
        await this.flush();
    }
    /**
     * Flush and close the underlying database handle. Not part of the
     * historical API, but tests create many short-lived repositories and
     * should release their handles; in the plugin's own lifecycle the
     * repository lives for the whole session and the OS reclaims the
     * handle at process exit.
     *
     * The handle is closed even when the final flush throws, so a
     * failed write cannot leak it; the flush error still propagates to
     * the caller.
     */
    async close() {
        try {
            await this.forceFlush();
        }
        finally {
            this.sqlite.close();
        }
    }
}
|