opencode-diane 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +180 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/WIKI.md +1430 -0
  5. package/dist/index.d.ts +28 -0
  6. package/dist/index.js +1632 -0
  7. package/dist/ingest/adaptive.d.ts +47 -0
  8. package/dist/ingest/adaptive.js +182 -0
  9. package/dist/ingest/code-health.d.ts +58 -0
  10. package/dist/ingest/code-health.js +202 -0
  11. package/dist/ingest/code-map.d.ts +71 -0
  12. package/dist/ingest/code-map.js +670 -0
  13. package/dist/ingest/cross-refs.d.ts +59 -0
  14. package/dist/ingest/cross-refs.js +1207 -0
  15. package/dist/ingest/docs.d.ts +49 -0
  16. package/dist/ingest/docs.js +325 -0
  17. package/dist/ingest/git.d.ts +77 -0
  18. package/dist/ingest/git.js +390 -0
  19. package/dist/ingest/live-session.d.ts +101 -0
  20. package/dist/ingest/live-session.js +173 -0
  21. package/dist/ingest/project-notes.d.ts +28 -0
  22. package/dist/ingest/project-notes.js +102 -0
  23. package/dist/ingest/project.d.ts +35 -0
  24. package/dist/ingest/project.js +430 -0
  25. package/dist/ingest/session-snapshot.d.ts +63 -0
  26. package/dist/ingest/session-snapshot.js +94 -0
  27. package/dist/ingest/sessions.d.ts +29 -0
  28. package/dist/ingest/sessions.js +164 -0
  29. package/dist/ingest/tables.d.ts +52 -0
  30. package/dist/ingest/tables.js +360 -0
  31. package/dist/mining/skill-miner.d.ts +53 -0
  32. package/dist/mining/skill-miner.js +234 -0
  33. package/dist/search/bm25.d.ts +81 -0
  34. package/dist/search/bm25.js +334 -0
  35. package/dist/search/e5-embedder.d.ts +30 -0
  36. package/dist/search/e5-embedder.js +91 -0
  37. package/dist/search/embed-pass.d.ts +26 -0
  38. package/dist/search/embed-pass.js +43 -0
  39. package/dist/search/embedder.d.ts +58 -0
  40. package/dist/search/embedder.js +85 -0
  41. package/dist/search/inverted-index.d.ts +51 -0
  42. package/dist/search/inverted-index.js +139 -0
  43. package/dist/search/ppr.d.ts +44 -0
  44. package/dist/search/ppr.js +118 -0
  45. package/dist/search/tokenize.d.ts +26 -0
  46. package/dist/search/tokenize.js +98 -0
  47. package/dist/store/eviction.d.ts +16 -0
  48. package/dist/store/eviction.js +37 -0
  49. package/dist/store/repository.d.ts +222 -0
  50. package/dist/store/repository.js +420 -0
  51. package/dist/store/sqlite-store.d.ts +89 -0
  52. package/dist/store/sqlite-store.js +252 -0
  53. package/dist/store/vector-store.d.ts +66 -0
  54. package/dist/store/vector-store.js +160 -0
  55. package/dist/types.d.ts +385 -0
  56. package/dist/types.js +9 -0
  57. package/dist/utils/file-log.d.ts +87 -0
  58. package/dist/utils/file-log.js +215 -0
  59. package/dist/utils/peer-detection.d.ts +45 -0
  60. package/dist/utils/peer-detection.js +90 -0
  61. package/dist/utils/shell.d.ts +43 -0
  62. package/dist/utils/shell.js +110 -0
  63. package/dist/utils/usage-skill.d.ts +42 -0
  64. package/dist/utils/usage-skill.js +129 -0
  65. package/dist/utils/xlsx.d.ts +36 -0
  66. package/dist/utils/xlsx.js +270 -0
  67. package/grammars/tree-sitter-c.wasm +0 -0
  68. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  69. package/grammars/tree-sitter-cpp.wasm +0 -0
  70. package/grammars/tree-sitter-css.wasm +0 -0
  71. package/grammars/tree-sitter-go.wasm +0 -0
  72. package/grammars/tree-sitter-html.wasm +0 -0
  73. package/grammars/tree-sitter-java.wasm +0 -0
  74. package/grammars/tree-sitter-javascript.wasm +0 -0
  75. package/grammars/tree-sitter-json.wasm +0 -0
  76. package/grammars/tree-sitter-php.wasm +0 -0
  77. package/grammars/tree-sitter-python.wasm +0 -0
  78. package/grammars/tree-sitter-rust.wasm +0 -0
  79. package/grammars/tree-sitter-typescript.wasm +0 -0
  80. package/package.json +80 -0
@@ -0,0 +1,89 @@
1
+ /**
2
+ * SQLite-backed durable storage for the memory store.
3
+ *
4
+ * Replaces the old single-JSON-file persistence. The store lives at
5
+ * `.opencode/diane.db` — a real SQLite database in WAL mode.
6
+ *
7
+ * Why this exists: the JSON store rewrote the *entire* file on every
8
+ * debounced flush. On a large repo that is the dominant cost and the
9
+ * reason the plugin didn't scale. SQLite writes only the *changed*
10
+ * rows, in one transaction. That is the whole point of the migration.
11
+ *
12
+ * Scope, stated honestly: this is a durable *log*, not a query engine.
13
+ * The repository still keeps the working set in memory — the `byId`
14
+ * cache and the inverted index, which it needs anyway for the custom
15
+ * co-change-boosted BM25 scoring that FTS5 can't express. SQLite is
16
+ * touched in exactly two places: once at load (a single table scan)
17
+ * and at each flush (one delta transaction). Reads never hit it.
18
+ * Moving retrieval into SQLite (FTS5) would be a separate project.
19
+ *
20
+ * Uses `bun:sqlite` — built into the Bun runtime that OpenCode loads
21
+ * plugins under, so this adds no dependency.
22
+ */
23
+ import type { Memory, MemoryStoreFile } from "../types.js";
24
+ /** Location of the SQLite store for a project root: `<root>/.opencode/diane.db`. */ export declare function dbFilePath(root: string): string;
25
+ /** What the repository gets back from a load: every stored memory plus the store-level metadata. */
26
+ export interface LoadedStore {
27
+ memories: Memory[];
28
+ meta: MemoryStoreFile["meta"];
29
+ }
30
+ export declare class SqliteStore {
31
+ private db;
32
+ private readonly upsertStmt;
33
+ private readonly deleteStmt;
34
+ private readonly metaUpsertStmt;
35
+ private constructor();
36
+ /**
37
+ * Open (or create) the store for a project root. On first open, if
38
+ * there is a legacy JSON store and no DB yet, the JSON is migrated
39
+ * into the fresh DB and renamed aside. Returns the store handle and
40
+ * everything in it (the repository reads this once at construction).
41
+ *
42
+ * A legacy-migration failure is reported via `onMigrationError`
43
+ * rather than thrown: the plugin's startup keeps going with an
44
+ * empty fresh database, and the legacy JSON is left in place so no
45
+ * data is destroyed; failing to start would be worse. See
46
+ * `migrateFromJson` for the rationale.
47
+ */
48
+ static open(root: string, log?: (msg: string) => void, onMigrationError?: (e: unknown) => void): {
49
+ store: SqliteStore;
50
+ loaded: LoadedStore;
51
+ };
52
+ /** Read the whole store back — called once, at repository construction. */
53
+ loadAll(): LoadedStore;
54
+ /**
55
+ * Persist a delta in ONE transaction: upsert every memory in
56
+ * `dirty`, delete every id in `deleted`, write `meta`. This is the
57
+ * incremental write that replaces the JSON whole-file rewrite —
58
+ * three changed memories cost three row writes, not a re-serialise
59
+ * of the entire store. A single transaction also means the flush is
60
+ * atomic: a crash mid-flush leaves the DB at the previous state, no
61
+ * temp-file-rename dance required.
62
+ */
63
+ flush(dirty: Iterable<Memory>, deleted: Iterable<string>, meta: MemoryStoreFile["meta"]): void;
64
+ /** Close the underlying database handle. */
65
+ close(): void;
66
+ /**
67
+ * One-time legacy migration: if a `diane.json` exists, load it,
68
+ * bulk-insert into the fresh DB in one transaction, and rename the
69
+ * JSON to `.json.migrated` so it is not re-migrated and the user
70
+ * keeps a backup.
71
+ *
72
+ * **Failure is not propagated.** A legacy-store migration is best-
73
+ * effort housekeeping; if it cannot complete for any reason (the
74
+ * JSON is corrupt, the database is held by another process, disk is
75
+ * full, a concurrent plugin's startup is blocking us) we MUST NOT
76
+ * crash the plugin's startup — that was the failure mode this code
77
+ * was rewritten to fix. Instead we leave the JSON file untouched
78
+ * (the user keeps their data) and return 0 so the caller continues
79
+ * with an empty fresh database. NOTE(review): whether a later open
80
+ * retries depends on the gate in `open` — confirm before relying on it.
81
+ *
82
+ * Returns the number of memories actually migrated, or 0 on any
83
+ * failure (including no JSON to migrate, which is also "0 migrated").
84
+ * `onError`, if provided, receives the cause when the bulk insert
85
+ * fails; the "no JSON file" and "wrong schema version" cases are
86
+ * never reported. The caller surfaces it as a structured event.
87
+ */
88
+ private migrateFromJson;
89
+ }
@@ -0,0 +1,252 @@
1
+ /**
2
+ * SQLite-backed durable storage for the memory store.
3
+ *
4
+ * Replaces the old single-JSON-file persistence. The store lives at
5
+ * `.opencode/diane.db` — a real SQLite database in WAL mode.
6
+ *
7
+ * Why this exists: the JSON store rewrote the *entire* file on every
8
+ * debounced flush. On a large repo that is the dominant cost and the
9
+ * reason the plugin didn't scale. SQLite writes only the *changed*
10
+ * rows, in one transaction. That is the whole point of the migration.
11
+ *
12
+ * Scope, stated honestly: this is a durable *log*, not a query engine.
13
+ * The repository still keeps the working set in memory — the `byId`
14
+ * cache and the inverted index, which it needs anyway for the custom
15
+ * co-change-boosted BM25 scoring that FTS5 can't express. SQLite is
16
+ * touched in exactly two places: once at load (a single table scan)
17
+ * and at each flush (one delta transaction). Reads never hit it.
18
+ * Moving retrieval into SQLite (FTS5) would be a separate project.
19
+ *
20
+ * Uses `bun:sqlite` — built into the Bun runtime that OpenCode loads
21
+ * plugins under, so this adds no dependency.
22
+ */
23
+ import { Database } from "bun:sqlite";
24
+ import { mkdirSync, existsSync, readFileSync, renameSync } from "node:fs";
25
+ import { dirname } from "node:path";
26
/** Store location, relative to the project root. */
const DB_REL = ".opencode/diane.db";
/** Legacy JSON store — migrated into SQLite on first open, then renamed aside. */
const JSON_REL = ".opencode/diane.json";
/**
 * Path of the SQLite store for a project.
 *
 * @param root Project root directory.
 * @returns `root` and the store's relative path, separated by a single `/`.
 */
export function dbFilePath(root) {
    const separator = "/";
    return root + separator + DB_REL;
}
32
+ /** DDL executed by the SqliteStore constructor (idempotent via IF NOT EXISTS): `memories` holds one row per memory with `tags` as JSON text and `pinned` as 0/1; `meta` holds JSON-encoded key/value settings. */ const SCHEMA = `
33
+ CREATE TABLE IF NOT EXISTS memories (
34
+ id TEXT PRIMARY KEY,
35
+ category TEXT NOT NULL,
36
+ subject TEXT NOT NULL,
37
+ content TEXT NOT NULL,
38
+ tags TEXT NOT NULL,
39
+ source TEXT NOT NULL,
40
+ created_at INTEGER NOT NULL,
41
+ used_at INTEGER NOT NULL,
42
+ use_count INTEGER NOT NULL,
43
+ size_bytes INTEGER NOT NULL,
44
+ pinned INTEGER NOT NULL DEFAULT 0
45
+ );
46
+ CREATE TABLE IF NOT EXISTS meta (
47
+ key TEXT PRIMARY KEY,
48
+ value TEXT NOT NULL
49
+ );
50
+ `;
51
/**
 * Convert one `memories` table row (snake_case columns) into the
 * camelCase in-memory `Memory` shape.
 *
 * `tags` is stored as JSON text. A cell that does not parse, or that
 * parses to something other than an array, yields an empty tag list
 * instead of throwing: this runs for every row during the initial
 * load, and one damaged cell must not stop the whole store from
 * loading (corrupt meta rows are tolerated the same way).
 *
 * @param r A row object with the columns of the `memories` table.
 * @returns The equivalent memory object.
 */
function rowToMemory(r) {
    let tags = [];
    try {
        const parsed = JSON.parse(r.tags);
        if (Array.isArray(parsed))
            tags = parsed;
    }
    catch {
        /* corrupt tags cell — keep the memory, drop its tags */
    }
    return {
        id: r.id,
        category: r.category,
        subject: r.subject,
        content: r.content,
        tags,
        source: r.source,
        createdAt: r.created_at,
        usedAt: r.used_at,
        useCount: r.use_count,
        sizeBytes: r.size_bytes,
        // pinned is optional on Memory — only set it when actually pinned,
        // matching how the rest of the codebase treats the field.
        pinned: r.pinned === 1 ? true : undefined,
    };
}
68
/**
 * Default store metadata: nothing ingested, no eviction recorded,
 * schema version 1. A new object is built on every call.
 */
function emptyMeta() {
    const ingestedAt = {};
    const lastEvictionAt = null;
    return { ingestedAt, lastEvictionAt, schema: 1 };
}
71
+ export class SqliteStore {
72
+ db;
73
+ // Prepared statements — created once, reused for every flush. This
74
+ // is the other half of "use SQLite well": the query planner runs
75
+ // once per statement, not once per row.
76
+ upsertStmt;
77
+ deleteStmt;
78
+ metaUpsertStmt;
79
+ constructor(db) {
80
+ this.db = db;
81
+ // WAL: concurrent-read friendly and the right default for a store
82
+ // two OpenCode sessions might touch at once. NORMAL synchronous is
83
+ // safe under WAL and much faster than FULL.
84
+ db.exec("PRAGMA journal_mode = WAL");
85
+ db.exec("PRAGMA synchronous = NORMAL");
86
+ db.exec(SCHEMA);
87
+ this.upsertStmt = db.query(`INSERT INTO memories
88
+ (id, category, subject, content, tags, source,
89
+ created_at, used_at, use_count, size_bytes, pinned)
90
+ VALUES
91
+ ($id, $category, $subject, $content, $tags, $source,
92
+ $createdAt, $usedAt, $useCount, $sizeBytes, $pinned)
93
+ ON CONFLICT(id) DO UPDATE SET
94
+ category = excluded.category,
95
+ subject = excluded.subject,
96
+ content = excluded.content,
97
+ tags = excluded.tags,
98
+ source = excluded.source,
99
+ created_at = excluded.created_at,
100
+ used_at = excluded.used_at,
101
+ use_count = excluded.use_count,
102
+ size_bytes = excluded.size_bytes,
103
+ pinned = excluded.pinned`);
104
+ this.deleteStmt = db.query(`DELETE FROM memories WHERE id = $id`);
105
+ this.metaUpsertStmt = db.query(`INSERT INTO meta (key, value) VALUES ($key, $value)
106
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value`);
107
+ }
108
+ /**
109
+ * Open (or create) the store for a project root. On first open, if
110
+ * there is a legacy JSON store and no DB yet, the JSON is migrated
111
+ * into the fresh DB and renamed aside. Returns the store handle and
112
+ * everything in it (the repository reads this once at construction).
113
+ *
114
+ * A legacy-migration failure is reported via `onMigrationError`
115
+ * rather than thrown: the plugin's startup keeps going with an
116
+ * empty fresh database — losing memories is recoverable (the next
117
+ * open retries the migration); failing to start is not. See
118
+ * `migrateFromJson` for the rationale.
119
+ */
120
+ static open(root, log, onMigrationError) {
121
+ const dbPath = dbFilePath(root);
122
+ mkdirSync(dirname(dbPath), { recursive: true });
123
+ const dbExisted = existsSync(dbPath);
124
+ const db = new Database(dbPath, { create: true });
125
+ const store = new SqliteStore(db);
126
+ if (!dbExisted) {
127
+ const migrated = store.migrateFromJson(root, onMigrationError);
128
+ if (migrated > 0 && log) {
129
+ log(`migrated ${migrated} memories from legacy diane.json`);
130
+ }
131
+ }
132
+ return { store, loaded: store.loadAll() };
133
+ }
134
+ /** Read the whole store back — called once, at repository construction. */
135
+ loadAll() {
136
+ const rows = this.db.query(`SELECT * FROM memories`).all();
137
+ const memories = rows.map(rowToMemory);
138
+ const meta = emptyMeta();
139
+ const metaRows = this.db.query(`SELECT key, value FROM meta`).all();
140
+ for (const { key, value } of metaRows) {
141
+ try {
142
+ if (key === "ingestedAt")
143
+ meta.ingestedAt = JSON.parse(value);
144
+ else if (key === "lastEvictionAt")
145
+ meta.lastEvictionAt = JSON.parse(value);
146
+ else if (key === "schema")
147
+ meta.schema = JSON.parse(value);
148
+ }
149
+ catch {
150
+ /* a corrupt meta row falls back to the default — non-fatal */
151
+ }
152
+ }
153
+ return { memories, meta };
154
+ }
155
+ /**
156
+ * Persist a delta in ONE transaction: upsert every memory in
157
+ * `dirty`, delete every id in `deleted`, write `meta`. This is the
158
+ * incremental write that replaces the JSON whole-file rewrite —
159
+ * three changed memories cost three row writes, not a re-serialise
160
+ * of the entire store. A single transaction also means the flush is
161
+ * atomic: a crash mid-flush leaves the DB at the previous state, no
162
+ * temp-file-rename dance required.
163
+ */
164
+ flush(dirty, deleted, meta) {
165
+ const run = this.db.transaction(() => {
166
+ for (const m of dirty) {
167
+ this.upsertStmt.run({
168
+ $id: m.id,
169
+ $category: m.category,
170
+ $subject: m.subject,
171
+ $content: m.content,
172
+ $tags: JSON.stringify(m.tags),
173
+ $source: m.source,
174
+ $createdAt: m.createdAt,
175
+ $usedAt: m.usedAt,
176
+ $useCount: m.useCount,
177
+ $sizeBytes: m.sizeBytes,
178
+ $pinned: m.pinned ? 1 : 0,
179
+ });
180
+ }
181
+ for (const id of deleted) {
182
+ this.deleteStmt.run({ $id: id });
183
+ }
184
+ this.metaUpsertStmt.run({ $key: "ingestedAt", $value: JSON.stringify(meta.ingestedAt) });
185
+ this.metaUpsertStmt.run({ $key: "lastEvictionAt", $value: JSON.stringify(meta.lastEvictionAt) });
186
+ this.metaUpsertStmt.run({ $key: "schema", $value: JSON.stringify(meta.schema) });
187
+ });
188
+ run();
189
+ }
190
+ /** Close the underlying database handle. */
191
+ close() {
192
+ this.db.close();
193
+ }
194
+ /**
195
+ * One-time legacy migration: if a `diane.json` exists, load it,
196
+ * bulk-insert into the fresh DB in one transaction, and rename the
197
+ * JSON to `.json.migrated` so it is not re-migrated and the user
198
+ * keeps a backup.
199
+ *
200
+ * **Failure is not propagated.** A legacy-store migration is best-
201
+ * effort housekeeping; if it cannot complete for any reason (the
202
+ * JSON is corrupt, the database is held by another process, disk is
203
+ * full, a concurrent plugin's startup is blocking us) we MUST NOT
204
+ * crash the plugin's startup — that was the failure mode this code
205
+ * was rewritten to fix. Instead we log the cause, leave the JSON
206
+ * file untouched (the user keeps their data), and return 0 so the
207
+ * caller continues with an empty fresh database. The next open will
208
+ * find the JSON still in place and try the migration again.
209
+ *
210
+ * Returns the number of memories actually migrated, or 0 on any
211
+ * failure (including no JSON to migrate, which is also "0 migrated").
212
+ * `onError`, if provided, is called once on a real failure (not the
213
+ * "no JSON file" or "wrong schema version" cases) with the cause —
214
+ * the caller surfaces it as a structured event.
215
+ */
216
+ migrateFromJson(root, onError) {
217
+ const jsonPath = `${root}/${JSON_REL}`;
218
+ if (!existsSync(jsonPath))
219
+ return 0;
220
+ let parsed;
221
+ try {
222
+ parsed = JSON.parse(readFileSync(jsonPath, "utf-8"));
223
+ }
224
+ catch {
225
+ return 0; // corrupt JSON — start fresh, leave the file untouched
226
+ }
227
+ if (!parsed || parsed.version !== 1 || !Array.isArray(parsed.memories)) {
228
+ return 0;
229
+ }
230
+ const meta = parsed.meta ?? emptyMeta();
231
+ try {
232
+ this.flush(parsed.memories, [], meta);
233
+ }
234
+ catch (e) {
235
+ // The bulk insert failed mid-way. The SQLite transaction is
236
+ // rolled back automatically; the JSON file is still in place;
237
+ // we report the cause and leave the database in its empty
238
+ // freshly-created state. The plugin's startup continues.
239
+ if (onError)
240
+ onError(e);
241
+ return 0;
242
+ }
243
+ try {
244
+ renameSync(jsonPath, `${jsonPath}.migrated`);
245
+ }
246
+ catch {
247
+ /* best effort — if the rename fails the DB now exists, so the
248
+ next open won't re-migrate anyway */
249
+ }
250
+ return parsed.memories.length;
251
+ }
252
+ }
@@ -0,0 +1,66 @@
1
+ /**
2
+ * vector-store.ts — persistence and search for memory embeddings.
3
+ *
4
+ * This is a SELF-CONTAINED, OPT-IN component. It is constructed only
5
+ * when `enableSemanticSearch` is on, and it uses its OWN database file
6
+ * (`.opencode/diane-vectors.db`) — the primary store, its schema, and
7
+ * its migration path are never touched. When the feature is off this
8
+ * file is never imported and the file never created, so the default
9
+ * configuration carries zero cost from semantic search.
10
+ *
11
+ * The cache is keyed to a model id. If the configured embedding model
12
+ * changes, the stored vectors are from a different space and are
13
+ * dropped wholesale on open — a stale vector is worse than none.
14
+ *
15
+ * Vectors are L2-normalised on the way in, so similarity search is a
16
+ * plain dot product.
17
+ */
18
+ import { type FusedItem } from "../search/embedder.js";
19
+ /** Path of the vector database for a project root: `<root>/.opencode/diane-vectors.db` (absolute only when `root` is). */
20
+ export declare function vectorDbPath(root: string): string;
21
+ /** Embedding cache for one project and one model id: a SQLite table mirrored in memory for brute-force search. */ export declare class VectorStore {
22
+ private db;
23
+ /** In-memory mirror — every stored vector, for brute-force search. */
24
+ private mem;
25
+ /** Vector dimension, learned from the first vector loaded or stored. 0 until then. */
26
+ private dim;
27
+ readonly modelId: string;
28
+ private constructor();
29
+ /**
30
+ * Open (or create) the vector store for a project root, bound to a
31
+ * model id. If a different model produced the existing vectors, they
32
+ * are discarded — vectors from two models are not comparable.
33
+ */
34
+ static open(root: string, modelId: string): VectorStore;
35
+ /** Load every persisted vector into the in-memory mirror. */
36
+ private loadAll;
37
+ /** Number of vectors held in the in-memory mirror. */
38
+ size(): number;
39
+ /** Whether a memory id already has a vector in the in-memory mirror. */
40
+ has(id: string): boolean;
41
+ /** The memory ids in `ids` that do NOT yet have a vector — the embedding to-do list. */
42
+ missing(ids: Iterable<string>): string[];
43
+ /**
44
+ * Store a batch of (id, vector) pairs — normalised, mirrored in
45
+ * memory, and persisted in one transaction. A vector whose length
46
+ * disagrees with the established dimension is skipped (it cannot
47
+ * have come from the same model) rather than corrupting search.
48
+ */
49
+ putMany(entries: Array<{
50
+ id: string;
51
+ vec: Float32Array;
52
+ }>): void;
53
+ /**
54
+ * Drop vectors whose id is not in `validIds` — used to clear out
55
+ * memories that were evicted or replaced. Returns the count removed.
56
+ */
57
+ prune(validIds: Set<string>): number;
58
+ /**
59
+ * Top-`k` memory ids by cosine similarity to `queryVec` (vectors are
60
+ * normalised, so this is a dot product), highest first. A
61
+ * dimension mismatch yields an empty result rather than a throw.
62
+ */
63
+ search(queryVec: Float32Array, k: number): FusedItem[];
64
+ /** Close the underlying database handle. */
65
+ close(): void;
66
+ }
@@ -0,0 +1,160 @@
1
+ /**
2
+ * vector-store.ts — persistence and search for memory embeddings.
3
+ *
4
+ * This is a SELF-CONTAINED, OPT-IN component. It is constructed only
5
+ * when `enableSemanticSearch` is on, and it uses its OWN database file
6
+ * (`.opencode/diane-vectors.db`) — the primary store, its schema, and
7
+ * its migration path are never touched. When the feature is off this
8
+ * file is never imported and the file never created, so the default
9
+ * configuration carries zero cost from semantic search.
10
+ *
11
+ * The cache is keyed to a model id. If the configured embedding model
12
+ * changes, the stored vectors are from a different space and are
13
+ * dropped wholesale on open — a stale vector is worse than none.
14
+ *
15
+ * Vectors are L2-normalised on the way in, so similarity search is a
16
+ * plain dot product.
17
+ */
18
+ import { Database } from "bun:sqlite";
19
+ import { mkdirSync } from "node:fs";
20
+ import { dirname, join } from "node:path";
21
+ import { dot, normalize } from "../search/embedder.js";
22
/** Vector database location, relative to the project root. */
const DB_REL = ".opencode/diane-vectors.db";
/**
 * Path of the vector database for a project root.
 *
 * @param root Project root directory.
 * @returns `root` joined with the vector database's relative path.
 */
export function vectorDbPath(root) {
    const location = join(root, DB_REL);
    return location;
}
27
+ export class VectorStore {
28
+ db;
29
+ /** In-memory mirror — every stored vector, for brute-force search. */
30
+ mem = new Map();
31
+ /** Vector dimension, learned from the first vector stored. 0 until then. */
32
+ dim = 0;
33
+ modelId;
34
+ constructor(db, modelId) {
35
+ this.db = db;
36
+ this.modelId = modelId;
37
+ }
38
+ /**
39
+ * Open (or create) the vector store for a project root, bound to a
40
+ * model id. If a different model produced the existing vectors, they
41
+ * are discarded — vectors from two models are not comparable.
42
+ */
43
+ static open(root, modelId) {
44
+ const path = vectorDbPath(root);
45
+ mkdirSync(dirname(path), { recursive: true });
46
+ const db = new Database(path, { create: true });
47
+ db.exec("PRAGMA journal_mode = WAL");
48
+ db.exec("PRAGMA synchronous = NORMAL");
49
+ db.exec("CREATE TABLE IF NOT EXISTS vectors (memory_id TEXT PRIMARY KEY, vec BLOB NOT NULL)");
50
+ db.exec("CREATE TABLE IF NOT EXISTS vmeta (key TEXT PRIMARY KEY, value TEXT NOT NULL)");
51
+ const storedModel = db.query("SELECT value FROM vmeta WHERE key = 'model'").get()?.value;
52
+ if (storedModel && storedModel !== modelId) {
53
+ // Model changed — the cached vectors live in a different space.
54
+ db.exec("DELETE FROM vectors");
55
+ }
56
+ db.query("INSERT OR REPLACE INTO vmeta (key, value) VALUES ('model', ?)").run(modelId);
57
+ const store = new VectorStore(db, modelId);
58
+ store.loadAll();
59
+ return store;
60
+ }
61
+ /** Load every persisted vector into the in-memory mirror. */
62
+ loadAll() {
63
+ const rows = this.db.query("SELECT memory_id, vec FROM vectors").all();
64
+ for (const row of rows) {
65
+ const f32 = bytesToFloat32(row.vec);
66
+ if (this.dim === 0)
67
+ this.dim = f32.length;
68
+ if (f32.length === this.dim)
69
+ this.mem.set(row.memory_id, f32);
70
+ }
71
+ }
72
+ /** Number of vectors held. */
73
+ size() {
74
+ return this.mem.size;
75
+ }
76
+ /** Whether a memory id already has a stored vector. */
77
+ has(id) {
78
+ return this.mem.has(id);
79
+ }
80
+ /** The memory ids in `ids` that do NOT yet have a vector — the embedding to-do list. */
81
+ missing(ids) {
82
+ const out = [];
83
+ for (const id of ids)
84
+ if (!this.mem.has(id))
85
+ out.push(id);
86
+ return out;
87
+ }
88
+ /**
89
+ * Store a batch of (id, vector) pairs — normalised, mirrored in
90
+ * memory, and persisted in one transaction. A vector whose length
91
+ * disagrees with the established dimension is skipped (it cannot
92
+ * have come from the same model) rather than corrupting search.
93
+ */
94
+ putMany(entries) {
95
+ const insert = this.db.query("INSERT OR REPLACE INTO vectors (memory_id, vec) VALUES (?, ?)");
96
+ const tx = this.db.transaction((batch) => {
97
+ for (const { id, vec } of batch) {
98
+ if (this.dim === 0)
99
+ this.dim = vec.length;
100
+ if (vec.length !== this.dim)
101
+ continue;
102
+ const n = normalize(vec);
103
+ this.mem.set(id, n);
104
+ insert.run(id, float32ToBytes(n));
105
+ }
106
+ });
107
+ tx(entries);
108
+ }
109
+ /**
110
+ * Drop vectors whose id is not in `validIds` — used to clear out
111
+ * memories that were evicted or replaced. Returns the count removed.
112
+ */
113
+ prune(validIds) {
114
+ const stale = [];
115
+ for (const id of this.mem.keys())
116
+ if (!validIds.has(id))
117
+ stale.push(id);
118
+ if (stale.length === 0)
119
+ return 0;
120
+ const del = this.db.query("DELETE FROM vectors WHERE memory_id = ?");
121
+ const tx = this.db.transaction((ids) => {
122
+ for (const id of ids) {
123
+ del.run(id);
124
+ this.mem.delete(id);
125
+ }
126
+ });
127
+ tx(stale);
128
+ return stale.length;
129
+ }
130
+ /**
131
+ * Top-`k` memory ids by cosine similarity to `queryVec` (vectors are
132
+ * normalised, so this is a dot product), highest first. A
133
+ * dimension mismatch yields an empty result rather than a throw.
134
+ */
135
+ search(queryVec, k) {
136
+ if (this.dim === 0 || queryVec.length !== this.dim || k <= 0)
137
+ return [];
138
+ const q = normalize(Float32Array.from(queryVec));
139
+ const scored = [];
140
+ for (const [id, vec] of this.mem) {
141
+ scored.push({ id, score: dot(q, vec) });
142
+ }
143
+ scored.sort((a, b) => b.score - a.score);
144
+ return scored.slice(0, k);
145
+ }
146
+ /** Close the underlying database handle. */
147
+ close() {
148
+ this.db.close();
149
+ }
150
+ }
151
/**
 * Float32Array → a byte view for BLOB storage. The result is a
 * `Uint8Array` over the SAME memory as `v` (no copy), in the
 * platform's native byte order.
 */
function float32ToBytes(v) {
    const { buffer, byteOffset, byteLength } = v;
    return new Uint8Array(buffer, byteOffset, byteLength);
}
155
/**
 * Raw BLOB bytes → Float32Array. The bytes are first copied into a
 * fresh buffer, so the result never depends on the alignment or
 * lifetime of the input view.
 */
function bytesToFloat32(bytes) {
    const detached = Uint8Array.from(bytes);
    return new Float32Array(detached.buffer);
}