opencode-diane 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +180 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/WIKI.md +1430 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +1632 -0
- package/dist/ingest/adaptive.d.ts +47 -0
- package/dist/ingest/adaptive.js +182 -0
- package/dist/ingest/code-health.d.ts +58 -0
- package/dist/ingest/code-health.js +202 -0
- package/dist/ingest/code-map.d.ts +71 -0
- package/dist/ingest/code-map.js +670 -0
- package/dist/ingest/cross-refs.d.ts +59 -0
- package/dist/ingest/cross-refs.js +1207 -0
- package/dist/ingest/docs.d.ts +49 -0
- package/dist/ingest/docs.js +325 -0
- package/dist/ingest/git.d.ts +77 -0
- package/dist/ingest/git.js +390 -0
- package/dist/ingest/live-session.d.ts +101 -0
- package/dist/ingest/live-session.js +173 -0
- package/dist/ingest/project-notes.d.ts +28 -0
- package/dist/ingest/project-notes.js +102 -0
- package/dist/ingest/project.d.ts +35 -0
- package/dist/ingest/project.js +430 -0
- package/dist/ingest/session-snapshot.d.ts +63 -0
- package/dist/ingest/session-snapshot.js +94 -0
- package/dist/ingest/sessions.d.ts +29 -0
- package/dist/ingest/sessions.js +164 -0
- package/dist/ingest/tables.d.ts +52 -0
- package/dist/ingest/tables.js +360 -0
- package/dist/mining/skill-miner.d.ts +53 -0
- package/dist/mining/skill-miner.js +234 -0
- package/dist/search/bm25.d.ts +81 -0
- package/dist/search/bm25.js +334 -0
- package/dist/search/e5-embedder.d.ts +30 -0
- package/dist/search/e5-embedder.js +91 -0
- package/dist/search/embed-pass.d.ts +26 -0
- package/dist/search/embed-pass.js +43 -0
- package/dist/search/embedder.d.ts +58 -0
- package/dist/search/embedder.js +85 -0
- package/dist/search/inverted-index.d.ts +51 -0
- package/dist/search/inverted-index.js +139 -0
- package/dist/search/ppr.d.ts +44 -0
- package/dist/search/ppr.js +118 -0
- package/dist/search/tokenize.d.ts +26 -0
- package/dist/search/tokenize.js +98 -0
- package/dist/store/eviction.d.ts +16 -0
- package/dist/store/eviction.js +37 -0
- package/dist/store/repository.d.ts +222 -0
- package/dist/store/repository.js +420 -0
- package/dist/store/sqlite-store.d.ts +89 -0
- package/dist/store/sqlite-store.js +252 -0
- package/dist/store/vector-store.d.ts +66 -0
- package/dist/store/vector-store.js +160 -0
- package/dist/types.d.ts +385 -0
- package/dist/types.js +9 -0
- package/dist/utils/file-log.d.ts +87 -0
- package/dist/utils/file-log.js +215 -0
- package/dist/utils/peer-detection.d.ts +45 -0
- package/dist/utils/peer-detection.js +90 -0
- package/dist/utils/shell.d.ts +43 -0
- package/dist/utils/shell.js +110 -0
- package/dist/utils/usage-skill.d.ts +42 -0
- package/dist/utils/usage-skill.js +129 -0
- package/dist/utils/xlsx.d.ts +36 -0
- package/dist/utils/xlsx.js +270 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-css.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-html.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-json.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +80 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite-backed durable storage for the memory store.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the old single-JSON-file persistence. The store lives at
|
|
5
|
+
* `.opencode/diane.db` — a real SQLite database in WAL mode.
|
|
6
|
+
*
|
|
7
|
+
* Why this exists: the JSON store rewrote the *entire* file on every
|
|
8
|
+
* debounced flush. On a large repo that is the dominant cost and the
|
|
9
|
+
* reason the plugin didn't scale. SQLite writes only the *changed*
|
|
10
|
+
* rows, in one transaction. That is the whole point of the migration.
|
|
11
|
+
*
|
|
12
|
+
* Scope, stated honestly: this is a durable *log*, not a query engine.
|
|
13
|
+
* The repository still keeps the working set in memory — the `byId`
|
|
14
|
+
* cache and the inverted index, which it needs anyway for the custom
|
|
15
|
+
* co-change-boosted BM25 scoring that FTS5 can't express. SQLite is
|
|
16
|
+
* touched in exactly two places: once at load (a single table scan)
|
|
17
|
+
* and at each flush (one delta transaction). Reads never hit it.
|
|
18
|
+
* Moving retrieval into SQLite (FTS5) would be a separate project.
|
|
19
|
+
*
|
|
20
|
+
* Uses `bun:sqlite` — built into the Bun runtime that OpenCode loads
|
|
21
|
+
* plugins under, so this adds no dependency.
|
|
22
|
+
*/
|
|
23
|
+
import type { Memory, MemoryStoreFile } from "../types.js";
|
|
24
|
+
/** Path of the SQLite store for a project root: `<root>/.opencode/diane.db`. */
export declare function dbFilePath(root: string): string;
|
|
25
|
+
/**
 * Result of reading the store: every persisted memory plus the
 * store-level metadata. Returned by `SqliteStore.loadAll()` and, as
 * `loaded`, by `SqliteStore.open()`.
 */
export interface LoadedStore {
    /** Every row of the `memories` table, converted to `Memory` objects. */
    memories: Memory[];
    /** Store metadata, in the same shape as the legacy JSON file's `meta`. */
    meta: MemoryStoreFile["meta"];
}
|
|
30
|
+
export declare class SqliteStore {
    /** Underlying `bun:sqlite` database handle. */
    private db;
    /** Prepared statement: insert-or-update one memory row. */
    private readonly upsertStmt;
    /** Prepared statement: delete one memory row by id. */
    private readonly deleteStmt;
    /** Prepared statement: insert-or-update one meta key. */
    private readonly metaUpsertStmt;
    /** Instances are obtained through {@link SqliteStore.open}. */
    private constructor();
    /**
     * Open the store for a project root, creating the database (and its
     * parent directory) if needed, and read its full contents.
     *
     * If a legacy `diane.json` is present it may be imported into the
     * database and renamed aside. A failure of that import is never
     * thrown: it is handed to `onMigrationError` and startup carries on
     * with whatever the database holds.
     *
     * @param root Project root directory.
     * @param log Optional sink for a one-line message when memories were migrated.
     * @param onMigrationError Optional callback receiving the cause of a failed migration.
     * @returns The store handle together with everything loaded from it.
     */
    static open(root: string, log?: (msg: string) => void, onMigrationError?: (e: unknown) => void): {
        store: SqliteStore;
        loaded: LoadedStore;
    };
    /** Read every memory and the metadata back from the database. */
    loadAll(): LoadedStore;
    /**
     * Write a delta in a single transaction: every memory in `dirty` is
     * upserted, every id in `deleted` is removed, and `meta` is stored.
     * Only the rows passed in are written — the rest of the store is
     * left untouched — and the transaction makes the flush all-or-nothing.
     *
     * @param dirty Memories to insert or overwrite.
     * @param deleted Ids of memories to remove.
     * @param meta Metadata to persist alongside the rows.
     */
    flush(dirty: Iterable<Memory>, deleted: Iterable<string>, meta: MemoryStoreFile["meta"]): void;
    /** Close the underlying database handle. */
    close(): void;
    /**
     * Legacy import: load `diane.json` if it exists, bulk-insert its
     * memories in one transaction, then rename the file to
     * `.json.migrated` so the user keeps a backup.
     *
     * Failures are not propagated. The JSON file is left in place, the
     * optional error callback receives the cause, and 0 is returned.
     * 0 is also returned when there is nothing to migrate; otherwise
     * the number of migrated memories is returned.
     */
    private migrateFromJson;
}
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite-backed durable storage for the memory store.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the old single-JSON-file persistence. The store lives at
|
|
5
|
+
* `.opencode/diane.db` — a real SQLite database in WAL mode.
|
|
6
|
+
*
|
|
7
|
+
* Why this exists: the JSON store rewrote the *entire* file on every
|
|
8
|
+
* debounced flush. On a large repo that is the dominant cost and the
|
|
9
|
+
* reason the plugin didn't scale. SQLite writes only the *changed*
|
|
10
|
+
* rows, in one transaction. That is the whole point of the migration.
|
|
11
|
+
*
|
|
12
|
+
* Scope, stated honestly: this is a durable *log*, not a query engine.
|
|
13
|
+
* The repository still keeps the working set in memory — the `byId`
|
|
14
|
+
* cache and the inverted index, which it needs anyway for the custom
|
|
15
|
+
* co-change-boosted BM25 scoring that FTS5 can't express. SQLite is
|
|
16
|
+
* touched in exactly two places: once at load (a single table scan)
|
|
17
|
+
* and at each flush (one delta transaction). Reads never hit it.
|
|
18
|
+
* Moving retrieval into SQLite (FTS5) would be a separate project.
|
|
19
|
+
*
|
|
20
|
+
* Uses `bun:sqlite` — built into the Bun runtime that OpenCode loads
|
|
21
|
+
* plugins under, so this adds no dependency.
|
|
22
|
+
*/
|
|
23
|
+
import { Database } from "bun:sqlite";
|
|
24
|
+
import { mkdirSync, existsSync, readFileSync, renameSync } from "node:fs";
|
|
25
|
+
import { dirname } from "node:path";
|
|
26
|
+
/** Location of the SQLite store, relative to the project root. */
const DB_REL = ".opencode/diane.db";
/** Legacy JSON store — migrated into SQLite on first open, then renamed aside. */
const JSON_REL = ".opencode/diane.json";
/**
 * Build the database file path for a project root by appending the
 * store's relative location to `root`, separated by a forward slash.
 *
 * @param root Project root directory.
 * @returns `<root>/.opencode/diane.db`.
 */
export function dbFilePath(root) {
    const separator = "/";
    return `${root}${separator}${DB_REL}`;
}
|
|
32
|
+
const SCHEMA = `
|
|
33
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
34
|
+
id TEXT PRIMARY KEY,
|
|
35
|
+
category TEXT NOT NULL,
|
|
36
|
+
subject TEXT NOT NULL,
|
|
37
|
+
content TEXT NOT NULL,
|
|
38
|
+
tags TEXT NOT NULL,
|
|
39
|
+
source TEXT NOT NULL,
|
|
40
|
+
created_at INTEGER NOT NULL,
|
|
41
|
+
used_at INTEGER NOT NULL,
|
|
42
|
+
use_count INTEGER NOT NULL,
|
|
43
|
+
size_bytes INTEGER NOT NULL,
|
|
44
|
+
pinned INTEGER NOT NULL DEFAULT 0
|
|
45
|
+
);
|
|
46
|
+
CREATE TABLE IF NOT EXISTS meta (
|
|
47
|
+
key TEXT PRIMARY KEY,
|
|
48
|
+
value TEXT NOT NULL
|
|
49
|
+
);
|
|
50
|
+
`;
|
|
51
|
+
/**
 * Convert one `memories` table row (snake_case columns) into the
 * camelCase memory object the rest of the plugin works with.
 *
 * `tags` is stored as JSON text. A row whose `tags` column is corrupt —
 * not valid JSON, or valid JSON that is not an array — yields an empty
 * tag list instead of throwing. This mirrors the non-fatal treatment
 * `loadAll` gives corrupt meta rows: one damaged row must not abort the
 * whole load (and with it the plugin's startup).
 *
 * @param r A row object as returned by `SELECT * FROM memories`.
 * @returns The memory; `pinned` is `true` when the column is 1 and
 *          `undefined` otherwise.
 */
function rowToMemory(r) {
    let tags = [];
    try {
        const decoded = JSON.parse(r.tags);
        if (Array.isArray(decoded))
            tags = decoded;
    }
    catch {
        /* corrupt tags column — keep the memory, drop its tags */
    }
    return {
        id: r.id,
        category: r.category,
        subject: r.subject,
        content: r.content,
        tags,
        source: r.source,
        createdAt: r.created_at,
        usedAt: r.used_at,
        useCount: r.use_count,
        sizeBytes: r.size_bytes,
        // pinned is optional on Memory — only set it when actually pinned,
        // matching how the rest of the codebase treats the field.
        pinned: r.pinned === 1 ? true : undefined,
    };
}
|
|
68
|
+
/**
 * Default metadata for a store with nothing recorded yet: an empty
 * `ingestedAt` map, no eviction timestamp, and `schema` set to 1.
 * A new object is built on every call, so callers may mutate it.
 */
function emptyMeta() {
    const defaults = {
        ingestedAt: {},
        lastEvictionAt: null,
        schema: 1,
    };
    return defaults;
}
|
|
71
|
+
export class SqliteStore {
    /** Underlying `bun:sqlite` database handle. */
    db;
    // Prepared statements — created once, reused for every flush. This
    // is the other half of "use SQLite well": the query planner runs
    // once per statement, not once per row.
    upsertStmt;
    deleteStmt;
    metaUpsertStmt;
    /**
     * Configure the connection, make sure the schema exists, and prepare
     * the statements `flush` reuses.
     *
     * @param db An open `bun:sqlite` database.
     */
    constructor(db) {
        this.db = db;
        // WAL: concurrent-read friendly and the right default for a store
        // two OpenCode sessions might touch at once. NORMAL synchronous is
        // safe under WAL and much faster than FULL.
        db.exec("PRAGMA journal_mode = WAL");
        db.exec("PRAGMA synchronous = NORMAL");
        db.exec(SCHEMA);
        this.upsertStmt = db.query(`INSERT INTO memories
        (id, category, subject, content, tags, source,
         created_at, used_at, use_count, size_bytes, pinned)
      VALUES
        ($id, $category, $subject, $content, $tags, $source,
         $createdAt, $usedAt, $useCount, $sizeBytes, $pinned)
      ON CONFLICT(id) DO UPDATE SET
        category = excluded.category,
        subject = excluded.subject,
        content = excluded.content,
        tags = excluded.tags,
        source = excluded.source,
        created_at = excluded.created_at,
        used_at = excluded.used_at,
        use_count = excluded.use_count,
        size_bytes = excluded.size_bytes,
        pinned = excluded.pinned`);
        this.deleteStmt = db.query(`DELETE FROM memories WHERE id = $id`);
        this.metaUpsertStmt = db.query(`INSERT INTO meta (key, value) VALUES ($key, $value)
      ON CONFLICT(key) DO UPDATE SET value = excluded.value`);
    }
    /**
     * Open (or create) the store for a project root and return the
     * handle together with everything in it (the repository reads this
     * once at construction).
     *
     * Legacy migration is attempted whenever the `memories` table is
     * empty, not only when the database file was just created. A failed
     * migration still leaves a database file behind, so gating on "file
     * did not exist" would mean the retry never happens; gating on an
     * empty table lets the next open try again while the JSON is still
     * in place.
     *
     * A legacy-migration failure is reported via `onMigrationError`
     * rather than thrown: the plugin's startup keeps going with an
     * empty database. Any other failure (the database cannot be
     * configured or read) is rethrown after the handle is closed, so a
     * failed open does not leak a connection.
     *
     * @param root Project root directory.
     * @param log Optional sink for the "migrated N memories" message.
     * @param onMigrationError Optional callback receiving a migration failure's cause.
     * @returns `{ store, loaded }`.
     */
    static open(root, log, onMigrationError) {
        const dbPath = dbFilePath(root);
        mkdirSync(dirname(dbPath), { recursive: true });
        const db = new Database(dbPath, { create: true });
        try {
            const store = new SqliteStore(db);
            // `.get()` yields a nullish value when the table has no rows.
            const hasMemories = db.query(`SELECT 1 FROM memories LIMIT 1`).get() != null;
            if (!hasMemories) {
                const migrated = store.migrateFromJson(root, onMigrationError);
                if (migrated > 0 && log) {
                    log(`migrated ${migrated} memories from legacy diane.json`);
                }
            }
            return { store, loaded: store.loadAll() };
        }
        catch (e) {
            try {
                db.close();
            }
            catch {
                /* closing is best effort — the original error is what matters */
            }
            throw e;
        }
    }
    /** Read the whole store back — called once, at repository construction. */
    loadAll() {
        const rows = this.db.query(`SELECT * FROM memories`).all();
        const memories = rows.map(rowToMemory);
        const meta = emptyMeta();
        const metaRows = this.db.query(`SELECT key, value FROM meta`).all();
        for (const { key, value } of metaRows) {
            try {
                if (key === "ingestedAt")
                    meta.ingestedAt = JSON.parse(value);
                else if (key === "lastEvictionAt")
                    meta.lastEvictionAt = JSON.parse(value);
                else if (key === "schema")
                    meta.schema = JSON.parse(value);
            }
            catch {
                /* a corrupt meta row falls back to the default — non-fatal */
            }
        }
        return { memories, meta };
    }
    /**
     * Persist a delta in ONE transaction: upsert every memory in
     * `dirty`, delete every id in `deleted`, write `meta`. Three changed
     * memories cost three row writes, not a re-serialise of the entire
     * store, and the single transaction makes the flush all-or-nothing.
     *
     * @param dirty Memories to insert or overwrite.
     * @param deleted Ids of memories to remove.
     * @param meta Metadata to store; each field is written as JSON text.
     */
    flush(dirty, deleted, meta) {
        const run = this.db.transaction(() => {
            for (const m of dirty) {
                this.upsertStmt.run({
                    $id: m.id,
                    $category: m.category,
                    $subject: m.subject,
                    $content: m.content,
                    // JSON.stringify(undefined) is undefined, which would violate
                    // the NOT NULL column and roll back the whole flush.
                    $tags: JSON.stringify(m.tags ?? []),
                    $source: m.source,
                    $createdAt: m.createdAt,
                    $usedAt: m.usedAt,
                    $useCount: m.useCount,
                    $sizeBytes: m.sizeBytes,
                    $pinned: m.pinned ? 1 : 0,
                });
            }
            for (const id of deleted) {
                this.deleteStmt.run({ $id: id });
            }
            this.metaUpsertStmt.run({ $key: "ingestedAt", $value: JSON.stringify(meta.ingestedAt) });
            this.metaUpsertStmt.run({ $key: "lastEvictionAt", $value: JSON.stringify(meta.lastEvictionAt) });
            this.metaUpsertStmt.run({ $key: "schema", $value: JSON.stringify(meta.schema) });
        });
        run();
    }
    /** Close the underlying database handle. */
    close() {
        this.db.close();
    }
    /**
     * Legacy migration: if a `diane.json` exists, load it, bulk-insert
     * it into the database in one transaction, and rename the JSON to
     * `.json.migrated` so it is not re-migrated and the user keeps a
     * backup.
     *
     * **Failure is not propagated.** A legacy-store migration is best-
     * effort housekeeping; if it cannot complete (the JSON is unreadable
     * or corrupt, the database is held by another process, disk is
     * full) it MUST NOT crash the plugin's startup. The JSON file is
     * left untouched and 0 is returned so the caller continues with an
     * empty database.
     *
     * @param root Project root directory.
     * @param onError Optional callback, invoked once with the cause on a
     *        real failure — an unreadable/corrupt JSON file or a failed
     *        bulk insert. It is NOT invoked for the "no JSON file" or
     *        "wrong schema version" cases.
     * @returns The number of memories migrated, or 0 when nothing was
     *          migrated for any reason.
     */
    migrateFromJson(root, onError) {
        const jsonPath = `${root}/${JSON_REL}`;
        if (!existsSync(jsonPath))
            return 0;
        let parsed;
        try {
            parsed = JSON.parse(readFileSync(jsonPath, "utf-8"));
        }
        catch (e) {
            // Unreadable or corrupt JSON is a real failure: surface the
            // cause, start fresh, and leave the file untouched.
            if (onError)
                onError(e);
            return 0;
        }
        if (!parsed || parsed.version !== 1 || !Array.isArray(parsed.memories)) {
            return 0;
        }
        // Merge over the defaults so a legacy file with a partial `meta`
        // cannot hand `undefined` to a NOT NULL column and sink the
        // whole migration.
        const legacyMeta = parsed.meta && typeof parsed.meta === "object" ? parsed.meta : {};
        const meta = { ...emptyMeta(), ...legacyMeta };
        try {
            this.flush(parsed.memories, [], meta);
        }
        catch (e) {
            // The bulk insert failed mid-way. The SQLite transaction is
            // rolled back automatically; the JSON file is still in place;
            // we report the cause and leave the database as it was. The
            // plugin's startup continues.
            if (onError)
                onError(e);
            return 0;
        }
        try {
            renameSync(jsonPath, `${jsonPath}.migrated`);
        }
        catch {
            /* best effort — if the rename fails the migrated rows are
               already in the DB, so the next open sees a non-empty table
               and does not re-migrate (and re-migrating a file with zero
               memories is a harmless no-op) */
        }
        return parsed.memories.length;
    }
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* vector-store.ts — persistence and search for memory embeddings.
|
|
3
|
+
*
|
|
4
|
+
* This is a SELF-CONTAINED, OPT-IN component. It is constructed only
|
|
5
|
+
* when `enableSemanticSearch` is on, and it uses its OWN database file
|
|
6
|
+
* (`.opencode/diane-vectors.db`) — the primary store, its schema, and
|
|
7
|
+
* its migration path are never touched. When the feature is off this
|
|
8
|
+
* file is never imported and the file never created, so the default
|
|
9
|
+
* configuration carries zero cost from semantic search.
|
|
10
|
+
*
|
|
11
|
+
* The cache is keyed to a model id. If the configured embedding model
|
|
12
|
+
* changes, the stored vectors are from a different space and are
|
|
13
|
+
* dropped wholesale on open — a stale vector is worse than none.
|
|
14
|
+
*
|
|
15
|
+
* Vectors are L2-normalised on the way in, so similarity search is a
|
|
16
|
+
* plain dot product.
|
|
17
|
+
*/
|
|
18
|
+
import { type FusedItem } from "../search/embedder.js";
|
|
19
|
+
/**
 * Path of the vector database for a project root: `root` joined with
 * `.opencode/diane-vectors.db`. The result is absolute only when
 * `root` is.
 */
export declare function vectorDbPath(root: string): string;
|
|
21
|
+
export declare class VectorStore {
    /** Underlying `bun:sqlite` database handle. */
    private db;
    /** In-memory copy of every stored vector, keyed by memory id; searched by brute force. */
    private mem;
    /** Dimension shared by all stored vectors; 0 until the first vector is seen. */
    private dim;
    /** Id of the embedding model this store is bound to. */
    readonly modelId: string;
    /** Instances are obtained through {@link VectorStore.open}. */
    private constructor();
    /**
     * Open (or create) the vector store for a project root and bind it
     * to `modelId`. Vectors recorded under a different model id are
     * deleted first, since vectors from two models are not comparable.
     *
     * @param root Project root directory.
     * @param modelId Identifier of the embedding model in use.
     */
    static open(root: string, modelId: string): VectorStore;
    /** Read every persisted vector into the in-memory copy. */
    private loadAll;
    /** Number of vectors currently held. */
    size(): number;
    /** Whether `id` already has a stored vector. */
    has(id: string): boolean;
    /** The subset of `ids` with no stored vector yet — the embedding to-do list. */
    missing(ids: Iterable<string>): string[];
    /**
     * Store a batch of (id, vector) pairs: each vector is normalised,
     * kept in memory, and written to the database, all in one
     * transaction. A vector whose length differs from the established
     * dimension is skipped instead of being stored.
     */
    putMany(entries: Array<{
        id: string;
        vec: Float32Array;
    }>): void;
    /**
     * Remove every vector whose id is absent from `validIds`.
     *
     * @returns How many vectors were removed.
     */
    prune(validIds: Set<string>): number;
    /**
     * The `k` best-matching memory ids for `queryVec`, best first. The
     * score is the dot product of the normalised query with each stored
     * (normalised) vector. Returns an empty list, rather than throwing,
     * when the query's dimension does not match the store's.
     */
    search(queryVec: Float32Array, k: number): FusedItem[];
    /** Close the underlying database handle. */
    close(): void;
}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* vector-store.ts — persistence and search for memory embeddings.
|
|
3
|
+
*
|
|
4
|
+
* This is a SELF-CONTAINED, OPT-IN component. It is constructed only
|
|
5
|
+
* when `enableSemanticSearch` is on, and it uses its OWN database file
|
|
6
|
+
* (`.opencode/diane-vectors.db`) — the primary store, its schema, and
|
|
7
|
+
* its migration path are never touched. When the feature is off this
|
|
8
|
+
* file is never imported and the file never created, so the default
|
|
9
|
+
* configuration carries zero cost from semantic search.
|
|
10
|
+
*
|
|
11
|
+
* The cache is keyed to a model id. If the configured embedding model
|
|
12
|
+
* changes, the stored vectors are from a different space and are
|
|
13
|
+
* dropped wholesale on open — a stale vector is worse than none.
|
|
14
|
+
*
|
|
15
|
+
* Vectors are L2-normalised on the way in, so similarity search is a
|
|
16
|
+
* plain dot product.
|
|
17
|
+
*/
|
|
18
|
+
import { Database } from "bun:sqlite";
|
|
19
|
+
import { mkdirSync } from "node:fs";
|
|
20
|
+
import { dirname, join } from "node:path";
|
|
21
|
+
import { dot, normalize } from "../search/embedder.js";
|
|
22
|
+
/** Location of the vector database, relative to the project root. */
const DB_REL = ".opencode/diane-vectors.db";
/**
 * Path of the vector database for a project root: `root` joined with
 * the store's relative location. Absolute only when `root` is.
 */
export function vectorDbPath(root) {
    const location = join(root, DB_REL);
    return location;
}
|
|
27
|
+
export class VectorStore {
    /** Underlying `bun:sqlite` database handle. */
    db;
    /** In-memory mirror — every stored vector, for brute-force search. */
    mem = new Map();
    /** Vector dimension, learned from the first vector stored. 0 until then. */
    dim = 0;
    /** Id of the embedding model this store is bound to. */
    modelId;
    /**
     * @param db An open `bun:sqlite` database with the vector tables in place.
     * @param modelId Id of the embedding model the vectors belong to.
     */
    constructor(db, modelId) {
        this.db = db;
        this.modelId = modelId;
    }
    /**
     * Open (or create) the vector store for a project root, bound to a
     * model id. If a different model produced the existing vectors, they
     * are discarded — vectors from two models are not comparable.
     *
     * If anything fails after the database has been opened, the handle
     * is closed before the error is rethrown, so a failed open does not
     * leak a connection.
     *
     * @param root Project root directory.
     * @param modelId Identifier of the embedding model in use.
     * @returns A store with every persisted vector already loaded.
     */
    static open(root, modelId) {
        const path = vectorDbPath(root);
        mkdirSync(dirname(path), { recursive: true });
        const db = new Database(path, { create: true });
        try {
            db.exec("PRAGMA journal_mode = WAL");
            db.exec("PRAGMA synchronous = NORMAL");
            db.exec("CREATE TABLE IF NOT EXISTS vectors (memory_id TEXT PRIMARY KEY, vec BLOB NOT NULL)");
            db.exec("CREATE TABLE IF NOT EXISTS vmeta (key TEXT PRIMARY KEY, value TEXT NOT NULL)");
            const storedModel = db.query("SELECT value FROM vmeta WHERE key = 'model'").get()?.value;
            if (storedModel && storedModel !== modelId) {
                // Model changed — the cached vectors live in a different space.
                db.exec("DELETE FROM vectors");
            }
            db.query("INSERT OR REPLACE INTO vmeta (key, value) VALUES ('model', ?)").run(modelId);
            const store = new VectorStore(db, modelId);
            store.loadAll();
            return store;
        }
        catch (e) {
            try {
                db.close();
            }
            catch {
                /* closing is best effort — the original error is what matters */
            }
            throw e;
        }
    }
    /**
     * Load every persisted vector into the in-memory mirror. The first
     * usable row fixes the dimension; rows of any other length are left
     * out of the mirror. A blob that cannot be decoded (its byte length
     * is not a multiple of 4) or is empty is skipped rather than
     * aborting the open.
     */
    loadAll() {
        const rows = this.db.query("SELECT memory_id, vec FROM vectors").all();
        for (const row of rows) {
            let f32;
            try {
                f32 = bytesToFloat32(row.vec);
            }
            catch {
                continue; // undecodable blob — treat as if no vector were stored
            }
            if (f32.length === 0)
                continue;
            if (this.dim === 0)
                this.dim = f32.length;
            if (f32.length === this.dim)
                this.mem.set(row.memory_id, f32);
        }
    }
    /** Number of vectors held. */
    size() {
        return this.mem.size;
    }
    /** Whether a memory id already has a stored vector. */
    has(id) {
        return this.mem.has(id);
    }
    /** The memory ids in `ids` that do NOT yet have a vector — the embedding to-do list. */
    missing(ids) {
        const out = [];
        for (const id of ids)
            if (!this.mem.has(id))
                out.push(id);
        return out;
    }
    /**
     * Store a batch of (id, vector) pairs — normalised, persisted in one
     * transaction, and then mirrored in memory. A vector whose length
     * disagrees with the established dimension is skipped (it cannot
     * have come from the same model) rather than corrupting search; an
     * empty vector is skipped too, so it can never be stored while the
     * dimension is still unset.
     *
     * The mirror and the dimension are only updated after the
     * transaction has committed. If the write fails and rolls back, the
     * in-memory state still matches what is on disk.
     *
     * @param entries The (id, vector) pairs to store.
     */
    putMany(entries) {
        const insert = this.db.query("INSERT OR REPLACE INTO vectors (memory_id, vec) VALUES (?, ?)");
        let dim = this.dim;
        const staged = [];
        const tx = this.db.transaction((batch) => {
            for (const { id, vec } of batch) {
                if (vec.length === 0)
                    continue;
                if (dim === 0)
                    dim = vec.length;
                if (vec.length !== dim)
                    continue;
                const n = normalize(vec);
                insert.run(id, float32ToBytes(n));
                staged.push([id, n]);
            }
        });
        tx(entries);
        // Committed — now it is safe to publish the new state.
        this.dim = dim;
        for (const [id, n] of staged)
            this.mem.set(id, n);
    }
    /**
     * Drop vectors whose id is not in `validIds` — used to clear out
     * memories that were evicted or replaced. The in-memory mirror is
     * updated only after the deletes have committed.
     *
     * @param validIds Ids that should keep their vector.
     * @returns The count removed.
     */
    prune(validIds) {
        const stale = [];
        for (const id of this.mem.keys())
            if (!validIds.has(id))
                stale.push(id);
        if (stale.length === 0)
            return 0;
        const del = this.db.query("DELETE FROM vectors WHERE memory_id = ?");
        const tx = this.db.transaction((ids) => {
            for (const id of ids)
                del.run(id);
        });
        tx(stale);
        for (const id of stale)
            this.mem.delete(id);
        return stale.length;
    }
    /**
     * Top-`k` memory ids by similarity to `queryVec`, highest first. The
     * score is the dot product of the normalised query with each stored
     * (already normalised) vector. A dimension mismatch, an empty store,
     * or a non-positive `k` yields an empty result rather than a throw.
     *
     * @param queryVec Query embedding; it is copied before normalising.
     * @param k Maximum number of results.
     * @returns Up to `k` `{ id, score }` items.
     */
    search(queryVec, k) {
        if (this.dim === 0 || queryVec.length !== this.dim || k <= 0)
            return [];
        const q = normalize(Float32Array.from(queryVec));
        const scored = [];
        for (const [id, vec] of this.mem) {
            scored.push({ id, score: dot(q, vec) });
        }
        scored.sort((a, b) => b.score - a.score);
        return scored.slice(0, k);
    }
    /** Close the underlying database handle. */
    close() {
        this.db.close();
    }
}
|
|
151
|
+
/**
 * Float32Array → a Uint8Array over the same bytes, for BLOB storage.
 * The result is a view, not a copy: it shares the array's buffer and
 * covers exactly the array's byte range, in the platform's native
 * byte order.
 */
function float32ToBytes(v) {
    const { buffer, byteOffset, byteLength } = v;
    return new Uint8Array(buffer, byteOffset, byteLength);
}
|
|
155
|
+
/**
 * Raw BLOB bytes → Float32Array. The bytes are first copied into a
 * fresh buffer that starts at offset 0, so the input's own offset and
 * alignment never matter.
 */
function bytesToFloat32(bytes) {
    const owned = Uint8Array.from(bytes);
    return new Float32Array(owned.buffer);
}
|