docsgov 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -0
- package/dist/apispec/apispec.js +401 -0
- package/dist/apispec/apispec.test.js +444 -0
- package/dist/apispec/errors.js +17 -0
- package/dist/apispec/index.js +2 -0
- package/dist/check/doclinks.js +167 -0
- package/dist/check/index.js +8 -0
- package/dist/check/run.js +391 -0
- package/dist/check/run.test.js +513 -0
- package/dist/check/suggest.js +134 -0
- package/dist/check/suggest.test.js +92 -0
- package/dist/check/tokens.js +125 -0
- package/dist/cmd/main.js +330 -0
- package/dist/cmd/main.test.js +422 -0
- package/dist/codeq/cache.js +71 -0
- package/dist/codeq/cache.test.js +67 -0
- package/dist/codeq/errors.js +52 -0
- package/dist/codeq/grammars/tree-sitter-go.wasm +0 -0
- package/dist/codeq/grammars/tree-sitter-java.wasm +0 -0
- package/dist/codeq/grammars/tree-sitter-javascript.wasm +0 -0
- package/dist/codeq/grammars/tree-sitter-tsx.wasm +0 -0
- package/dist/codeq/grammars/tree-sitter-typescript.wasm +0 -0
- package/dist/codeq/index.js +11 -0
- package/dist/codeq/resolve.test.js +109 -0
- package/dist/codeq/resolver.js +128 -0
- package/dist/codeq/resolver.test.js +124 -0
- package/dist/codeq/resolvers/go.js +242 -0
- package/dist/codeq/resolvers/go.test.js +143 -0
- package/dist/codeq/resolvers/java.js +349 -0
- package/dist/codeq/resolvers/java.test.js +138 -0
- package/dist/codeq/resolvers/java_queries.js +63 -0
- package/dist/codeq/resolvers/javascript.js +412 -0
- package/dist/codeq/resolvers/javascript.test.js +125 -0
- package/dist/codeq/resolvers/javascript_queries.js +46 -0
- package/dist/codeq/resolvers/typescript.js +366 -0
- package/dist/codeq/resolvers/typescript.test.js +180 -0
- package/dist/codeq/resolvers/typescript_queries.js +78 -0
- package/dist/codeq/signature.js +50 -0
- package/dist/codeq/signature.test.js +50 -0
- package/dist/codeq/suggest.js +96 -0
- package/dist/codeq/treesitter.js +122 -0
- package/dist/codeq/treesitter.test.js +118 -0
- package/dist/config/config.js +74 -0
- package/dist/config/config.test.js +98 -0
- package/dist/config/fs.js +116 -0
- package/dist/config/glob.js +82 -0
- package/dist/config/glob.test.js +61 -0
- package/dist/config/index.js +4 -0
- package/dist/dedup/analyzer/analyzer.js +533 -0
- package/dist/dedup/analyzer/analyzer.test.js +530 -0
- package/dist/dedup/analyzer/canonical.js +74 -0
- package/dist/dedup/analyzer/canonical.test.js +70 -0
- package/dist/dedup/analyzer/cosine_clusters.js +169 -0
- package/dist/dedup/analyzer/cosine_clusters.test.js +131 -0
- package/dist/dedup/analyzer/distinctive.js +85 -0
- package/dist/dedup/analyzer/distinctive.test.js +49 -0
- package/dist/dedup/analyzer/exact_clusters.js +63 -0
- package/dist/dedup/analyzer/exact_clusters.test.js +81 -0
- package/dist/dedup/analyzer/index.js +14 -0
- package/dist/dedup/analyzer/multiplicity.js +110 -0
- package/dist/dedup/analyzer/multiplicity.test.js +123 -0
- package/dist/dedup/analyzer/order.js +22 -0
- package/dist/dedup/analyzer/partial_overlaps.js +65 -0
- package/dist/dedup/analyzer/partial_overlaps.test.js +161 -0
- package/dist/dedup/analyzer/preview.js +84 -0
- package/dist/dedup/analyzer/preview.test.js +46 -0
- package/dist/dedup/analyzer/safety.js +27 -0
- package/dist/dedup/analyzer/safety.test.js +39 -0
- package/dist/dedup/config.js +18 -0
- package/dist/dedup/configload.js +299 -0
- package/dist/dedup/configload.test.js +410 -0
- package/dist/dedup/dedup.index.test.js +203 -0
- package/dist/dedup/dedup.js +143 -0
- package/dist/dedup/dedup.test.js +212 -0
- package/dist/dedup/dedupcfg/config.js +112 -0
- package/dist/dedup/dedupcfg/config.test.js +70 -0
- package/dist/dedup/dedupcfg/index.js +1 -0
- package/dist/dedup/deduptypes/index.js +1 -0
- package/dist/dedup/deduptypes/types.js +9 -0
- package/dist/dedup/deduptypes/types.test.js +34 -0
- package/dist/dedup/embedder/cache.js +23 -0
- package/dist/dedup/embedder/cache.test.js +50 -0
- package/dist/dedup/embedder/constants.js +10 -0
- package/dist/dedup/embedder/embedder.js +76 -0
- package/dist/dedup/embedder/embedder.mock.test.js +128 -0
- package/dist/dedup/embedder/embedder.test.js +96 -0
- package/dist/dedup/embedder/errors.js +20 -0
- package/dist/dedup/embedder/errors.test.js +35 -0
- package/dist/dedup/embedder/index.js +4 -0
- package/dist/dedup/embedder/session.js +78 -0
- package/dist/dedup/embedder/session.test.js +172 -0
- package/dist/dedup/gitignore.js +97 -0
- package/dist/dedup/gitignore.test.js +98 -0
- package/dist/dedup/index.js +11 -0
- package/dist/dedup/indexdb/errors.js +48 -0
- package/dist/dedup/indexdb/index.js +6 -0
- package/dist/dedup/indexdb/indexdb.js +302 -0
- package/dist/dedup/indexdb/indexdb.test.js +739 -0
- package/dist/dedup/indexdb/load.js +110 -0
- package/dist/dedup/indexdb/migrations.js +58 -0
- package/dist/dedup/indexdb/schema.js +83 -0
- package/dist/dedup/indexer/index.js +9 -0
- package/dist/dedup/indexer/indexer.js +501 -0
- package/dist/dedup/indexer/indexer.test.js +510 -0
- package/dist/dedup/indexer/links.js +89 -0
- package/dist/dedup/mdsection/anchor.js +60 -0
- package/dist/dedup/mdsection/anchor.test.js +39 -0
- package/dist/dedup/mdsection/blocks.js +409 -0
- package/dist/dedup/mdsection/blocks.test.js +359 -0
- package/dist/dedup/mdsection/index.js +4 -0
- package/dist/dedup/mdsection/parse.js +21 -0
- package/dist/dedup/mdsection/section.js +234 -0
- package/dist/dedup/mdsection/section.test.js +221 -0
- package/dist/dedup/report/floatfmt.js +71 -0
- package/dist/dedup/report/floatfmt.test.js +42 -0
- package/dist/dedup/report/index.js +8 -0
- package/dist/dedup/report/quote.js +77 -0
- package/dist/dedup/report/quote.test.js +67 -0
- package/dist/dedup/report/text.js +251 -0
- package/dist/dedup/report/text.test.js +420 -0
- package/dist/dedup/report_types.js +8 -0
- package/dist/dedup/sectionid/index.js +1 -0
- package/dist/dedup/sectionid/sectionid.js +16 -0
- package/dist/dedup/sectionid/sectionid.test.js +49 -0
- package/dist/guard/api/errors.js +12 -0
- package/dist/guard/api/index.js +2 -0
- package/dist/guard/api/parser.js +81 -0
- package/dist/guard/api/parser.test.js +58 -0
- package/dist/guard/api/types.js +1 -0
- package/dist/guard/code/errors.js +16 -0
- package/dist/guard/code/index.js +2 -0
- package/dist/guard/code/parser.js +54 -0
- package/dist/guard/code/parser.test.js +111 -0
- package/dist/guard/code/types.js +6 -0
- package/dist/index.js +1 -0
- package/dist/index.test.js +5 -0
- package/dist/repo/boundary.js +92 -0
- package/dist/repo/boundary.test.js +65 -0
- package/dist/repo/errors.js +56 -0
- package/dist/repo/errors.test.js +85 -0
- package/dist/repo/exists.test.js +72 -0
- package/dist/repo/filename.js +46 -0
- package/dist/repo/filename.test.js +39 -0
- package/dist/repo/fs.js +53 -0
- package/dist/repo/index.js +7 -0
- package/dist/repo/overlay.js +36 -0
- package/dist/repo/overlay.test.js +80 -0
- package/dist/repo/repo.js +353 -0
- package/dist/repo/repo.test.js +255 -0
- package/dist/repo/testutil.js +27 -0
- package/dist/repo/write.test.js +125 -0
- package/dist/report/color.js +73 -0
- package/dist/report/index.js +1 -0
- package/dist/report/report.js +112 -0
- package/dist/report/report.test.js +368 -0
- package/dist/violation/index.js +1 -0
- package/dist/violation/types.js +22 -0
- package/dist/violation/types.test.js +70 -0
- package/package.json +48 -0
|
@@ -0,0 +1,739 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Behavior-encoding tests for the dedup index DB, ported from
|
|
3
|
+
* internal/dedup/indexdb/indexdb_test.go.
|
|
4
|
+
*
|
|
5
|
+
* Each test states WHY the behavior matters: the DB is the persisted cache
|
|
6
|
+
* format, so schema columns/indexes, the open reconciliation outcomes (fresh,
|
|
7
|
+
* reopen, migrate, recreate, embedder-purge), and the embedding byte layout are
|
|
8
|
+
* all contracts that a future cross-check against the Go binary depends on.
|
|
9
|
+
*
|
|
10
|
+
* Go used t.TempDir + t.Cleanup + a raw sql.Open helper; vitest has neither, so
|
|
11
|
+
* we use mkdtemp under os.tmpdir, clean each dir in afterEach, and open raw DBs
|
|
12
|
+
* with node:sqlite directly (same engine the package uses).
|
|
13
|
+
*/
|
|
14
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
15
|
+
import { tmpdir } from "node:os";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
import { DatabaseSync } from "node:sqlite";
|
|
18
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
19
|
+
import { createTableSQL, decodeVec, ErrIndexMissing, open, OpenStatus, registerTestMigration, SchemaVersion, } from "./index.js";
|
|
20
|
+
// testModel and testDim are the expected embedder identity values used in all
|
|
21
|
+
// indexdb tests. They match the constants in the embedder package.
|
|
22
|
+
const testModel = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2";
|
|
23
|
+
const testDim = 768;
|
|
24
|
+
const tmpDirs = [];
|
|
25
|
+
// After every test, delete each temp directory that was registered in tmpDirs.
// splice(0) empties the shared list while handing back its contents, so a
// directory is never removed twice.
afterEach(() => {
    const finished = tmpDirs.splice(0);
    finished.forEach((dir) => {
        rmSync(dir, { recursive: true, force: true });
    });
});
|
|
30
|
+
/**
 * newTempPath creates a fresh temp directory (prefix "indexdb-") under the OS
 * temp dir, records it in tmpDirs, and returns the path of an "index.db" file
 * inside it. The file itself is not created here.
 *
 * @returns {string} path to the (not yet existing) DB file.
 */
function newTempPath() {
    const prefix = join(tmpdir(), "indexdb-");
    const tempDir = mkdtempSync(prefix);
    tmpDirs.push(tempDir);
    return join(tempDir, "index.db");
}
|
|
36
|
+
/**
 * openTempDB opens an indexdb at a brand-new temp path using the test embedder
 * identity (testModel, testDim).
 *
 * @returns {{store: object, dbPath: string, status: number}} the store and
 *   status reported by open(), plus the path the DB lives at.
 */
function openTempDB() {
    const dbPath = newTempPath();
    const opened = open(dbPath, testModel, testDim);
    return { store: opened.store, dbPath, status: opened.status };
}
|
|
42
|
+
/**
 * rawDB opens the file at dbPath directly with node:sqlite, bypassing the
 * package's open() and therefore its reconciliation step.
 *
 * @param {string} dbPath path handed to the DatabaseSync constructor.
 * @returns {DatabaseSync} an open connection; the caller must close it.
 */
function rawDB(dbPath) {
    const connection = new DatabaseSync(dbPath);
    return connection;
}
|
|
46
|
+
// --- raw-DB helpers (mirror the Go test helpers) ---
|
|
47
|
+
/**
 * readMetaVersion reads the stored schema_version from the meta table through
 * a raw connection.
 *
 * @param {string} dbPath path of the DB file to inspect.
 * @returns {*} the `value` column of the schema_version row.
 * @throws {Error} when the meta table has no schema_version row.
 */
function readMetaVersion(dbPath) {
    const conn = rawDB(dbPath);
    try {
        const query = conn.prepare(`SELECT value FROM meta WHERE key='schema_version'`);
        const found = query.get();
        if (found !== undefined) {
            return found.value;
        }
        throw new Error("schema_version row missing");
    }
    finally {
        // Always release the raw connection, even when the lookup throws.
        conn.close();
    }
}
|
|
59
|
+
/**
 * countSectionsRows returns the number of rows in the sections table, read
 * through a raw connection.
 *
 * @param {string} dbPath path of the DB file to inspect.
 * @returns {number} the COUNT(*) of sections.
 */
function countSectionsRows(dbPath) {
    const conn = rawDB(dbPath);
    try {
        const counted = conn.prepare(`SELECT COUNT(*) AS n FROM sections`).get();
        const total = counted.n;
        return total;
    }
    finally {
        conn.close();
    }
}
|
|
69
|
+
/**
 * tableExists reports whether sqlite_master lists a table called `name`.
 *
 * @param {string} dbPath path of the DB file to inspect.
 * @param {string} name table name to look up.
 * @returns {boolean} true when a matching table row is found.
 */
function tableExists(dbPath, name) {
    const conn = rawDB(dbPath);
    try {
        const lookup = conn.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`);
        const hit = lookup.get(name);
        // No row leaves foundName undefined, which compares unequal to name.
        const foundName = hit?.name;
        return foundName === name;
    }
    finally {
        conn.close();
    }
}
|
|
81
|
+
/**
 * readSectionEmbedding returns the raw `embedding` column of one sections row,
 * read through a raw connection (no decoding is applied here).
 *
 * @param {string} dbPath path of the DB file to inspect.
 * @param {string} sectionID value matched against sections.id.
 * @returns {*} the stored embedding value for that row.
 * @throws {Error} when no row has the given id.
 */
function readSectionEmbedding(dbPath, sectionID) {
    const conn = rawDB(dbPath);
    try {
        const query = conn.prepare(`SELECT embedding FROM sections WHERE id=?`);
        const found = query.get(sectionID);
        if (found !== undefined) {
            return found.embedding;
        }
        throw new Error(`section ${sectionID} not found`);
    }
    finally {
        conn.close();
    }
}
|
|
93
|
+
/**
 * setMetaValue overwrites the value of an existing meta row through a raw
 * connection. It issues an UPDATE only, so a key that is not already present
 * is left absent.
 *
 * @param {string} dbPath path of the DB file to modify.
 * @param {string} key meta key to update.
 * @param {string} value new value for that key.
 */
function setMetaValue(dbPath, key, value) {
    const conn = rawDB(dbPath);
    try {
        const update = conn.prepare(`UPDATE meta SET value=? WHERE key=?`);
        update.run(value, key);
    }
    finally {
        conn.close();
    }
}
|
|
102
|
+
/**
 * insertSectionRowDB writes one fixed dummy row (id "testid0000000001") into
 * the sections table of an already-open connection. Mirrors the Go helper.
 *
 * @param {object} db open connection exposing prepare(sql).run(...values).
 */
function insertSectionRowDB(db) {
    const insertSQL = `
INSERT OR REPLACE INTO sections
(id, file_path, heading, heading_level, anchor, start_line, end_line,
content_hash, raw_content, embed_text, prose_word_count, has_table,
has_code, inbound_count, embedding, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`;
    // One value per column, in the column order listed in the statement above.
    const rowValues = [
        "testid0000000001", // id
        "docs/test.md", // file_path
        "Test", // heading
        2, // heading_level
        "test", // anchor
        1, // start_line
        10, // end_line
        "hash", // content_hash
        "## Test\n\nContent.", // raw_content
        "content", // embed_text
        5, // prose_word_count
        0, // has_table
        0, // has_code
        0, // inbound_count
        new Uint8Array([0x01]), // embedding
        "2024-01-01T00:00:00Z", // updated_at
    ];
    db.prepare(insertSQL).run(...rowValues);
}
|
|
111
|
+
/**
 * insertSectionRow opens the DB at dbPath with a raw connection, inserts the
 * dummy section row via insertSectionRowDB, and closes the connection again.
 *
 * @param {string} dbPath path of the DB file to seed.
 */
function insertSectionRow(dbPath) {
    const conn = rawDB(dbPath);
    try {
        insertSectionRowDB(conn);
    }
    finally {
        // Close even if the insert fails so the file handle is not leaked.
        conn.close();
    }
}
|
|
120
|
+
/**
 * setupMigrationDB creates a DB with the full dedup schema (createTableSQL)
 * but an arbitrary schema_version, plus one dummy section row, so the
 * migrate/recreate paths can be exercised against a seeded state.
 *
 * The embedder meta rows are written from testModel / testDim, the same values
 * the tests pass to open(), so the seeded identity always matches.
 *
 * @param {string} dbPath path of the DB file to create.
 * @param {string} version value stored under meta.schema_version.
 */
function setupMigrationDB(dbPath, version) {
    const db = rawDB(dbPath);
    try {
        db.exec(createTableSQL);
        const meta = db.prepare(`INSERT OR REPLACE INTO meta(key, value) VALUES(?, ?)`);
        meta.run("schema_version", version);
        meta.run("embedder_model", testModel);
        // Derived from testDim (not a literal) so the stored dimension cannot
        // drift from the one handed to open().
        meta.run("embedder_dim", String(testDim));
        insertSectionRowDB(db);
    }
    finally {
        db.close();
    }
}
|
|
139
|
+
// v1CreateTableSQL is the original v1 schema: the sections table with its two
// indexes (file_path, content_hash) and the meta key/value table. It has NO
// blocks table. Used to seed a true v1 DB for the v1 -> v2 migration test.
const v1CreateTableSQL = `
CREATE TABLE IF NOT EXISTS sections (
id TEXT PRIMARY KEY,
file_path TEXT NOT NULL,
heading TEXT NOT NULL,
heading_level INTEGER NOT NULL,
anchor TEXT NOT NULL,
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
content_hash TEXT NOT NULL,
raw_content TEXT NOT NULL,
embed_text TEXT NOT NULL,
prose_word_count INTEGER NOT NULL,
has_table INTEGER NOT NULL,
has_code INTEGER NOT NULL,
inbound_count INTEGER NOT NULL DEFAULT 0,
embedding BLOB NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_sections_file ON sections(file_path);
CREATE INDEX IF NOT EXISTS idx_sections_hash ON sections(content_hash);

CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`;
|
|
168
|
+
/**
 * setupV1DB creates a v1-schema DB (no blocks table) with schema_version="1",
 * embedder meta matching testModel / testDim, and one section row carrying the
 * given id and embedding.
 *
 * @param {string} dbPath path of the DB file to create.
 * @param {string} sectionID id stored on the seeded section row.
 * @param {Uint8Array} embedding raw bytes stored in the row's embedding column.
 */
function setupV1DB(dbPath, sectionID, embedding) {
    const db = rawDB(dbPath);
    try {
        db.exec(v1CreateTableSQL);
        const meta = db.prepare(`INSERT OR REPLACE INTO meta(key, value) VALUES(?, ?)`);
        meta.run("schema_version", "1");
        meta.run("embedder_model", testModel);
        // Derived from testDim (not a literal) so the stored dimension cannot
        // drift from the one handed to open().
        meta.run("embedder_dim", String(testDim));
        db.prepare(`
INSERT OR REPLACE INTO sections
(id, file_path, heading, heading_level, anchor, start_line, end_line,
content_hash, raw_content, embed_text, prose_word_count, has_table,
has_code, inbound_count, embedding, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(sectionID, "docs/test.md", "Test", 2, "test", 1, 10, "hash", "## Test\n\nContent.", "content", 5, 0, 0, 0, embedding, "2024-01-01T00:00:00Z");
    }
    finally {
        db.close();
    }
}
|
|
191
|
+
/**
 * encodeVecTest packs a list of numbers into a byte buffer, 4 bytes per
 * element, each written as a little-endian float32 (mirrors the indexer).
 *
 * @param {ArrayLike<number>} v values to encode.
 * @returns {Uint8Array} buffer of length v.length * 4.
 */
function encodeVecTest(v) {
    const count = v.length;
    const bytes = new Uint8Array(count * 4);
    const writer = new DataView(bytes.buffer);
    let idx = 0;
    while (idx < count) {
        // `true` selects little-endian byte order.
        writer.setFloat32(idx * 4, v[idx], true);
        idx += 1;
    }
    return bytes;
}
|
|
200
|
+
/**
 * insertBlockRow writes one row into the blocks table of an already-open
 * connection. Arguments map one-to-one onto the table's columns; the tests in
 * this file pass `null` as the embedding for table blocks.
 *
 * @param {object} db open connection exposing prepare(sql).run(...values).
 */
function insertBlockRow(db, sectionID, blockIndex, filePath, heading, kind, startLine, endLine, contentHash, embedding) {
    const insertSQL = `
INSERT OR REPLACE INTO blocks
(section_id, block_index, file_path, heading, kind,
start_line, end_line, content_hash, embedding)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`;
    const statement = db.prepare(insertSQL);
    statement.run(sectionID, blockIndex, filePath, heading, kind, startLine, endLine, contentHash, embedding);
}
|
|
208
|
+
describe("open — reconciliation outcomes", () => {
    // Install case: opening a path with no file must yield a usable DB and a
    // distinct status so callers know nothing was loaded.
    it("returns StatusFresh for a brand-new DB", () => {
        const fresh = openTempDB();
        try {
            expect(fresh.status).toBe(OpenStatus.StatusFresh);
        }
        finally {
            fresh.store.close();
        }
    });
    // Common path: reopening an untouched DB must report plain "opened"; any
    // other status would make every run look like a cache invalidation.
    it("returns StatusOpened when reopening an unchanged matching DB", () => {
        const first = openTempDB();
        first.store.close();
        const reopened = open(first.dbPath, testModel, testDim);
        try {
            expect(reopened.status).toBe(OpenStatus.StatusOpened);
        }
        finally {
            reopened.store.close();
        }
    });
    // By contract StatusOpened is the zero value: downstream code relies on
    // 0 meaning "opened, nothing to report".
    it("StatusOpened is the zero value (0)", () => {
        expect(OpenStatus.StatusOpened).toBe(0);
    });
    // close() must complete without throwing; a throwing close would leak the
    // connection and wedge the next open under WAL.
    it("close does not throw", () => {
        const { store } = openTempDB();
        const closeStore = () => store.close();
        expect(closeStore).not.toThrow();
    });
});
|
|
245
|
+
describe("open — migration path", () => {
    // A registered migration must upgrade in place and PRESERVE rows: that is the
    // whole point of a migration vs. a recreate (it keeps expensive embeddings).
    it("applies a registered migration, bumps schema_version, and preserves rows", () => {
        const dbPath = newTempPath();
        // Seed a DB stamped with schema_version "0" and one section row.
        setupMigrationDB(dbPath, "0");
        // registerTestMigration hands back a function that is invoked in the
        // outer finally below (presumably it unregisters the migration — confirm
        // against the indexdb module).
        const cleanup = registerTestMigration("0", () => {
            // No-op migration.
        });
        try {
            // If open() throws, the test fails right here, so no separate
            // "did we get this far" flag is needed.
            const { store, status } = open(dbPath, testModel, testDim);
            try {
                expect(status).toBe(OpenStatus.StatusSchemaMigrated);
                // Stored schema_version bumped to the binary's SchemaVersion.
                expect(readMetaVersion(dbPath)).toBe(SchemaVersion);
                // Existing row survived (not wiped by the migration).
                expect(countSectionsRows(dbPath)).toBe(1);
            }
            finally {
                store.close();
            }
        }
        finally {
            cleanup();
        }
    });
});
|
|
275
|
+
describe("open — recreate path", () => {
    // A schema_version the binary has NO migration for must lead to a wipe and
    // rebuild rather than a crash, so a DB written by a newer binary self-heals
    // when an older one opens it.
    it("recreates when schema_version has no migration, clearing old rows", () => {
        const dbPath = newTempPath();
        setupMigrationDB(dbPath, "99");
        const reopened = open(dbPath, testModel, testDim);
        try {
            expect(reopened.status).toBe(OpenStatus.StatusSchemaRecreated);
            // The seeded row is gone because the DB was rebuilt from scratch.
            const remaining = countSectionsRows(dbPath);
            expect(remaining).toBe(0);
        }
        finally {
            reopened.store.close();
        }
    });
});
|
|
292
|
+
describe("open — embedder purge path", () => {
    // Stored vectors are meaningless once the embedder model/dim changes. open
    // MUST purge sections (forcing a re-embed) instead of mixing incompatible
    // vectors, which would silently corrupt similarity results.
    it("purges sections when embedder_model mismatches", () => {
        const dbPath = newTempPath();
        // Build a valid DB, seed one section row, then overwrite the stored
        // model name so it no longer matches testModel.
        const initial = open(dbPath, testModel, testDim);
        initial.store.close();
        insertSectionRow(dbPath);
        setMetaValue(dbPath, "embedder_model", "wrong-model");
        const reopened = open(dbPath, testModel, testDim);
        try {
            expect(reopened.status).toBe(OpenStatus.StatusEmbedderPurged);
            const remaining = countSectionsRows(dbPath);
            expect(remaining).toBe(0);
        }
        finally {
            reopened.store.close();
        }
    });
});
|
|
313
|
+
describe("sentinels and constants", () => {
    // The sentinel has to exist and be matchable with instanceof for the v2
    // read-only hook.
    it("ErrIndexMissing is a non-null Error subclass", () => {
        const sentinel = new ErrIndexMissing();
        expect(sentinel).toBeInstanceOf(Error);
        const { message } = sentinel;
        expect(message).toContain("does not exist");
    });
    // SchemaVersion belongs to the persisted format. Pinning it to "2" catches
    // an accidental bump, which would silently invalidate every DB on disk.
    it("SchemaVersion is '2'", () => {
        const pinned = "2";
        expect(SchemaVersion).toBe(pinned);
    });
});
|
|
326
|
+
describe("schema — fresh DB shape", () => {
    // The blocks table's columns and indexes are the on-disk contract shared
    // with the Go binary; a missing or renamed column would break cross-reads.
    it("fresh open creates the blocks table with expected columns and indexes", () => {
        const { store, dbPath } = openTempDB();
        store.close();
        const db = rawDB(dbPath);
        try {
            // The blocks table must be listed in sqlite_master.
            const blocksTable = db
                .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='blocks'`)
                .get();
            expect(blocksTable?.name).toBe("blocks");
            // Every required column must be reported by table_info.
            const requiredCols = [
                "section_id",
                "block_index",
                "file_path",
                "heading",
                "kind",
                "start_line",
                "end_line",
                "content_hash",
                "embedding",
            ];
            const tableInfo = db.prepare(`PRAGMA table_info(blocks)`).all();
            const actualCols = new Set(tableInfo.map((col) => col.name));
            requiredCols.forEach((col) => {
                expect(actualCols.has(col)).toBe(true);
            });
            // Both blocks indexes first, then both sections indexes (the other
            // half of the schema contract).
            const requiredIndexes = [
                "idx_blocks_hash",
                "idx_blocks_file",
                "idx_sections_file",
                "idx_sections_hash",
            ];
            for (const indexName of requiredIndexes) {
                const found = db
                    .prepare(`SELECT name FROM sqlite_master WHERE type='index' AND name=?`)
                    .get(indexName);
                expect(found?.name).toBe(indexName);
            }
        }
        finally {
            db.close();
        }
    });
});
|
|
376
|
+
describe("migration — v1 to v2 (real additive migration)", () => {
    // The shipped v1->v2 migration has to add the blocks table WITHOUT touching
    // sections or their embeddings. Embeddings are expensive to recompute, so
    // byte-for-byte survival is what makes the migration worthwhile.
    it("v1->v2 adds blocks, bumps version, and preserves the section embedding byte-for-byte", () => {
        const dbPath = newTempPath();
        const sectionID = "v1section00000001";
        // A distinct multi-byte payload makes the byte-for-byte check meaningful.
        const seededBytes = new Uint8Array([0xde, 0xad, 0xbe, 0xef, 0x00, 0x11]);
        setupV1DB(dbPath, sectionID, seededBytes);
        const migrated = open(dbPath, testModel, testDim);
        try {
            expect(migrated.status).toBe(OpenStatus.StatusSchemaMigrated);
            expect(tableExists(dbPath, "blocks")).toBe(true);
            expect(readMetaVersion(dbPath)).toBe("2");
            const storedBytes = readSectionEmbedding(dbPath, sectionID);
            expect(storedBytes).not.toBeNull();
            expect(Array.from(storedBytes)).toEqual(Array.from(seededBytes));
        }
        finally {
            migrated.store.close();
        }
    });
});
|
|
400
|
+
describe("loadAllBlocksWithEmbeddings", () => {
    // A prose block carries a vector; a table block carries NULL. The map must key
    // by content_hash with the prose vector present and the table hash ABSENT
    // (NULL → absent, not a zero-length entry) — clustering treats "no vector" and
    // "zero vector" differently, so a stray entry would mis-cluster tables.
    it("returns both records, maps the prose vector, and omits the NULL table block", () => {
        const { store, dbPath } = openTempDB();
        // Nested try/finally: the store is closed even if opening or closing
        // the raw connection throws. Close order stays raw connection first.
        try {
            const db = rawDB(dbPath);
            try {
                const proseVec = [0.1, 0.2, 0.3];
                const proseBlob = encodeVecTest(proseVec);
                const proseHash = "aaabbbcccdddeeefffaaaaabbbcccdddeeefffaaaaabbbcccdddeeefffaaabbb11";
                insertBlockRow(db, "sec0001", 0, "docs/a.md", "Intro", "prose", 2, 5, proseHash, proseBlob);
                const tableHash = "fff000111222333444555666777888999aaabbbcccdddeeefffaaa0001112223";
                insertBlockRow(db, "sec0001", 1, "docs/a.md", "Intro", "table", 6, 10, tableHash, null);
                const { blocks, embeddings } = store.loadAllBlocksWithEmbeddings();
                expect(blocks.length).toBe(2);
                const proseBlock = blocks.find((b) => b.ContentHash === proseHash);
                expect(proseBlock).toBeDefined();
                expect(proseBlock.SectionID).toBe("sec0001");
                expect(proseBlock.Index).toBe(0);
                expect(proseBlock.FilePath).toBe("docs/a.md");
                expect(proseBlock.Heading).toBe("Intro");
                expect(proseBlock.Kind).toBe("prose");
                expect(proseBlock.StartLine).toBe(2);
                expect(proseBlock.EndLine).toBe(5);
                // Text is NOT persisted — must be empty on load.
                expect(proseBlock.Text).toBe("");
                const gotVec = embeddings.get(proseHash);
                expect(gotVec).toBeDefined();
                expect(gotVec.length).toBe(proseVec.length);
                // float32 storage loses precision, so compare with a tolerance.
                for (let i = 0; i < proseVec.length; i++) {
                    expect(gotVec[i]).toBeCloseTo(proseVec[i], 5);
                }
                // Table hash must be absent (NULL embedding).
                expect(embeddings.has(tableHash)).toBe(false);
            }
            finally {
                db.close();
            }
        }
        finally {
            store.close();
        }
    });
});
|
|
447
|
+
describe("queryBlocks", () => {
    // The indexer's diff/prune reads exactly section_id, block_index, content_hash,
    // embedding. The NULL embedding for a table block must come back as null (not
    // an empty buffer) so the indexer can tell "table, no vector" apart.
    it("returns the four diff columns with NULL embedding preserved as null", () => {
        const { store, dbPath } = openTempDB();
        // Nested try/finally: the store is closed even if opening or closing
        // the raw connection throws. Close order stays raw connection first.
        try {
            const db = rawDB(dbPath);
            try {
                const blob = encodeVecTest([0.5, 0.6]);
                const hash1 = "aaabbbcccdddeeefffaaaaabbbcccdddeeefffaaaaabbbcccdddeeefffaaa0001";
                const hash2 = "bbbcccdddeeefffaaaa111222333444555666777888999aaabbbcccdddeee0002";
                insertBlockRow(db, "sec0001", 0, "docs/a.md", "H", "prose", 1, 3, hash1, blob);
                insertBlockRow(db, "sec0001", 1, "docs/a.md", "H", "table", 4, 7, hash2, null);
                const got = store.queryBlocks();
                expect(got.length).toBe(2);
                // Prose row: the embedding bytes come back unchanged.
                const prose = got.find((r) => r.section_id === "sec0001" && r.block_index === 0);
                expect(prose).toBeDefined();
                expect(prose.content_hash).toBe(hash1);
                expect(prose.embedding).not.toBeNull();
                expect(Array.from(prose.embedding)).toEqual(Array.from(blob));
                // Table row: the NULL embedding is reported as null.
                const table = got.find((r) => r.section_id === "sec0001" && r.block_index === 1);
                expect(table).toBeDefined();
                expect(table.content_hash).toBe(hash2);
                expect(table.embedding).toBeNull();
            }
            finally {
                db.close();
            }
        }
        finally {
            store.close();
        }
    });
});
|
|
478
|
+
describe("loadAllSectionsWithEmbeddings", () => {
|
|
479
|
+
// Round-trips a section through store: the boolean INTEGER columns and the
|
|
480
|
+
// embedding BLOB must decode back to the right shape, since the indexer's
|
|
481
|
+
// diff-and-skip relies on these exact loaded values.
|
|
482
|
+
it("loads a section with decoded embedding and boolean flags", () => {
    const { store, dbPath } = openTempDB();
    // Nested try/finally: the store is closed even if opening or closing the
    // raw connection throws. Close order stays raw connection first.
    try {
        const db = rawDB(dbPath);
        try {
            const vec = [0.25, 0.5, 0.75, 1.0];
            const blob = encodeVecTest(vec);
            db.prepare(`
INSERT OR REPLACE INTO sections
(id, file_path, heading, heading_level, anchor, start_line, end_line,
content_hash, raw_content, embed_text, prose_word_count, has_table,
has_code, inbound_count, embedding, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run("sec-load-0001", "docs/x.md", "Heading", 2, "heading", 1, 9, "hash-x", "## Heading\n\nBody.", "heading body", 12, 1, // has_table = true
            0, // has_code = false
            3, // inbound_count
            blob, "2024-01-01T00:00:00Z");
            const { sections, embeddings } = store.loadAllSectionsWithEmbeddings();
            expect(sections.length).toBe(1);
            const sec = sections[0];
            expect(sec.id).toBe("sec-load-0001");
            expect(sec.file_path).toBe("docs/x.md");
            expect(sec.heading_level).toBe(2);
            expect(sec.prose_word_count).toBe(12);
            // INTEGER 1/0 columns must come back as real booleans.
            expect(sec.has_table).toBe(true);
            expect(sec.has_code).toBe(false);
            expect(sec.inbound_count).toBe(3);
            // The embeddings map is looked up by section id here.
            const gotVec = embeddings.get("sec-load-0001");
            expect(gotVec).toBeDefined();
            expect(gotVec.length).toBe(vec.length);
            for (let i = 0; i < vec.length; i++) {
                expect(gotVec[i]).toBeCloseTo(vec[i], 5);
            }
        }
        finally {
            db.close();
        }
    }
    finally {
        store.close();
    }
});
|
|
519
|
+
// A section row that carries no usable vector must still load, mapping to a
// zero-length vector. The indexer treats a zero-length section vector as "no
// usable embedding"; crashing or skipping the row instead would silently drop a
// section from the diff and could resurrect a stale duplicate.
//
// NOTE: this test stores a zero-length BLOB, not a SQL NULL. Per the original
// author's note, a NULL embedding in `sections` is only reachable via the v1
// NOT NULL-relaxed path or a manual write, so the null branch of decodeVec
// cannot be driven through the section loader here. The title was corrected to
// say "empty embedding BLOB" so it matches what is actually exercised.
it("loads a section with an empty embedding BLOB as a zero-length vector", () => {
    const { store, dbPath } = openTempDB();
    const db = rawDB(dbPath);
    try {
        // Zero-length BLOB = empty embedding. The loader must hand back an
        // empty vector for it, never a 1-element NaN vector.
        db.prepare(`
            INSERT OR REPLACE INTO sections
            (id, file_path, heading, heading_level, anchor, start_line, end_line,
            content_hash, raw_content, embed_text, prose_word_count, has_table,
            has_code, inbound_count, embedding, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run("sec-empty-vec01", "docs/y.md", "H", 2, "h", 1, 2, "hash-y", "## H", "h", 1, 0, 0, 0, new Uint8Array(0), "2024-01-01T00:00:00Z");
        const { sections, embeddings } = store.loadAllSectionsWithEmbeddings();
        // The row is loaded rather than skipped.
        expect(sections.length).toBe(1);
        // Empty BLOB → an entry exists for the id and it has length 0.
        const gotVec = embeddings.get("sec-empty-vec01");
        expect(gotVec).toBeDefined();
        expect(gotVec.length).toBe(0);
    }
    finally {
        db.close();
        store.close();
    }
});
|
|
550
|
+
// Both of two stored section rows have to come back. A loader that stopped
// after the first row, or let a later row overwrite an earlier one, would
// shrink the index and break clustering.
it("loads multiple section rows, one entry per id", () => {
    const { store, dbPath } = openTempDB();
    const raw = rawDB(dbPath);
    try {
        const insertRow = raw.prepare(`
            INSERT OR REPLACE INTO sections
            (id, file_path, heading, heading_level, anchor, start_line, end_line,
            content_hash, raw_content, embed_text, prose_word_count, has_table,
            has_code, inbound_count, embedding, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
        // Only the fields that differ between the two rows live in the table;
        // the remaining columns are the same constants for both inserts.
        const fixtures = [
            { id: "sec-a", path: "docs/a.md", heading: "A", anchor: "a", hash: "ha", rawContent: "## A", embedText: "a", vec: [0.1, 0.2] },
            { id: "sec-b", path: "docs/b.md", heading: "B", anchor: "b", hash: "hb", rawContent: "## B", embedText: "b", vec: [0.3, 0.4] },
        ];
        for (const row of fixtures) {
            insertRow.run(row.id, row.path, row.heading, 2, row.anchor, 1, 2, row.hash, row.rawContent, row.embedText, 1, 0, 0, 0, encodeVecTest(row.vec), "2024-01-01T00:00:00Z");
        }

        const loaded = store.loadAllSectionsWithEmbeddings();
        const loadedIds = new Set(loaded.sections.map((section) => section.id));
        expect(loaded.sections.length).toBe(2);
        // Order-independent comparison: only the set of ids matters.
        expect(loadedIds).toEqual(new Set(["sec-a", "sec-b"]));
        expect(loaded.embeddings.size).toBe(2);
    }
    finally {
        raw.close();
        store.close();
    }
});
|
|
574
|
+
});
|
|
575
|
+
describe("decodeVec — exported edge cases", () => {
    // decodeVec is exported so the indexer can decode raw BLOBs itself. A NULL
    // and an empty BLOB both mean "no vector", so each must produce a
    // zero-length result (the Go implementation returned nil) and never a
    // 1-element NaN, which would corrupt downstream cosine scores.
    it("returns an empty Float32Array for null and for an empty BLOB", () => {
        const noVectorInputs = [null, new Uint8Array(0)];
        for (const input of noVectorInputs) {
            expect(decodeVec(input).length).toBe(0);
        }
    });

    // A populated BLOB is a sequence of little-endian float32 values. That
    // byte layout is the contract shared with the Go binary, so a wrong
    // endianness or stride would mangle every vector read from a Go-written DB.
    it("decodes a little-endian float32 BLOB to the original values", () => {
        const original = [1.5, -2.25, 0.125];
        const decoded = decodeVec(encodeVecTest(original));
        expect(decoded.length).toBe(original.length);
        original.forEach((want, idx) => {
            expect(decoded[idx]).toBeCloseTo(want, 5);
        });
    });
});
|
|
595
|
+
describe("querySections", () => {
    // The indexer's diff-and-skip reads exactly id, content_hash and embedding
    // from sections, and needs the embedding surfaced as stored so it can tell
    // a section that already has a vector from one that needs re-embedding.
    // This case covers a row with a populated BLOB.
    it("returns id, content_hash, and the raw embedding BLOB per row", () => {
        const { store, dbPath } = openTempDB();
        const raw = rawDB(dbPath);
        try {
            const storedBlob = encodeVecTest([0.7, 0.8]);
            const insertSection = raw.prepare(`
                INSERT OR REPLACE INTO sections
                (id, file_path, heading, heading_level, anchor, start_line, end_line,
                content_hash, raw_content, embed_text, prose_word_count, has_table,
                has_code, inbound_count, embedding, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
            insertSection.run("sec-q-0001", "docs/q.md", "Q", 2, "q", 1, 3, "hash-q", "## Q", "q", 2, 0, 0, 0, storedBlob, "2024-01-01T00:00:00Z");

            const result = store.querySections();
            expect(result.length).toBe(1);

            const row = result[0];
            expect(row.id).toBe("sec-q-0001");
            expect(row.content_hash).toBe("hash-q");
            expect(row.embedding).not.toBeNull();
            // Compare byte contents through plain arrays so the container type
            // of the returned BLOB does not affect the equality check.
            const gotBytes = Array.from(row.embedding);
            const wantBytes = Array.from(storedBlob);
            expect(gotBytes).toEqual(wantBytes);
        }
        finally {
            raw.close();
            store.close();
        }
    });
});
|
|
624
|
+
describe("execTx", () => {
    // Statement text shared by both cases below.
    const insertSectionSQL = `
        INSERT OR REPLACE INTO sections
        (id, file_path, heading, heading_level, anchor, start_line, end_line,
        content_hash, raw_content, embed_text, prose_word_count, has_table,
        has_code, inbound_count, embedding, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`;

    // A callback that finishes without error must be committed: execTx exists
    // for durable batched writes, so a row written inside it has to be there
    // once the call returns.
    it("commits writes made inside the callback", () => {
        const { store, dbPath } = openTempDB();
        const writeOneRow = (tx) => {
            tx.prepare(insertSectionSQL).run("sec-tx-0001", "docs/tx.md", "T", 2, "t", 1, 2, "hash-tx", "## T", "t", 1, 0, 0, 0, encodeVecTest([0.9]), "2024-01-01T00:00:00Z");
        };
        try {
            store.execTx(writeOneRow);
        }
        finally {
            store.close();
        }
        // Committed: a fresh raw connection can see the row.
        expect(countSectionsRows(dbPath)).toBe(1);
    });

    // A throwing callback must be rolled back and its error rethrown. Partial
    // writes from a failed batch must not persist, otherwise the index is left
    // in a torn state the indexer can neither trust nor detect.
    it("rolls back and rethrows when the callback throws", () => {
        const { store, dbPath } = openTempDB();
        const boom = new Error("boom");
        // Writes a row first, then fails, so there is something to roll back.
        const writeThenFail = (tx) => {
            tx.prepare(insertSectionSQL).run("sec-tx-rollback", "docs/tx.md", "T", 2, "t", 1, 2, "hash-tx", "## T", "t", 1, 0, 0, 0, encodeVecTest([0.9]), "2024-01-01T00:00:00Z");
            throw boom;
        };
        const runFailingTx = () => store.execTx(writeThenFail);
        try {
            expect(runFailingTx).toThrow(boom);
        }
        finally {
            store.close();
        }
        // Rolled back: the row was never persisted.
        expect(countSectionsRows(dbPath)).toBe(0);
    });
});
|
|
669
|
+
describe("open — recreate after unreadable meta", () => {
    // A file that exists but has no meta table is treated as an old or corrupt
    // DB. open must not crash on the failed meta read; it falls back to
    // delete-and-recreate so a damaged cache heals itself instead of wedging
    // every run.
    it("recreates the DB when the meta table is missing", () => {
        const dbPath = newTempPath();

        // Seed phase: build the full schema, then remove meta so the meta read
        // fails, and leave one section row behind for the recreate to wipe.
        const seed = rawDB(dbPath);
        try {
            seed.exec(createTableSQL); // sections + meta + blocks
            seed.exec(`DROP TABLE meta`); // remove meta so readMeta fails
            insertSectionRowDB(seed); // a row that recreate must wipe
        }
        finally {
            seed.close();
        }

        const opened = open(dbPath, testModel, testDim);
        try {
            expect(opened.status).toBe(OpenStatus.StatusSchemaRecreated);
            // Rebuilt from scratch: meta is back and the seeded row is gone.
            expect(readMetaVersion(dbPath)).toBe(SchemaVersion);
            expect(countSectionsRows(dbPath)).toBe(0);
        }
        finally {
            opened.store.close();
        }
    });
});
|
|
697
|
+
describe("open — migration apply failure", () => {
    // When a registered migration throws, open must neither swallow the error
    // nor leave a half-migrated DB behind. The error propagates after the
    // rollback, so the run fails loudly instead of continuing on a schema that
    // was never actually upgraded.
    it("rolls back and surfaces the error when a migration throws", () => {
        const dbPath = newTempPath();
        setupMigrationDB(dbPath, "0"); // stored version 0, has a section row

        const explodingMigration = () => {
            throw new Error("migration exploded");
        };
        // registerTestMigration returns a function that is called in `finally`
        // below to undo the registration.
        const unregister = registerTestMigration("0", explodingMigration);
        const attemptOpen = () => open(dbPath, testModel, testDim);
        try {
            expect(attemptOpen).toThrow(/apply migration/);
            // Rolled back: schema_version was not bumped to the binary version.
            expect(readMetaVersion(dbPath)).toBe("0");
            // The row that existed before the migration is still there.
            expect(countSectionsRows(dbPath)).toBe(1);
        }
        finally {
            unregister();
        }
    });
});
|
|
719
|
+
describe("open — embedder purge failure", () => {
    // The purge issues DELETE FROM sections inside a transaction. If that
    // statement fails (for example because the sections table is gone), open
    // must rethrow after closing the connection rather than hand back a Store
    // on a DB it could not reconcile.
    it("rethrows when the sections delete fails during purge", () => {
        const dbPath = newTempPath();

        // Step 1: create a well-formed DB and release it.
        const initial = open(dbPath, testModel, testDim);
        initial.store.close();

        // Step 2: corrupt it. Mismatch the stored model so a purge is
        // triggered, and drop sections so the purge's DELETE raises.
        setMetaValue(dbPath, "embedder_model", "wrong-model");
        const raw = rawDB(dbPath);
        try {
            raw.exec(`DROP TABLE sections`);
        }
        finally {
            raw.close();
        }

        // Step 3: reopening must surface the purge failure.
        const reopen = () => open(dbPath, testModel, testDim);
        expect(reopen).toThrow(/embedder purge/);
    });
});
|