docsgov 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/README.md +242 -0
  2. package/dist/apispec/apispec.js +401 -0
  3. package/dist/apispec/apispec.test.js +444 -0
  4. package/dist/apispec/errors.js +17 -0
  5. package/dist/apispec/index.js +2 -0
  6. package/dist/check/doclinks.js +167 -0
  7. package/dist/check/index.js +8 -0
  8. package/dist/check/run.js +391 -0
  9. package/dist/check/run.test.js +513 -0
  10. package/dist/check/suggest.js +134 -0
  11. package/dist/check/suggest.test.js +92 -0
  12. package/dist/check/tokens.js +125 -0
  13. package/dist/cmd/main.js +330 -0
  14. package/dist/cmd/main.test.js +422 -0
  15. package/dist/codeq/cache.js +71 -0
  16. package/dist/codeq/cache.test.js +67 -0
  17. package/dist/codeq/errors.js +52 -0
  18. package/dist/codeq/grammars/tree-sitter-go.wasm +0 -0
  19. package/dist/codeq/grammars/tree-sitter-java.wasm +0 -0
  20. package/dist/codeq/grammars/tree-sitter-javascript.wasm +0 -0
  21. package/dist/codeq/grammars/tree-sitter-tsx.wasm +0 -0
  22. package/dist/codeq/grammars/tree-sitter-typescript.wasm +0 -0
  23. package/dist/codeq/index.js +11 -0
  24. package/dist/codeq/resolve.test.js +109 -0
  25. package/dist/codeq/resolver.js +128 -0
  26. package/dist/codeq/resolver.test.js +124 -0
  27. package/dist/codeq/resolvers/go.js +242 -0
  28. package/dist/codeq/resolvers/go.test.js +143 -0
  29. package/dist/codeq/resolvers/java.js +349 -0
  30. package/dist/codeq/resolvers/java.test.js +138 -0
  31. package/dist/codeq/resolvers/java_queries.js +63 -0
  32. package/dist/codeq/resolvers/javascript.js +412 -0
  33. package/dist/codeq/resolvers/javascript.test.js +125 -0
  34. package/dist/codeq/resolvers/javascript_queries.js +46 -0
  35. package/dist/codeq/resolvers/typescript.js +366 -0
  36. package/dist/codeq/resolvers/typescript.test.js +180 -0
  37. package/dist/codeq/resolvers/typescript_queries.js +78 -0
  38. package/dist/codeq/signature.js +50 -0
  39. package/dist/codeq/signature.test.js +50 -0
  40. package/dist/codeq/suggest.js +96 -0
  41. package/dist/codeq/treesitter.js +122 -0
  42. package/dist/codeq/treesitter.test.js +118 -0
  43. package/dist/config/config.js +74 -0
  44. package/dist/config/config.test.js +98 -0
  45. package/dist/config/fs.js +116 -0
  46. package/dist/config/glob.js +82 -0
  47. package/dist/config/glob.test.js +61 -0
  48. package/dist/config/index.js +4 -0
  49. package/dist/dedup/analyzer/analyzer.js +533 -0
  50. package/dist/dedup/analyzer/analyzer.test.js +530 -0
  51. package/dist/dedup/analyzer/canonical.js +74 -0
  52. package/dist/dedup/analyzer/canonical.test.js +70 -0
  53. package/dist/dedup/analyzer/cosine_clusters.js +169 -0
  54. package/dist/dedup/analyzer/cosine_clusters.test.js +131 -0
  55. package/dist/dedup/analyzer/distinctive.js +85 -0
  56. package/dist/dedup/analyzer/distinctive.test.js +49 -0
  57. package/dist/dedup/analyzer/exact_clusters.js +63 -0
  58. package/dist/dedup/analyzer/exact_clusters.test.js +81 -0
  59. package/dist/dedup/analyzer/index.js +14 -0
  60. package/dist/dedup/analyzer/multiplicity.js +110 -0
  61. package/dist/dedup/analyzer/multiplicity.test.js +123 -0
  62. package/dist/dedup/analyzer/order.js +22 -0
  63. package/dist/dedup/analyzer/partial_overlaps.js +65 -0
  64. package/dist/dedup/analyzer/partial_overlaps.test.js +161 -0
  65. package/dist/dedup/analyzer/preview.js +84 -0
  66. package/dist/dedup/analyzer/preview.test.js +46 -0
  67. package/dist/dedup/analyzer/safety.js +27 -0
  68. package/dist/dedup/analyzer/safety.test.js +39 -0
  69. package/dist/dedup/config.js +18 -0
  70. package/dist/dedup/configload.js +299 -0
  71. package/dist/dedup/configload.test.js +410 -0
  72. package/dist/dedup/dedup.index.test.js +203 -0
  73. package/dist/dedup/dedup.js +143 -0
  74. package/dist/dedup/dedup.test.js +212 -0
  75. package/dist/dedup/dedupcfg/config.js +112 -0
  76. package/dist/dedup/dedupcfg/config.test.js +70 -0
  77. package/dist/dedup/dedupcfg/index.js +1 -0
  78. package/dist/dedup/deduptypes/index.js +1 -0
  79. package/dist/dedup/deduptypes/types.js +9 -0
  80. package/dist/dedup/deduptypes/types.test.js +34 -0
  81. package/dist/dedup/embedder/cache.js +23 -0
  82. package/dist/dedup/embedder/cache.test.js +50 -0
  83. package/dist/dedup/embedder/constants.js +10 -0
  84. package/dist/dedup/embedder/embedder.js +76 -0
  85. package/dist/dedup/embedder/embedder.mock.test.js +128 -0
  86. package/dist/dedup/embedder/embedder.test.js +96 -0
  87. package/dist/dedup/embedder/errors.js +20 -0
  88. package/dist/dedup/embedder/errors.test.js +35 -0
  89. package/dist/dedup/embedder/index.js +4 -0
  90. package/dist/dedup/embedder/session.js +78 -0
  91. package/dist/dedup/embedder/session.test.js +172 -0
  92. package/dist/dedup/gitignore.js +97 -0
  93. package/dist/dedup/gitignore.test.js +98 -0
  94. package/dist/dedup/index.js +11 -0
  95. package/dist/dedup/indexdb/errors.js +48 -0
  96. package/dist/dedup/indexdb/index.js +6 -0
  97. package/dist/dedup/indexdb/indexdb.js +302 -0
  98. package/dist/dedup/indexdb/indexdb.test.js +739 -0
  99. package/dist/dedup/indexdb/load.js +110 -0
  100. package/dist/dedup/indexdb/migrations.js +58 -0
  101. package/dist/dedup/indexdb/schema.js +83 -0
  102. package/dist/dedup/indexer/index.js +9 -0
  103. package/dist/dedup/indexer/indexer.js +501 -0
  104. package/dist/dedup/indexer/indexer.test.js +510 -0
  105. package/dist/dedup/indexer/links.js +89 -0
  106. package/dist/dedup/mdsection/anchor.js +60 -0
  107. package/dist/dedup/mdsection/anchor.test.js +39 -0
  108. package/dist/dedup/mdsection/blocks.js +409 -0
  109. package/dist/dedup/mdsection/blocks.test.js +359 -0
  110. package/dist/dedup/mdsection/index.js +4 -0
  111. package/dist/dedup/mdsection/parse.js +21 -0
  112. package/dist/dedup/mdsection/section.js +234 -0
  113. package/dist/dedup/mdsection/section.test.js +221 -0
  114. package/dist/dedup/report/floatfmt.js +71 -0
  115. package/dist/dedup/report/floatfmt.test.js +42 -0
  116. package/dist/dedup/report/index.js +8 -0
  117. package/dist/dedup/report/quote.js +77 -0
  118. package/dist/dedup/report/quote.test.js +67 -0
  119. package/dist/dedup/report/text.js +251 -0
  120. package/dist/dedup/report/text.test.js +420 -0
  121. package/dist/dedup/report_types.js +8 -0
  122. package/dist/dedup/sectionid/index.js +1 -0
  123. package/dist/dedup/sectionid/sectionid.js +16 -0
  124. package/dist/dedup/sectionid/sectionid.test.js +49 -0
  125. package/dist/guard/api/errors.js +12 -0
  126. package/dist/guard/api/index.js +2 -0
  127. package/dist/guard/api/parser.js +81 -0
  128. package/dist/guard/api/parser.test.js +58 -0
  129. package/dist/guard/api/types.js +1 -0
  130. package/dist/guard/code/errors.js +16 -0
  131. package/dist/guard/code/index.js +2 -0
  132. package/dist/guard/code/parser.js +54 -0
  133. package/dist/guard/code/parser.test.js +111 -0
  134. package/dist/guard/code/types.js +6 -0
  135. package/dist/index.js +1 -0
  136. package/dist/index.test.js +5 -0
  137. package/dist/repo/boundary.js +92 -0
  138. package/dist/repo/boundary.test.js +65 -0
  139. package/dist/repo/errors.js +56 -0
  140. package/dist/repo/errors.test.js +85 -0
  141. package/dist/repo/exists.test.js +72 -0
  142. package/dist/repo/filename.js +46 -0
  143. package/dist/repo/filename.test.js +39 -0
  144. package/dist/repo/fs.js +53 -0
  145. package/dist/repo/index.js +7 -0
  146. package/dist/repo/overlay.js +36 -0
  147. package/dist/repo/overlay.test.js +80 -0
  148. package/dist/repo/repo.js +353 -0
  149. package/dist/repo/repo.test.js +255 -0
  150. package/dist/repo/testutil.js +27 -0
  151. package/dist/repo/write.test.js +125 -0
  152. package/dist/report/color.js +73 -0
  153. package/dist/report/index.js +1 -0
  154. package/dist/report/report.js +112 -0
  155. package/dist/report/report.test.js +368 -0
  156. package/dist/violation/index.js +1 -0
  157. package/dist/violation/types.js +22 -0
  158. package/dist/violation/types.test.js +70 -0
  159. package/package.json +48 -0
@@ -0,0 +1,97 @@
1
+ // Manages the docgov-owned block in .docgov/dedup/.gitignore that keeps the
2
+ // local SQLite index cache out of git.
3
+ //
4
+ // Ported from internal/dedup/gitignore.go. Go's os.ReadFile/WriteFile become
5
+ // node:fs/promises readFile/writeFile; a missing file (ENOENT) is treated as
6
+ // empty existing content (Go's os.ErrNotExist branch). Byte-for-byte output is a
7
+ // contract — the gitignore tests assert exact content — so the header string
8
+ // (em-dash and backtick included) and the blank-line / trailing-newline rules
9
+ // are preserved verbatim.
10
+ import { readFile, writeFile } from "node:fs/promises";
11
+ import * as path from "node:path";
/**
 * dedupGitignoreHeader is the comment line that labels the block docgov manages
 * in .docgov/dedup/.gitignore. Its exact bytes (em-dash and backticks included)
 * are written to disk and asserted by the tests — do not reformat it.
 */
export const dedupGitignoreHeader = "# docgov dedup index cache — local, rebuilt by `docgov dedup`.";
/**
 * dedupGitignoreRules are the patterns that keep the local SQLite index cache out
 * of git: the DB plus its WAL-mode sidecars. config.yml and the .gitignore itself
 * stay tracked.
 *
 * The array is frozen because it is exported and shared: an accidental
 * push/splice by an importer would otherwise silently change what every later
 * ensureDedupGitignore call writes.
 */
export const dedupGitignoreRules = Object.freeze(["index.db", "index.db-wal", "index.db-shm"]);
/**
 * isNotExist tells whether err is Node's "no such file" error: a non-null object
 * whose `code` property is "ENOENT" (the counterpart of Go's os.ErrNotExist).
 * Anything else — primitives, null, other error codes — yields false.
 */
function isNotExist(err) {
  if (typeof err !== "object" || err === null) {
    return false;
  }
  return err.code === "ENOENT";
}
/**
 * ensureDedupGitignore guarantees that <dedupDir>/.gitignore lists every cache
 * rule in dedupGitignoreRules.
 *
 * A rule counts as present when it appears as an exact trimmed line (comments
 * and blank lines are ignored). When nothing is missing the file is not written
 * at all; otherwise the file is rewritten as the existing content followed by a
 * docgov block holding only the missing rules, so a user's own entries survive.
 *
 * @param dedupDir directory that holds (or will hold) the .gitignore; it must
 *   already exist.
 * @throws Error wrapping the underlying failure (as `cause`) when the file
 *   cannot be read for a reason other than ENOENT, or cannot be written.
 */
export async function ensureDedupGitignore(dedupDir) {
  const target = path.join(dedupDir, ".gitignore");
  let current;
  try {
    current = await readFile(target, "utf8");
  }
  catch (err) {
    if (isNotExist(err)) {
      // No file yet: behave as if it were empty (Go's ErrNotExist branch).
      current = "";
    }
    else {
      throw new Error(`read ${quote(target)}: ${String(err)}`, { cause: err });
    }
  }
  const have = gitignoreLineSet(current);
  const toAdd = dedupGitignoreRules.filter((rule) => !have.has(rule));
  if (toAdd.length > 0) {
    try {
      await writeFile(target, appendGitignoreBlock(current, toAdd));
    }
    catch (err) {
      throw new Error(`write ${quote(target)}: ${String(err)}`, { cause: err });
    }
  }
  // Otherwise every rule is already covered and the file is left untouched.
}
/**
 * gitignoreLineSet collects the rules already in effect in a .gitignore body:
 * each line is trimmed, then blank lines and lines starting with "#" are dropped.
 *
 * @param body full text of a .gitignore file (may be empty).
 * @returns a Set of the remaining trimmed lines.
 */
export function gitignoreLineSet(body) {
  const rules = body
    .split("\n")
    // trim() also strips a trailing "\r", so CRLF files compare equal to LF ones.
    .map((raw) => raw.trim())
    .filter((line) => line !== "" && !line.startsWith("#"));
  return new Set(rules);
}
/**
 * appendGitignoreBlock builds the new file content: `existing`, then the docgov
 * header line, then one line per entry of `missing`, ending with a newline.
 *
 * - When `existing` is empty or whitespace-only, the block is returned alone.
 * - Otherwise the block is separated from the prior content by one blank line;
 *   if `existing` lacks a final newline, one is added first so the header never
 *   glues onto the user's last line.
 *
 * @param existing current .gitignore text.
 * @param missing rules to add, in the order they should appear.
 * @returns the complete text to write.
 */
export function appendGitignoreBlock(existing, missing) {
  const block = `${dedupGitignoreHeader}\n${missing.join("\n")}\n`;
  if (existing.trim() === "") {
    return block;
  }
  // One "\n" is the blank separator line; the extra one terminates an
  // unterminated last line of the existing content.
  const separator = existing.endsWith("\n") ? "\n" : "\n\n";
  return `${existing}${separator}${block}`;
}
/**
 * quote renders a path for error messages as a double-quoted string, standing in
 * for Go's %q. It uses JSON.stringify, so embedded quotes and backslashes are
 * escaped.
 */
function quote(s) {
  const rendered = JSON.stringify(s);
  return rendered;
}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Behavior-encoding tests for the docgov-owned .docgov/dedup/.gitignore block,
3
+ * ported from internal/dedup/gitignore_test.go.
4
+ *
5
+ * WHY each case matters: this file decides what stays out of git. The exact
6
+ * bytes are a contract — a user-tracked config.yml must never be ignored, the
7
+ * user's own entries must survive, and rules must never duplicate — so the tests
8
+ * assert on literal content, not just rule membership.
9
+ *
10
+ * Go used t.TempDir + os.ReadFile/WriteFile; vitest has neither, so we mkdtemp
11
+ * under os.tmpdir, clean each dir in afterEach, and read/write with node:fs.
12
+ */
13
+ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
14
+ import { tmpdir } from "node:os";
15
+ import { join } from "node:path";
16
+ import { afterEach, describe, expect, it } from "vitest";
17
+ import { dedupGitignoreRules, ensureDedupGitignore, gitignoreLineSet, } from "./gitignore.js";
// Registry of every temp directory handed out by newDir during the current test.
const tmpDirs = [];
afterEach(() => {
  // splice(0) empties the registry and yields what it held, so each directory is
  // removed exactly once and the next test starts with a clean list.
  tmpDirs.splice(0).forEach((dir) => {
    rmSync(dir, { recursive: true, force: true });
  });
});
/**
 * newDir creates a fresh temp directory (prefix "dedup-gitignore-" under the OS
 * temp dir), records it in tmpDirs for later cleanup, and returns its path.
 */
function newDir() {
  const prefix = join(tmpdir(), "dedup-gitignore-");
  const created = mkdtempSync(prefix);
  tmpDirs.push(created);
  return created;
}
/** readGitignore returns the UTF-8 text of the .gitignore file inside dir. */
function readGitignore(dir) {
  const file = join(dir, ".gitignore");
  return readFileSync(file, "utf8");
}
/** writeGitignore creates or overwrites the .gitignore file inside dir with content. */
function writeGitignore(dir, content) {
  const file = join(dir, ".gitignore");
  writeFileSync(file, content);
}
/**
 * countLine reports how many lines of body, after trimming, are exactly equal to
 * want (the port of Go's countLine test helper).
 */
function countLine(body, want) {
  return body.split("\n").filter((raw) => raw.trim() === want).length;
}
describe("ensureDedupGitignore", () => {
  // The header line exactly as it must appear on disk. It is spelled out here
  // (not imported) so a change to the production constant fails these tests.
  const header = "# docgov dedup index cache — local, rebuilt by `docgov dedup`.";
  // WHY: first run on a fresh folder must drop a .gitignore that excludes the
  // index.db cache + its WAL/SHM sidecars — but never the tracked config.yml.
  it("creates the file with exactly the cache rules when absent", async () => {
    const dir = newDir();
    await ensureDedupGitignore(dir);
    const got = readGitignore(dir);
    expect(got).toBe(header + "\n" + "index.db\nindex.db-wal\nindex.db-shm\n");
    const set = gitignoreLineSet(got);
    for (const rule of dedupGitignoreRules) {
      expect(set.has(rule)).toBe(true);
    }
    // config.yml must stay tracked.
    expect(set.has("config.yml")).toBe(false);
  });
  // WHY: the check is on the rules, not on file existence — when every cache
  // rule is already present (alongside the user's own entries), the file must be
  // left byte-for-byte untouched so we never churn a user's file.
  it("leaves the file untouched when all rules are already present", async () => {
    const dir = newDir();
    const custom = "# my notes\nscratch/\nindex.db\nindex.db-wal\nindex.db-shm\n";
    writeGitignore(dir, custom);
    await ensureDedupGitignore(dir);
    expect(readGitignore(dir)).toBe(custom);
  });
  // WHY: an existing .gitignore missing some cache rules must keep its content
  // and gain ONLY the missing rules — index.db already there must not duplicate.
  it("appends only the missing rules and keeps existing content", async () => {
    const dir = newDir();
    const custom = "# my notes\nindex.db\n";
    writeGitignore(dir, custom);
    await ensureDedupGitignore(dir);
    const got = readGitignore(dir);
    // Exact bytes: prior content, one blank separator line, then the block.
    expect(got).toBe(custom + "\n" + header + "\nindex.db-wal\nindex.db-shm\n");
    expect(got).toContain(custom);
    const set = gitignoreLineSet(got);
    for (const rule of dedupGitignoreRules) {
      expect(set.has(rule)).toBe(true);
    }
    // index.db was already present — must appear exactly once (no duplicate).
    expect(countLine(got, "index.db")).toBe(1);
  });
  // WHY: a user's last line without a trailing newline must be terminated
  // before the blank separator, or the header would glue onto that line and
  // turn the user's rule into a different pattern.
  it("terminates an unterminated last line before appending the block", async () => {
    const dir = newDir();
    writeGitignore(dir, "scratch/");
    await ensureDedupGitignore(dir);
    expect(readGitignore(dir)).toBe("scratch/\n\n" + header + "\nindex.db\nindex.db-wal\nindex.db-shm\n");
  });
  // WHY: a second call after an append must be a no-op — once all rules are
  // present, nothing more is written (idempotency keeps repeated runs clean).
  it("is idempotent: a second call after an append changes nothing", async () => {
    const dir = newDir();
    writeGitignore(dir, "# my notes\nindex.db\n");
    await ensureDedupGitignore(dir);
    const after = readGitignore(dir);
    await ensureDedupGitignore(dir);
    expect(readGitignore(dir)).toBe(after);
  });
});
@@ -0,0 +1,11 @@
1
+ // Barrel for the top-level dedup facade.
2
+ //
3
+ // Exports the two entry points the CLI calls (Index, Analyze) and the public
4
+ // report types (so cmd and the dedup/report subpackage can import them from the
5
+ // package root). Config types + Default and the config loader are re-exported
6
+ // too, matching the Go facade's surface. The subpackages (analyzer, indexer,
7
+ // indexdb, …) keep their own barrels and are not re-exported here.
8
+ export { Index, Analyze } from "./dedup.js";
9
+ export { Default } from "./config.js";
10
+ export { Load, YAMLTypeError } from "./configload.js";
11
+ export { ensureDedupGitignore, dedupGitignoreHeader, dedupGitignoreRules, } from "./gitignore.js";
@@ -0,0 +1,48 @@
1
+ // Sentinel error and the Open outcome enum for the dedup index DB.
2
+ //
3
+ // Go's `var ErrIndexMissing = errors.New(...)` becomes an Error subclass so
4
+ // callers match with `instanceof` instead of errors.Is.
/**
 * ErrIndexMissing is the sentinel for "read-only mode was requested but the
 * index database does not exist". Callers match it with `instanceof`.
 *
 * Per the port notes, v1's Open always creates the DB, so v1 callers never see
 * this error; it is kept for the v2 read-only mode hook.
 */
export class ErrIndexMissing extends Error {
  /** Own `name` property so the error prints as "ErrIndexMissing: …". */
  name = "ErrIndexMissing";
  /** @param message override for the default sentinel text. */
  constructor(message = "indexdb: index database does not exist") {
    super(message);
  }
}
/**
 * OpenStatus indicates what Open did when it opened (or created) the DB.
 * It is data, not state — callers read the `status` field of Open's result;
 * there is no Store.status() accessor.
 *
 * The numeric values match Go's iota so StatusOpened stays the zero value
 * ("opened, nothing to report"). As with a compiled TypeScript enum, the object
 * maps both ways: name → number and number → name. It is a frozen `const`
 * because an enum shared across modules must not be reassigned or mutated.
 */
export const OpenStatus = Object.freeze({
  /**
   * The DB existed and all meta matched — the common case. Zero value so a
   * freshly-declared status reads as "opened, nothing to report."
   */
  StatusOpened: 0,
  /** The DB was created for the first time. */
  StatusFresh: 1,
  /**
   * The stored schema_version was lower than the binary's SchemaVersion and
   * registered additive migrations covered the gap. Embeddings are preserved.
   */
  StatusSchemaMigrated: 2,
  /**
   * The stored schema_version mismatched and no migration path existed — the
   * DB file was deleted and recreated.
   */
  StatusSchemaRecreated: 3,
  /**
   * The stored embedder_model or embedder_dim did not match the binary's
   * constants — the sections table was purged and a full re-embed is needed.
   */
  StatusEmbedderPurged: 4,
  // Reverse mapping (number → name), kept for parity with the enum output.
  0: "StatusOpened",
  1: "StatusFresh",
  2: "StatusSchemaMigrated",
  3: "StatusSchemaRecreated",
  4: "StatusEmbedderPurged",
});
@@ -0,0 +1,6 @@
1
+ // Barrel for the indexdb package: the SQLite-backed dedup index cache.
2
+ export { Store, open, openDB, } from "./indexdb.js";
3
+ export { ErrIndexMissing, OpenStatus } from "./errors.js";
4
+ export { SchemaVersion, SQLiteOpenParams, MaxOpenConns, createTableSQL, createBlocksSQL, } from "./schema.js";
5
+ export { migrations, findMigration, registerTestMigration, } from "./migrations.js";
6
+ export { loadAllSectionsWithEmbeddings, loadAllBlocksWithEmbeddings, decodeVec, } from "./load.js";
@@ -0,0 +1,302 @@
1
+ /**
2
+ * Manages the dedup section index stored in a SQLite database.
3
+ *
4
+ * The DB lives at <repo_root>/.docgov/dedup/index.db. Callers resolve the repo
5
+ * root and pass the absolute DB path to open — this package never walks for
6
+ * .docgov or touches the sentinel constant (that stays in internal/repo).
7
+ *
8
+ * Ported from internal/dedup/indexdb. node:sqlite is synchronous, so Go's
9
+ * context.Context / database/sql async plumbing is dropped: open/close/queries
10
+ * are plain synchronous calls. A *sql.Tx becomes explicit BEGIN/COMMIT/ROLLBACK
11
+ * statements on the single connection, and *sql.Rows (a lazy cursor) becomes an
12
+ * eagerly-materialized array (node:sqlite has no row cursor — stmt.all reads all
13
+ * rows at once).
14
+ */
15
+ import { existsSync, rmSync } from "node:fs";
16
+ import { DatabaseSync } from "node:sqlite";
17
+ import { OpenStatus } from "./errors.js";
18
+ import { loadAllBlocksWithEmbeddings, loadAllSectionsWithEmbeddings } from "./load.js";
19
+ import { findMigration } from "./migrations.js";
20
+ import { createTableSQL, SchemaVersion, SQLiteOpenParams } from "./schema.js";
/**
 * Store is an open dedup index database: a thin wrapper around one synchronous
 * node:sqlite connection.
 */
export class Store {
  /** The underlying synchronous SQLite connection. */
  db;
  /** @param db an already-opened connection (see openDB). */
  constructor(db) {
    this.db = db;
  }
  /** Closes the underlying database connection. */
  close() {
    this.db.close();
  }
  /**
   * Delegates to loadAllSectionsWithEmbeddings from ./load.js, passing this
   * store's connection; the shape of the result is defined there.
   */
  loadAllSectionsWithEmbeddings() {
    return loadAllSectionsWithEmbeddings(this.db);
  }
  /**
   * Delegates to loadAllBlocksWithEmbeddings from ./load.js, passing this
   * store's connection; the shape of the result is defined there.
   */
  loadAllBlocksWithEmbeddings() {
    return loadAllBlocksWithEmbeddings(this.db);
  }
  /**
   * querySections returns every row of the sections table, reduced to the
   * fields the indexer needs for diff-and-skip logic: id, content_hash,
   * embedding.
   *
   * Go returned a lazy *sql.Rows the caller had to Close; here the full result
   * set is materialized into an array of plain objects.
   */
  querySections() {
    const rows = this.db
      .prepare(`SELECT id, content_hash, embedding FROM sections`)
      .all();
    return rows.map(({ id, content_hash, embedding }) => ({ id, content_hash, embedding }));
  }
  /**
   * queryBlocks returns every row of the blocks table, reduced to the four
   * columns the indexer needs for diff/prune: section_id, block_index,
   * content_hash, embedding. Like querySections, the whole result set is
   * materialized.
   */
  queryBlocks() {
    const rows = this.db
      .prepare(`SELECT section_id, block_index, content_hash, embedding FROM blocks`)
      .all();
    return rows.map(({ section_id, block_index, content_hash, embedding }) => ({
      section_id,
      block_index,
      content_hash,
      embedding,
    }));
  }
  /**
   * execTx runs fn inside one transaction started with a plain `BEGIN`. If fn
   * (or the COMMIT) throws, a ROLLBACK is attempted and that original error is
   * rethrown unchanged; otherwise the transaction is committed.
   *
   * fn receives the connection and must run its statements on it without
   * issuing its own BEGIN/COMMIT/ROLLBACK.
   *
   * @param fn callback invoked with the connection; its return value is ignored.
   */
  execTx(fn) {
    this.db.exec("BEGIN");
    try {
      fn(this.db);
      this.db.exec("COMMIT");
    }
    catch (err) {
      try {
        this.db.exec("ROLLBACK");
      }
      catch {
        // SQLite may already have rolled the transaction back on its own, in
        // which case ROLLBACK fails with "no transaction is active". That
        // secondary failure must not replace the error that caused it.
      }
      throw err;
    }
  }
}
/**
 * open opens (or creates) the dedup index DB at path and reconciles its stored
 * meta with what the caller expects.
 *
 * Outcomes, in the order they are checked:
 *   - file did not exist        → schema created, meta written → StatusFresh
 *   - meta cannot be read       → file deleted and recreated   → StatusSchemaRecreated
 *   - schema_version differs and findMigration returns a migration
 *                               → migration applied            → StatusSchemaMigrated
 *   - schema_version differs and no migration is registered
 *                               → file deleted and recreated   → StatusSchemaRecreated
 *   - embedder_model or embedder_dim differs
 *                               → sections purged, meta updated → StatusEmbedderPurged
 *   - everything matches        → StatusOpened
 *
 * Note that the migrated path returns before the embedder identity is compared.
 *
 * @param path absolute OS path of the database file.
 * @param embedderModel expected embedder model name.
 * @param embedderDim expected embedding dimension (compared as a string).
 * @returns `{ store, status }` — the open Store and the OpenStatus describing
 *   what happened.
 * @throws Error prefixed "indexdb.open: …" (original failure as `cause`) when a
 *   step fails; the connection is closed before throwing.
 */
export function open(path, embedderModel, embedderDim) {
  // All failures share one message shape: "indexdb.open: <step> <path>: <err>".
  const fail = (step, err) => new Error(`indexdb.open: ${step} ${quote(path)}: ${String(err)}`, { cause: err });
  const opened = (db, status) => ({ store: new Store(db), status });

  // The file must be probed before openDB, which would create it.
  const existed = existsSync(path);
  let db = openDB(path);
  const dimText = String(embedderDim);

  if (!existed) {
    try {
      createSchema(db);
      writeMetaValues(db, embedderModel, dimText);
    }
    catch (err) {
      db.close();
      throw fail("initialize", err);
    }
    return opened(db, OpenStatus.StatusFresh);
  }

  let stored;
  try {
    stored = readMeta(db);
  }
  catch {
    // The meta table may be absent in a very old or corrupt DB: start over.
    db.close();
    try {
      return recreateDB(path, embedderModel, dimText);
    }
    catch (rerr) {
      throw fail("recreate after bad meta", rerr);
    }
  }

  if (stored.version !== SchemaVersion) {
    db.close();
    const mig = findMigration(stored.version);
    if (mig === undefined) {
      // No migration path — delete and recreate.
      try {
        return recreateDB(path, embedderModel, dimText);
      }
      catch (err) {
        throw fail("recreate", err);
      }
    }
    db = openDB(path);
    try {
      applyMigration(db, mig);
    }
    catch (err) {
      db.close();
      throw fail("apply migration", err);
    }
    return opened(db, OpenStatus.StatusSchemaMigrated);
  }

  const sameEmbedder = stored.model === embedderModel && stored.dim === dimText;
  if (sameEmbedder) {
    return opened(db, OpenStatus.StatusOpened);
  }
  try {
    purgeEmbedderValues(db, embedderModel, dimText);
  }
  catch (err) {
    db.close();
    throw fail("embedder purge", err);
  }
  return opened(db, OpenStatus.StatusEmbedderPurged);
}
188
+ /**
189
+ * openDB opens the SQLite database at path and applies the locked connection
190
+ * PRAGMAs (the database/sql URI params have no node:sqlite equivalent).
191
+ */
192
+ export function openDB(path) {
193
+ const db = new DatabaseSync(path);
194
+ // Equivalent of SQLiteOpenParams (?_journal_mode=WAL&_busy_timeout=5000&_txlock=immediate).
195
+ // txlock=immediate has no PRAGMA form; ExecTx/applyMigration use plain BEGIN,
196
+ // and the single-connection model makes the lock-upgrade deadlock it guarded
197
+ // against impossible here.
198
+ db.exec("PRAGMA journal_mode = WAL;");
199
+ db.exec("PRAGMA busy_timeout = 5000;");
200
+ return db;
201
+ }
/**
 * createSchema executes the DDL held in createTableSQL (from ./schema.js) on db
 * in a single exec call.
 */
function createSchema(db) {
  const ddl = createTableSQL;
  db.exec(ddl);
}
/**
 * writeMetaValues upserts the three identity rows of the meta table, in this
 * order: schema_version (the module's SchemaVersion), embedder_model,
 * embedder_dim. One prepared statement is reused for all three.
 *
 * @param db open connection.
 * @param embedderModel value stored under "embedder_model".
 * @param embedderDimStr value stored under "embedder_dim" (already a string).
 */
function writeMetaValues(db, embedderModel, embedderDimStr) {
  const upsert = db.prepare(`INSERT OR REPLACE INTO meta(key, value) VALUES(?, ?)`);
  upsert.run("schema_version", SchemaVersion);
  upsert.run("embedder_model", embedderModel);
  upsert.run("embedder_dim", embedderDimStr);
}
/**
 * readMeta reads every row of the meta table and returns the stored identity as
 * `{ version, model, dim }`, taken from the keys "schema_version",
 * "embedder_model" and "embedder_dim". A key that is not present yields "";
 * other keys are ignored; if a key occurs more than once the last row wins.
 *
 * @throws whatever the query throws (e.g. when the meta table does not exist).
 */
function readMeta(db) {
  const rows = db.prepare(`SELECT key, value FROM meta`).all();
  const found = new Map();
  for (const row of rows) {
    found.set(row.key, row.value);
  }
  // has()/get() rather than `??` so a stored null is returned as-is.
  const pick = (key) => (found.has(key) ? found.get(key) : "");
  return {
    version: pick("schema_version"),
    model: pick("embedder_model"),
    dim: pick("embedder_dim"),
  };
}
/**
 * recreateDB throws away the database at path and builds a fresh one.
 *
 * The main file and its SQLite sidecars (-wal, -shm) are all removed, so the new
 * database cannot pick up stale WAL pages from the deleted one. The schema is
 * then created and the meta rows written.
 *
 * @returns `{ store, status }` with status StatusSchemaRecreated.
 * @throws the underlying error, after closing the new connection, if creating
 *   the schema or writing meta fails.
 */
function recreateDB(path, embedderModel, embedderDimStr) {
  const stale = [path, `${path}-wal`, `${path}-shm`];
  for (const file of stale) {
    // force: a file that is already absent is not an error.
    rmSync(file, { force: true });
  }
  const db = openDB(path);
  try {
    createSchema(db);
    writeMetaValues(db, embedderModel, embedderDimStr);
  }
  catch (err) {
    db.close();
    throw err;
  }
  return { store: new Store(db), status: OpenStatus.StatusSchemaRecreated };
}
/**
 * applyMigration runs mig inside one transaction (started with a plain `BEGIN`)
 * and, in the same transaction, sets the stored schema_version to SchemaVersion.
 *
 * @param db open connection.
 * @param mig migration object; `mig.apply(db, deps)` is called with an empty
 *   deps object, and `mig.fromVersion` is used in the error message.
 * @throws Error "migration from <version>: <err>" (original failure as `cause`)
 *   if the migration or the version bump fails; a ROLLBACK is attempted first.
 *   A failing COMMIT propagates unwrapped.
 */
function applyMigration(db, mig) {
  db.exec("BEGIN");
  try {
    mig.apply(db, {});
    db.prepare(`INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?)`).run(SchemaVersion);
  }
  catch (err) {
    try {
      db.exec("ROLLBACK");
    }
    catch {
      // SQLite may already have rolled back by itself; the resulting "no
      // transaction is active" failure must not hide the migration error.
    }
    throw new Error(`migration from ${quote(mig.fromVersion)}: ${String(err)}`, { cause: err });
  }
  db.exec("COMMIT");
}
/**
 * purgeEmbedderValues deletes every row of the sections table and upserts the
 * embedder_model / embedder_dim meta rows to the provided values, all inside one
 * transaction.
 *
 * NOTE(review): only `sections` is deleted here; confirm that rows in `blocks`
 * are removed as well (e.g. by a cascading foreign key in the schema), since
 * their embeddings would otherwise come from the previous embedder.
 *
 * @throws the underlying error, unchanged, if the delete or an upsert fails; a
 *   ROLLBACK is attempted first. A failing COMMIT propagates as-is.
 */
function purgeEmbedderValues(db, embedderModel, embedderDimStr) {
  db.exec("BEGIN");
  try {
    db.exec(`DELETE FROM sections`);
    const upsert = db.prepare(`INSERT OR REPLACE INTO meta(key, value) VALUES(?, ?)`);
    upsert.run("embedder_model", embedderModel);
    upsert.run("embedder_dim", embedderDimStr);
  }
  catch (err) {
    try {
      db.exec("ROLLBACK");
    }
    catch {
      // SQLite may already have rolled back by itself; the resulting "no
      // transaction is active" failure must not replace the real error.
    }
    throw err;
  }
  db.exec("COMMIT");
}
/**
 * quote renders a value for error messages as a double-quoted, escaped string,
 * approximating Go's %q. The value is converted to a string first, then
 * JSON-encoded, so embedded quotes and backslashes (e.g. in Windows paths) are
 * escaped instead of producing an ambiguous message.
 */
function quote(s) {
  return JSON.stringify(String(s));
}
301
+ // Re-export so SQLiteOpenParams stays reachable for tests that open a raw DB.
302
+ export { SQLiteOpenParams };