docsgov 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/README.md +242 -0
  2. package/dist/apispec/apispec.js +401 -0
  3. package/dist/apispec/apispec.test.js +444 -0
  4. package/dist/apispec/errors.js +17 -0
  5. package/dist/apispec/index.js +2 -0
  6. package/dist/check/doclinks.js +167 -0
  7. package/dist/check/index.js +8 -0
  8. package/dist/check/run.js +391 -0
  9. package/dist/check/run.test.js +513 -0
  10. package/dist/check/suggest.js +134 -0
  11. package/dist/check/suggest.test.js +92 -0
  12. package/dist/check/tokens.js +125 -0
  13. package/dist/cmd/main.js +330 -0
  14. package/dist/cmd/main.test.js +422 -0
  15. package/dist/codeq/cache.js +71 -0
  16. package/dist/codeq/cache.test.js +67 -0
  17. package/dist/codeq/errors.js +52 -0
  18. package/dist/codeq/grammars/tree-sitter-go.wasm +0 -0
  19. package/dist/codeq/grammars/tree-sitter-java.wasm +0 -0
  20. package/dist/codeq/grammars/tree-sitter-javascript.wasm +0 -0
  21. package/dist/codeq/grammars/tree-sitter-tsx.wasm +0 -0
  22. package/dist/codeq/grammars/tree-sitter-typescript.wasm +0 -0
  23. package/dist/codeq/index.js +11 -0
  24. package/dist/codeq/resolve.test.js +109 -0
  25. package/dist/codeq/resolver.js +128 -0
  26. package/dist/codeq/resolver.test.js +124 -0
  27. package/dist/codeq/resolvers/go.js +242 -0
  28. package/dist/codeq/resolvers/go.test.js +143 -0
  29. package/dist/codeq/resolvers/java.js +349 -0
  30. package/dist/codeq/resolvers/java.test.js +138 -0
  31. package/dist/codeq/resolvers/java_queries.js +63 -0
  32. package/dist/codeq/resolvers/javascript.js +412 -0
  33. package/dist/codeq/resolvers/javascript.test.js +125 -0
  34. package/dist/codeq/resolvers/javascript_queries.js +46 -0
  35. package/dist/codeq/resolvers/typescript.js +366 -0
  36. package/dist/codeq/resolvers/typescript.test.js +180 -0
  37. package/dist/codeq/resolvers/typescript_queries.js +78 -0
  38. package/dist/codeq/signature.js +50 -0
  39. package/dist/codeq/signature.test.js +50 -0
  40. package/dist/codeq/suggest.js +96 -0
  41. package/dist/codeq/treesitter.js +122 -0
  42. package/dist/codeq/treesitter.test.js +118 -0
  43. package/dist/config/config.js +74 -0
  44. package/dist/config/config.test.js +98 -0
  45. package/dist/config/fs.js +116 -0
  46. package/dist/config/glob.js +82 -0
  47. package/dist/config/glob.test.js +61 -0
  48. package/dist/config/index.js +4 -0
  49. package/dist/dedup/analyzer/analyzer.js +533 -0
  50. package/dist/dedup/analyzer/analyzer.test.js +530 -0
  51. package/dist/dedup/analyzer/canonical.js +74 -0
  52. package/dist/dedup/analyzer/canonical.test.js +70 -0
  53. package/dist/dedup/analyzer/cosine_clusters.js +169 -0
  54. package/dist/dedup/analyzer/cosine_clusters.test.js +131 -0
  55. package/dist/dedup/analyzer/distinctive.js +85 -0
  56. package/dist/dedup/analyzer/distinctive.test.js +49 -0
  57. package/dist/dedup/analyzer/exact_clusters.js +63 -0
  58. package/dist/dedup/analyzer/exact_clusters.test.js +81 -0
  59. package/dist/dedup/analyzer/index.js +14 -0
  60. package/dist/dedup/analyzer/multiplicity.js +110 -0
  61. package/dist/dedup/analyzer/multiplicity.test.js +123 -0
  62. package/dist/dedup/analyzer/order.js +22 -0
  63. package/dist/dedup/analyzer/partial_overlaps.js +65 -0
  64. package/dist/dedup/analyzer/partial_overlaps.test.js +161 -0
  65. package/dist/dedup/analyzer/preview.js +84 -0
  66. package/dist/dedup/analyzer/preview.test.js +46 -0
  67. package/dist/dedup/analyzer/safety.js +27 -0
  68. package/dist/dedup/analyzer/safety.test.js +39 -0
  69. package/dist/dedup/config.js +18 -0
  70. package/dist/dedup/configload.js +299 -0
  71. package/dist/dedup/configload.test.js +410 -0
  72. package/dist/dedup/dedup.index.test.js +203 -0
  73. package/dist/dedup/dedup.js +143 -0
  74. package/dist/dedup/dedup.test.js +212 -0
  75. package/dist/dedup/dedupcfg/config.js +112 -0
  76. package/dist/dedup/dedupcfg/config.test.js +70 -0
  77. package/dist/dedup/dedupcfg/index.js +1 -0
  78. package/dist/dedup/deduptypes/index.js +1 -0
  79. package/dist/dedup/deduptypes/types.js +9 -0
  80. package/dist/dedup/deduptypes/types.test.js +34 -0
  81. package/dist/dedup/embedder/cache.js +23 -0
  82. package/dist/dedup/embedder/cache.test.js +50 -0
  83. package/dist/dedup/embedder/constants.js +10 -0
  84. package/dist/dedup/embedder/embedder.js +76 -0
  85. package/dist/dedup/embedder/embedder.mock.test.js +128 -0
  86. package/dist/dedup/embedder/embedder.test.js +96 -0
  87. package/dist/dedup/embedder/errors.js +20 -0
  88. package/dist/dedup/embedder/errors.test.js +35 -0
  89. package/dist/dedup/embedder/index.js +4 -0
  90. package/dist/dedup/embedder/session.js +78 -0
  91. package/dist/dedup/embedder/session.test.js +172 -0
  92. package/dist/dedup/gitignore.js +97 -0
  93. package/dist/dedup/gitignore.test.js +98 -0
  94. package/dist/dedup/index.js +11 -0
  95. package/dist/dedup/indexdb/errors.js +48 -0
  96. package/dist/dedup/indexdb/index.js +6 -0
  97. package/dist/dedup/indexdb/indexdb.js +302 -0
  98. package/dist/dedup/indexdb/indexdb.test.js +739 -0
  99. package/dist/dedup/indexdb/load.js +110 -0
  100. package/dist/dedup/indexdb/migrations.js +58 -0
  101. package/dist/dedup/indexdb/schema.js +83 -0
  102. package/dist/dedup/indexer/index.js +9 -0
  103. package/dist/dedup/indexer/indexer.js +501 -0
  104. package/dist/dedup/indexer/indexer.test.js +510 -0
  105. package/dist/dedup/indexer/links.js +89 -0
  106. package/dist/dedup/mdsection/anchor.js +60 -0
  107. package/dist/dedup/mdsection/anchor.test.js +39 -0
  108. package/dist/dedup/mdsection/blocks.js +409 -0
  109. package/dist/dedup/mdsection/blocks.test.js +359 -0
  110. package/dist/dedup/mdsection/index.js +4 -0
  111. package/dist/dedup/mdsection/parse.js +21 -0
  112. package/dist/dedup/mdsection/section.js +234 -0
  113. package/dist/dedup/mdsection/section.test.js +221 -0
  114. package/dist/dedup/report/floatfmt.js +71 -0
  115. package/dist/dedup/report/floatfmt.test.js +42 -0
  116. package/dist/dedup/report/index.js +8 -0
  117. package/dist/dedup/report/quote.js +77 -0
  118. package/dist/dedup/report/quote.test.js +67 -0
  119. package/dist/dedup/report/text.js +251 -0
  120. package/dist/dedup/report/text.test.js +420 -0
  121. package/dist/dedup/report_types.js +8 -0
  122. package/dist/dedup/sectionid/index.js +1 -0
  123. package/dist/dedup/sectionid/sectionid.js +16 -0
  124. package/dist/dedup/sectionid/sectionid.test.js +49 -0
  125. package/dist/guard/api/errors.js +12 -0
  126. package/dist/guard/api/index.js +2 -0
  127. package/dist/guard/api/parser.js +81 -0
  128. package/dist/guard/api/parser.test.js +58 -0
  129. package/dist/guard/api/types.js +1 -0
  130. package/dist/guard/code/errors.js +16 -0
  131. package/dist/guard/code/index.js +2 -0
  132. package/dist/guard/code/parser.js +54 -0
  133. package/dist/guard/code/parser.test.js +111 -0
  134. package/dist/guard/code/types.js +6 -0
  135. package/dist/index.js +1 -0
  136. package/dist/index.test.js +5 -0
  137. package/dist/repo/boundary.js +92 -0
  138. package/dist/repo/boundary.test.js +65 -0
  139. package/dist/repo/errors.js +56 -0
  140. package/dist/repo/errors.test.js +85 -0
  141. package/dist/repo/exists.test.js +72 -0
  142. package/dist/repo/filename.js +46 -0
  143. package/dist/repo/filename.test.js +39 -0
  144. package/dist/repo/fs.js +53 -0
  145. package/dist/repo/index.js +7 -0
  146. package/dist/repo/overlay.js +36 -0
  147. package/dist/repo/overlay.test.js +80 -0
  148. package/dist/repo/repo.js +353 -0
  149. package/dist/repo/repo.test.js +255 -0
  150. package/dist/repo/testutil.js +27 -0
  151. package/dist/repo/write.test.js +125 -0
  152. package/dist/report/color.js +73 -0
  153. package/dist/report/index.js +1 -0
  154. package/dist/report/report.js +112 -0
  155. package/dist/report/report.test.js +368 -0
  156. package/dist/violation/index.js +1 -0
  157. package/dist/violation/types.js +22 -0
  158. package/dist/violation/types.test.js +70 -0
  159. package/package.json +48 -0
@@ -0,0 +1,110 @@
/**
 * loadAllSectionsWithEmbeddings reads every row of the `sections` table and
 * pairs each section record with its decoded embedding.
 *
 * @param db Database handle; only `db.prepare(sql).all()` is used.
 * @returns `{ sections, embeddings }`. `sections` is an array of plain section
 *   records in the order the query returned them (the query has no ORDER BY, so
 *   callers that need a deterministic order must sort afterwards). `embeddings`
 *   is a Map from section id to the Float32Array that `decodeVec` produced for
 *   that row's BLOB (documented by the writer side as L2-normalized).
 */
export function loadAllSectionsWithEmbeddings(db) {
    const statement = db.prepare(`
        SELECT id, file_path, heading, heading_level, anchor, start_line, end_line,
               content_hash, raw_content, embed_text, prose_word_count,
               has_table, has_code, inbound_count, embedding
        FROM sections
    `);
    const sections = [];
    const embeddings = new Map();
    statement.all().forEach((row) => {
        sections.push({
            id: row.id,
            file_path: row.file_path,
            heading: row.heading,
            heading_level: row.heading_level,
            anchor: row.anchor,
            start_line: row.start_line,
            end_line: row.end_line,
            content_hash: row.content_hash,
            raw_content: row.raw_content,
            embed_text: row.embed_text,
            prose_word_count: row.prose_word_count,
            // Stored as INTEGER 0/1; anything other than 0 counts as true,
            // mirroring the Go loader's `!= 0` comparison.
            has_table: row.has_table !== 0,
            has_code: row.has_code !== 0,
            inbound_count: row.inbound_count,
        });
        embeddings.set(row.id, decodeVec(row.embedding));
    });
    return { sections, embeddings };
}
/**
 * loadAllBlocksWithEmbeddings reads every row of the `blocks` table.
 *
 * @param db Database handle; only `db.prepare(sql).all()` is used.
 * @returns `{ blocks, embeddings }`. `blocks` holds one record per row, in
 *   query order. `embeddings` is a Map keyed by content_hash. A row whose
 *   embedding decodes to null (NULL or empty BLOB — e.g. tables, which are
 *   exact-hash only) contributes no entry at all. Rows sharing a content_hash
 *   collapse to a single entry (the vectors are expected to be identical; the
 *   last row read is the one kept).
 */
export function loadAllBlocksWithEmbeddings(db) {
    const statement = db.prepare(`
        SELECT section_id, block_index, file_path, heading, kind,
               start_line, end_line, content_hash, embedding
        FROM blocks
    `);
    const blocks = [];
    const embeddings = new Map();
    statement.all().forEach((row) => {
        const block = {
            SectionID: row.section_id,
            Index: row.block_index,
            FilePath: row.file_path,
            Heading: row.heading,
            Kind: row.kind,
            StartLine: row.start_line,
            EndLine: row.end_line,
            ContentHash: row.content_hash,
            // Neither Text nor TableRows is stored in the DB, so both come
            // back as their zero values (matching the Go loader).
            Text: "",
            TableRows: 0,
        };
        blocks.push(block);
        const decoded = decodeVecOrNull(row.embedding);
        if (decoded === null) {
            // No vector for this block: keep its hash out of the map entirely.
            return;
        }
        embeddings.set(block.ContentHash, decoded);
    });
    return { blocks, embeddings };
}
/**
 * decodeVec turns a little-endian float32 BLOB into a Float32Array.
 *
 * Where decodeVecOrNull reports "no vector" (NULL or empty BLOB), this returns
 * a fresh zero-length Float32Array instead, so callers always get an array.
 * (The Go original returned nil there; callers treat a zero-length vector the
 * same way.) It is the inverse of the indexer's encodeVec and lives here as a
 * copy so that indexdb stays a leaf package.
 *
 * @param buf BLOB bytes as returned by the database, or null.
 * @returns The decoded vector; never null.
 */
export function decodeVec(buf) {
    const decoded = decodeVecOrNull(buf);
    if (decoded === null || decoded === undefined) {
        return new Float32Array(0);
    }
    return decoded;
}
/**
 * decodeVecOrNull decodes a little-endian float32 BLOB into a Float32Array,
 * returning null when there is nothing to decode.
 *
 * Block loading relies on the null result so that "no vector" stays absent
 * from the embeddings map rather than becoming a zero-length entry, matching
 * Go's `if vec := decodeVec(blob); vec != nil`.
 *
 * @param buf BLOB bytes (a Uint8Array/Buffer view), or null/undefined.
 * @returns A new Float32Array of floor(buf.length / 4) elements, or null when
 *   buf is null, undefined, or empty. Trailing bytes that do not fill a whole
 *   float32 are ignored.
 */
function decodeVecOrNull(buf) {
    // `== null` covers both null and undefined, so a missing value is treated
    // as "no vector" instead of throwing on `.length`.
    if (buf == null || buf.length === 0) {
        return null;
    }
    const count = Math.floor(buf.length / 4);
    const out = new Float32Array(count);
    // A DataView honours the view's own byteOffset (which need not be 4-byte
    // aligned) and lets us force little-endian reads regardless of host order.
    const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
    for (let i = 0; i < count; i++) {
        out[i] = view.getFloat32(i * 4, true);
    }
    return out;
}
@@ -0,0 +1,58 @@
1
+ // The schema-migration registry.
2
+ //
3
+ // node:sqlite has no transaction object: a migration runs on the open
4
+ // connection while the caller holds an explicit BEGIN/COMMIT. So where Go's
5
+ // `apply` took a *sql.Tx, the TS version takes the DatabaseSync directly and
6
+ // must NOT issue BEGIN/COMMIT/ROLLBACK itself — applyMigration owns that.
7
+ import { createBlocksSQL } from "./schema.js";
/**
 * createBlocksTableV2 implements the additive v1->v2 migration.
 *
 * It executes createBlocksSQL on the given connection, which creates the
 * blocks table and its indexes; it issues no statement against sections or
 * meta, so existing rows and embeddings are left as they are. Reusing
 * createBlocksSQL (also part of createTableSQL) keeps the blocks DDL defined
 * in exactly one place.
 *
 * @param db Open connection; the caller owns the surrounding transaction.
 * @param _deps Migration dependencies; unused by this migration.
 */
function createBlocksTableV2(db, _deps) {
    const blocksDdl = createBlocksSQL;
    db.exec(blocksDdl);
}
/**
 * migrations is the ordered registry of schema migrations.
 *
 * How it is used: Open looks up the entry whose fromVersion equals the stored
 * schema_version. When one exists, it is applied inside a single exclusive
 * transaction and the stored version is bumped to SchemaVersion. When none
 * exists, Open falls back to delete-and-recreate (StatusSchemaRecreated).
 *
 * The array is exported (Go kept the slice package-private) only so that the
 * test helper registerTestMigration can add and remove entries from the
 * *.test.ts file. Production code should treat it as read-only.
 */
export const migrations = [
    {
        // v1 -> v2 is additive: it adds the blocks table and its indexes and
        // keeps existing section rows and their embeddings.
        fromVersion: "1",
        apply: createBlocksTableV2,
    },
];
/**
 * findMigration looks up the first registered migration whose fromVersion is
 * strictly equal to the given version.
 *
 * @param fromVersion Stored schema version to upgrade from.
 * @returns The matching registry entry, or undefined when there is none.
 */
export function findMigration(fromVersion) {
    for (const candidate of migrations) {
        if (candidate.fromVersion === fromVersion) {
            return candidate;
        }
    }
    return undefined;
}
/**
 * registerTestMigration appends a test-only migration to the registry and
 * returns a cleanup function that removes that same migration again.
 *
 * Mirrors Go's RegisterTestMigration(*testing.T, ...), which registered its
 * cleanup via t.Cleanup; here the caller invokes the returned function itself
 * (e.g. in a finally block or afterEach) to keep the production registry
 * unpolluted.
 *
 * The supplied `apply` is wrapped so it receives only the connection; the
 * MigrationDeps argument is dropped, matching the Go helper's narrower test
 * signature.
 *
 * @param fromVersion Schema version the test migration upgrades from.
 * @param apply Callback invoked as `apply(db)`; its return value is passed through.
 * @returns Cleanup function. It removes exactly the entry added by this call
 *   (by identity), so cleanups may run in any order, and calling it more than
 *   once is a no-op rather than removing some other migration.
 */
export function registerTestMigration(fromVersion, apply) {
    const entry = {
        fromVersion,
        apply: (db, _deps) => apply(db),
    };
    migrations.push(entry);
    return () => {
        // Search from the end: the entry is normally the last one, but it may
        // not be if other migrations were registered after it.
        const at = migrations.lastIndexOf(entry);
        if (at !== -1) {
            migrations.splice(at, 1);
        }
    };
}
@@ -0,0 +1,83 @@
1
+ // DDL and open parameters for the dedup index DB.
2
+ //
3
+ // The CREATE TABLE strings are the persisted on-disk format — they MUST match
4
+ // the Go source byte-for-byte (column names, order, types, indexes) so a DB
5
+ // written by the Go binary and one written here are interchangeable.
/**
 * SchemaVersion is the schema version string stored in the meta table for the
 * current schema. Bump it whenever the schema changes. If the change is
 * additive and existing embeddings should survive, register a matching
 * migration alongside the bump.
 */
export const SchemaVersion = "2";
/**
 * SQLiteOpenParams records the SQLite connection parameters locked in by the
 * plan:
 *   - _journal_mode=WAL: WAL journaling, the standard choice for a
 *     single-writer, low-concurrency workload.
 *   - _busy_timeout=5000: wait up to 5000 ms on a lock held by another
 *     process instead of failing immediately.
 *   - _txlock=immediate: chosen to prevent deadlocks when multiple statements
 *     run on the same connection.
 *
 * In Go this string was appended to the sql.Open URI for the mattn driver.
 * node:sqlite takes a plain path, so openDB applies the equivalent settings
 * through PRAGMAs (see indexdb.ts). The value is kept purely as documentation
 * of the intended parameters.
 */
export const SQLiteOpenParams = `?${[
    "_journal_mode=WAL",
    "_busy_timeout=5000",
    "_txlock=immediate",
].join("&")}`;
/**
 * MaxOpenConns mirrors Go's connection-pool cap of 1, which go-sqlite3 needed
 * to avoid deadlocks with exclusive transactions. node:sqlite is a single
 * synchronous connection by construction and has no pool, so this constant is
 * documentation only.
 */
export const MaxOpenConns = 1;
32
+ /**
33
+ * createBlocksSQL is the DDL for the v2 blocks table (primary key: section_id + block_index; embedding is the only nullable column) and its two indexes, on content_hash and file_path.
34
+ * Every statement uses IF NOT EXISTS. It is shared by createTableSQL (fresh create) and the v1->v2 migration (existing DBs) so the
35
+ * blocks schema is defined exactly once. The text is part of the persisted format: do not reformat it.
36
+ */
37
+ export const createBlocksSQL = `
38
+ CREATE TABLE IF NOT EXISTS blocks (
39
+ section_id TEXT NOT NULL,
40
+ block_index INTEGER NOT NULL,
41
+ file_path TEXT NOT NULL,
42
+ heading TEXT NOT NULL,
43
+ kind TEXT NOT NULL,
44
+ start_line INTEGER NOT NULL,
45
+ end_line INTEGER NOT NULL,
46
+ content_hash TEXT NOT NULL,
47
+ embedding BLOB,
48
+ PRIMARY KEY (section_id, block_index)
49
+ );
50
+ CREATE INDEX IF NOT EXISTS idx_blocks_hash ON blocks(content_hash);
51
+ CREATE INDEX IF NOT EXISTS idx_blocks_file ON blocks(file_path);
52
+ `;
53
+ /**
54
+ * createTableSQL is the full DDL for the dedup index schema: the sections table with its file_path and content_hash indexes, the key/value meta table, and then createBlocksSQL appended by string concatenation.
55
+ * Every statement uses IF NOT EXISTS. Exported for tests that need to set up a DB with a specific state.
56
+ */
57
+ export const createTableSQL = `
58
+ CREATE TABLE IF NOT EXISTS sections (
59
+ id TEXT PRIMARY KEY,
60
+ file_path TEXT NOT NULL,
61
+ heading TEXT NOT NULL,
62
+ heading_level INTEGER NOT NULL,
63
+ anchor TEXT NOT NULL,
64
+ start_line INTEGER NOT NULL,
65
+ end_line INTEGER NOT NULL,
66
+ content_hash TEXT NOT NULL,
67
+ raw_content TEXT NOT NULL,
68
+ embed_text TEXT NOT NULL,
69
+ prose_word_count INTEGER NOT NULL,
70
+ has_table INTEGER NOT NULL,
71
+ has_code INTEGER NOT NULL,
72
+ inbound_count INTEGER NOT NULL DEFAULT 0,
73
+ embedding BLOB NOT NULL,
74
+ updated_at TEXT NOT NULL
75
+ );
76
+ CREATE INDEX IF NOT EXISTS idx_sections_file ON sections(file_path);
77
+ CREATE INDEX IF NOT EXISTS idx_sections_hash ON sections(content_hash);
78
+
79
+ CREATE TABLE IF NOT EXISTS meta (
80
+ key TEXT PRIMARY KEY,
81
+ value TEXT NOT NULL
82
+ );
83
+ ` + createBlocksSQL;
@@ -0,0 +1,9 @@
1
+ // Barrel for the indexer package: the two-pass dedup indexing pipeline.
2
+ //
3
+ // The production entry point is `run` (Go's Run). The remaining exports are the
4
+ // pipeline primitives Go exposed via export_test.go for white-box tests
5
+ // (blockEligible, blockEmbeddable, embedBlocks, collectSectionsAndBlocks) plus
6
+ // parseLinks; they have no other callers but stay exported so the wiring-contract
7
+ // tests can target them directly. Also re-exported below: computeInboundCounts, encodeVec, decodeVec, isExternal and resolveLink.
8
+ export { run, collectSectionsAndBlocks, computeInboundCounts, embedBlocks, blockEligible, blockEmbeddable, encodeVec, decodeVec, } from "./indexer.js";
9
+ export { parseLinks, isExternal, resolveLink } from "./links.js";