docsgov 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/README.md +242 -0
  2. package/dist/apispec/apispec.js +401 -0
  3. package/dist/apispec/apispec.test.js +444 -0
  4. package/dist/apispec/errors.js +17 -0
  5. package/dist/apispec/index.js +2 -0
  6. package/dist/check/doclinks.js +167 -0
  7. package/dist/check/index.js +8 -0
  8. package/dist/check/run.js +391 -0
  9. package/dist/check/run.test.js +513 -0
  10. package/dist/check/suggest.js +134 -0
  11. package/dist/check/suggest.test.js +92 -0
  12. package/dist/check/tokens.js +125 -0
  13. package/dist/cmd/main.js +330 -0
  14. package/dist/cmd/main.test.js +422 -0
  15. package/dist/codeq/cache.js +71 -0
  16. package/dist/codeq/cache.test.js +67 -0
  17. package/dist/codeq/errors.js +52 -0
  18. package/dist/codeq/grammars/tree-sitter-go.wasm +0 -0
  19. package/dist/codeq/grammars/tree-sitter-java.wasm +0 -0
  20. package/dist/codeq/grammars/tree-sitter-javascript.wasm +0 -0
  21. package/dist/codeq/grammars/tree-sitter-tsx.wasm +0 -0
  22. package/dist/codeq/grammars/tree-sitter-typescript.wasm +0 -0
  23. package/dist/codeq/index.js +11 -0
  24. package/dist/codeq/resolve.test.js +109 -0
  25. package/dist/codeq/resolver.js +128 -0
  26. package/dist/codeq/resolver.test.js +124 -0
  27. package/dist/codeq/resolvers/go.js +242 -0
  28. package/dist/codeq/resolvers/go.test.js +143 -0
  29. package/dist/codeq/resolvers/java.js +349 -0
  30. package/dist/codeq/resolvers/java.test.js +138 -0
  31. package/dist/codeq/resolvers/java_queries.js +63 -0
  32. package/dist/codeq/resolvers/javascript.js +412 -0
  33. package/dist/codeq/resolvers/javascript.test.js +125 -0
  34. package/dist/codeq/resolvers/javascript_queries.js +46 -0
  35. package/dist/codeq/resolvers/typescript.js +366 -0
  36. package/dist/codeq/resolvers/typescript.test.js +180 -0
  37. package/dist/codeq/resolvers/typescript_queries.js +78 -0
  38. package/dist/codeq/signature.js +50 -0
  39. package/dist/codeq/signature.test.js +50 -0
  40. package/dist/codeq/suggest.js +96 -0
  41. package/dist/codeq/treesitter.js +122 -0
  42. package/dist/codeq/treesitter.test.js +118 -0
  43. package/dist/config/config.js +74 -0
  44. package/dist/config/config.test.js +98 -0
  45. package/dist/config/fs.js +116 -0
  46. package/dist/config/glob.js +82 -0
  47. package/dist/config/glob.test.js +61 -0
  48. package/dist/config/index.js +4 -0
  49. package/dist/dedup/analyzer/analyzer.js +533 -0
  50. package/dist/dedup/analyzer/analyzer.test.js +530 -0
  51. package/dist/dedup/analyzer/canonical.js +74 -0
  52. package/dist/dedup/analyzer/canonical.test.js +70 -0
  53. package/dist/dedup/analyzer/cosine_clusters.js +169 -0
  54. package/dist/dedup/analyzer/cosine_clusters.test.js +131 -0
  55. package/dist/dedup/analyzer/distinctive.js +85 -0
  56. package/dist/dedup/analyzer/distinctive.test.js +49 -0
  57. package/dist/dedup/analyzer/exact_clusters.js +63 -0
  58. package/dist/dedup/analyzer/exact_clusters.test.js +81 -0
  59. package/dist/dedup/analyzer/index.js +14 -0
  60. package/dist/dedup/analyzer/multiplicity.js +110 -0
  61. package/dist/dedup/analyzer/multiplicity.test.js +123 -0
  62. package/dist/dedup/analyzer/order.js +22 -0
  63. package/dist/dedup/analyzer/partial_overlaps.js +65 -0
  64. package/dist/dedup/analyzer/partial_overlaps.test.js +161 -0
  65. package/dist/dedup/analyzer/preview.js +84 -0
  66. package/dist/dedup/analyzer/preview.test.js +46 -0
  67. package/dist/dedup/analyzer/safety.js +27 -0
  68. package/dist/dedup/analyzer/safety.test.js +39 -0
  69. package/dist/dedup/config.js +18 -0
  70. package/dist/dedup/configload.js +299 -0
  71. package/dist/dedup/configload.test.js +410 -0
  72. package/dist/dedup/dedup.index.test.js +203 -0
  73. package/dist/dedup/dedup.js +143 -0
  74. package/dist/dedup/dedup.test.js +212 -0
  75. package/dist/dedup/dedupcfg/config.js +112 -0
  76. package/dist/dedup/dedupcfg/config.test.js +70 -0
  77. package/dist/dedup/dedupcfg/index.js +1 -0
  78. package/dist/dedup/deduptypes/index.js +1 -0
  79. package/dist/dedup/deduptypes/types.js +9 -0
  80. package/dist/dedup/deduptypes/types.test.js +34 -0
  81. package/dist/dedup/embedder/cache.js +23 -0
  82. package/dist/dedup/embedder/cache.test.js +50 -0
  83. package/dist/dedup/embedder/constants.js +10 -0
  84. package/dist/dedup/embedder/embedder.js +76 -0
  85. package/dist/dedup/embedder/embedder.mock.test.js +128 -0
  86. package/dist/dedup/embedder/embedder.test.js +96 -0
  87. package/dist/dedup/embedder/errors.js +20 -0
  88. package/dist/dedup/embedder/errors.test.js +35 -0
  89. package/dist/dedup/embedder/index.js +4 -0
  90. package/dist/dedup/embedder/session.js +78 -0
  91. package/dist/dedup/embedder/session.test.js +172 -0
  92. package/dist/dedup/gitignore.js +97 -0
  93. package/dist/dedup/gitignore.test.js +98 -0
  94. package/dist/dedup/index.js +11 -0
  95. package/dist/dedup/indexdb/errors.js +48 -0
  96. package/dist/dedup/indexdb/index.js +6 -0
  97. package/dist/dedup/indexdb/indexdb.js +302 -0
  98. package/dist/dedup/indexdb/indexdb.test.js +739 -0
  99. package/dist/dedup/indexdb/load.js +110 -0
  100. package/dist/dedup/indexdb/migrations.js +58 -0
  101. package/dist/dedup/indexdb/schema.js +83 -0
  102. package/dist/dedup/indexer/index.js +9 -0
  103. package/dist/dedup/indexer/indexer.js +501 -0
  104. package/dist/dedup/indexer/indexer.test.js +510 -0
  105. package/dist/dedup/indexer/links.js +89 -0
  106. package/dist/dedup/mdsection/anchor.js +60 -0
  107. package/dist/dedup/mdsection/anchor.test.js +39 -0
  108. package/dist/dedup/mdsection/blocks.js +409 -0
  109. package/dist/dedup/mdsection/blocks.test.js +359 -0
  110. package/dist/dedup/mdsection/index.js +4 -0
  111. package/dist/dedup/mdsection/parse.js +21 -0
  112. package/dist/dedup/mdsection/section.js +234 -0
  113. package/dist/dedup/mdsection/section.test.js +221 -0
  114. package/dist/dedup/report/floatfmt.js +71 -0
  115. package/dist/dedup/report/floatfmt.test.js +42 -0
  116. package/dist/dedup/report/index.js +8 -0
  117. package/dist/dedup/report/quote.js +77 -0
  118. package/dist/dedup/report/quote.test.js +67 -0
  119. package/dist/dedup/report/text.js +251 -0
  120. package/dist/dedup/report/text.test.js +420 -0
  121. package/dist/dedup/report_types.js +8 -0
  122. package/dist/dedup/sectionid/index.js +1 -0
  123. package/dist/dedup/sectionid/sectionid.js +16 -0
  124. package/dist/dedup/sectionid/sectionid.test.js +49 -0
  125. package/dist/guard/api/errors.js +12 -0
  126. package/dist/guard/api/index.js +2 -0
  127. package/dist/guard/api/parser.js +81 -0
  128. package/dist/guard/api/parser.test.js +58 -0
  129. package/dist/guard/api/types.js +1 -0
  130. package/dist/guard/code/errors.js +16 -0
  131. package/dist/guard/code/index.js +2 -0
  132. package/dist/guard/code/parser.js +54 -0
  133. package/dist/guard/code/parser.test.js +111 -0
  134. package/dist/guard/code/types.js +6 -0
  135. package/dist/index.js +1 -0
  136. package/dist/index.test.js +5 -0
  137. package/dist/repo/boundary.js +92 -0
  138. package/dist/repo/boundary.test.js +65 -0
  139. package/dist/repo/errors.js +56 -0
  140. package/dist/repo/errors.test.js +85 -0
  141. package/dist/repo/exists.test.js +72 -0
  142. package/dist/repo/filename.js +46 -0
  143. package/dist/repo/filename.test.js +39 -0
  144. package/dist/repo/fs.js +53 -0
  145. package/dist/repo/index.js +7 -0
  146. package/dist/repo/overlay.js +36 -0
  147. package/dist/repo/overlay.test.js +80 -0
  148. package/dist/repo/repo.js +353 -0
  149. package/dist/repo/repo.test.js +255 -0
  150. package/dist/repo/testutil.js +27 -0
  151. package/dist/repo/write.test.js +125 -0
  152. package/dist/report/color.js +73 -0
  153. package/dist/report/index.js +1 -0
  154. package/dist/report/report.js +112 -0
  155. package/dist/report/report.test.js +368 -0
  156. package/dist/violation/index.js +1 -0
  157. package/dist/violation/types.js +22 -0
  158. package/dist/violation/types.test.js +70 -0
  159. package/package.json +48 -0
@@ -0,0 +1,50 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { normalizeType, signaturesMatch } from "./signature.js";
3
// WHY: an overload is identified by its erased parameter types, and the doc
// author may spell a type differently from the source (whitespace, generic
// arguments, varargs vs array). Both sides have to be normalized identically or
// overload disambiguation rejects refs that should resolve. These cases are the
// port of internal/codeq/signature_test.go and pin the normalization rules the
// language resolvers depend on.
describe("normalizeType", () => {
    // [input, expected] — one normalization rule per row.
    const rules = [
        ["String[]", "String[]"], // already canonical → unchanged
        [" String [ ] ", "String[]"], // whitespace is dropped entirely
        ["List<String>", "List"], // generic args erased to the base name
        ["Map<K, V>", "Map"], // multi-arg generics erased, whitespace gone
        ["String...", "String[]"], // varargs map to an array suffix
        ["java.util.List", "java.util.List"], // qualified names preserved verbatim
        ["int", "int"], // primitives preserved verbatim
    ];
    for (const [input, want] of rules) {
        it(`normalizes ${JSON.stringify(input)} → ${JSON.stringify(want)}`, () => {
            expect(normalizeType(input)).toBe(want);
        });
    }
    it("re-attaches an array suffix after erasing generics (List<String>[] → List[])", () => {
        // WHY: in a generic array the [] comes after the closing '>'; erasing
        // the generic part must not drop it, or List<T>[] would match List.
        const normalized = normalizeType("List<String>[]");
        expect(normalized).toBe("List[]");
    });
});
31
describe("signaturesMatch", () => {
    // Assert the verdict signaturesMatch gives for one pair of type lists.
    const expectVerdict = (left, right, want) => {
        expect(signaturesMatch(left, right)).toBe(want);
    };
    it("matches on a whitespace-only difference", () => {
        expectVerdict(["String[]", "Integer"], ["String [ ]", "Integer"], true);
    });
    it("matches after generic erasure (List<String> ~ List)", () => {
        expectVerdict(["List<String>"], ["List"], true);
    });
    it("matches zero-arg against zero-arg", () => {
        // WHY: "()" is a real overload in its own right, so [] has to match [].
        expectVerdict([], [], true);
    });
    it("rejects an arity mismatch", () => {
        // WHY: arity is the first thing that tells overloads apart; (int) is
        // not (int,int).
        expectVerdict(["int"], ["int", "int"], false);
    });
    it("rejects different types of equal arity", () => {
        // WHY: after normalization the comparison is textual — int is not long.
        expectVerdict(["int"], ["long"], false);
    });
});
@@ -0,0 +1,96 @@
1
+ // suggest.ts is the candidate-listing companion to the boolean Resolver oracle.
2
+ //
3
+ // The oracle (resolve) answers ONLY "does this ref exist?" — true/false. When it
4
+ // returns false the check layer wants to help the author: which same-kind names
5
+ // DO exist in the file (so a typo can become a "did you mean")? That is what a
6
+ // Suggestion carries. Extraction needs tree-sitter, so it lives here behind the
7
+ // codeq boundary; the check layer owns the ranking + wording (check/suggest.ts).
8
+ //
9
+ // This is BEST-EFFORT and has no Go analogue: the Go binary's oracle is boolean
10
+ // only. The suggestion path is invoked solely on the not-found branch, so it
11
+ // never affects whether a violation fires — only how it reads.
12
+ import { nodeText, runQuery } from "./treesitter.js";
13
+ /**
14
+ * refKind classifies a CodeRef the same way the resolvers' dispatch does, but
15
+ * only as far as suggestions care: a Member selector means we're after a member,
16
+ * else a Param selector means a parameter, else the top-level symbol. A bare
17
+ * symbol + signature (a Java/JS/TS constructor overload) stays "symbol" — its
18
+ * candidate space is the type names, and an exact-name match is suppressed by the
19
+ * ranker, so a pure signature mismatch yields no misleading suggestion.
20
+ */
21
+ export function refKind(ref) {
22
+ if (ref.Member !== "") {
23
+ return "member";
24
+ }
25
+ if (ref.Param !== "") {
26
+ return "param";
27
+ }
28
+ return "symbol";
29
+ }
30
/**
 * suggestFromExtractors builds the Suggestion for `ref` out of a parsed tree,
 * using the language-specific extractor set `ex`.
 *
 *   member ref: candidates are `ex.memberNames(root, ref.Symbol)`.
 *   param ref:  candidates are `ex.paramNames(root, ref.Symbol)`.
 *   otherwise:  candidates are every name from `ex.symbolNames(root)`.
 *
 * For the member and param cases an empty name list triggers an owner check:
 * if `ref.Symbol` is among `ex.symbolNames(root)` the owner exists and simply
 * has nothing to offer (empty candidates); if it is not, the result carries
 * `ownerMissing: ref.Symbol` so the caller can tell the owner name itself is
 * the problem.
 *
 * @param root parsed tree root handed through to the extractors.
 * @param ref  CodeRef-like object (`Symbol`, `Member`, `Param` are read).
 * @param ex   extractors: `memberNames`, `paramNames`, `symbolNames`.
 * @returns `{ kind, candidates }`, plus `ownerMissing` when the owner is absent.
 */
export function suggestFromExtractors(root, ref, ex) {
    // Member and param refs differ only in the kind tag and in which extractor
    // produced `names`; the owner-existence fallback is the same for both.
    const forOwned = (kind, names) => {
        if (names.length > 0) {
            return { kind, candidates: names };
        }
        const ownerDeclared = ex.symbolNames(root).includes(ref.Symbol);
        return ownerDeclared
            ? { kind, candidates: [] }
            : { kind, ownerMissing: ref.Symbol, candidates: [] };
    };
    if (ref.Member !== "") {
        return forOwned("member", ex.memberNames(root, ref.Symbol));
    }
    if (ref.Param !== "") {
        return forOwned("param", ex.paramNames(root, ref.Symbol));
    }
    return { kind: "symbol", candidates: ex.symbolNames(root) };
}
66
/**
 * collectCapture gathers the source text of the `cap` capture from every match
 * that `query` produces over `root`, in match order. Matches that have no
 * capture under that name contribute nothing.
 *
 * @param root  tree root to run the query over.
 * @param query compiled query, passed to runQuery.
 * @param cap   capture name to collect.
 * @returns array of captured texts.
 */
export function collectCapture(root, query, cap) {
    return runQuery(root, query)
        .map((match) => match.captures[cap])
        .filter((node) => node !== undefined)
        .map((node) => nodeText(node));
}
82
/**
 * collectOwned gathers the text of the `nameCap` capture from each match whose
 * `ownerCap` capture has exactly the text `want`. Matches missing either
 * capture are skipped.
 *
 * @param root     tree root to run the query over.
 * @param query    compiled query, passed to runQuery.
 * @param want     owner text a match must have to be included.
 * @param ownerCap capture name holding the owner.
 * @param nameCap  capture name holding the owned name.
 * @returns array of owned-name texts, in match order.
 */
export function collectOwned(root, query, want, ownerCap, nameCap) {
    const owned = [];
    for (const { captures } of runQuery(root, query)) {
        const ownerNode = captures[ownerCap];
        const nameNode = captures[nameCap];
        if (ownerNode === undefined || nameNode === undefined) {
            continue;
        }
        if (nodeText(ownerNode) !== want) {
            continue;
        }
        owned.push(nodeText(nameNode));
    }
    return owned;
}
@@ -0,0 +1,122 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { fileURLToPath } from "node:url";
3
+ import { Language, Node, Parser, Query } from "web-tree-sitter";
4
+ // treesitter.ts is the ONLY module that imports web-tree-sitter. It insulates
5
+ // the resolvers from the binding: resolver code is identical regardless of which
6
+ // tree-sitter binding sits underneath. It exposes:
7
+ // - initParser() — one-time process-wide Parser.init (idempotent)
8
+ // - loadGrammar(name) — async, cached Language loader reading vendored wasm
9
+ // - compileQuery(lang,src) — compile once, reuse (mirrors Go's mustQuery vars)
10
+ // - runQuery(root, q) — binding-neutral match shape (no cursor API leak)
11
+ // - parseTree(parser,src)— parse + null-guard
12
+ // - nodeText(n) — n.text (UTF-16 correct; no []byte math)
13
+ //
14
+ // Re-export the binding's value/type names so resolvers import them from HERE,
15
+ // keeping the web-tree-sitter import surface in one file.
16
+ export { Language, Node, Parser };
17
// Grammar name → vendored wasm file name under ./grammars/. Every file follows
// the `tree-sitter-<grammar>.wasm` naming, so the table is derived from the
// list of grammar names. The wasm files were copied from the upstream
// tree-sitter-* npm packages at vendor time (see the port notes); those
// packages are NOT runtime deps.
const WASM_FILE = Object.fromEntries(["go", "java", "javascript", "typescript", "tsx"].map((grammar) => [grammar, `tree-sitter-${grammar}.wasm`]));
27
// Memoized result of the single Parser.init() call. Storing the Promise itself
// (rather than a "done" flag) lets callers that arrive while init is still in
// flight await the same initialization instead of starting another one.
let initPromise;
/**
 * initParser starts Parser.init() on the first call and returns that same
 * Promise on every later call.
 */
export function initParser() {
    initPromise ??= Parser.init();
    return initPromise;
}
38
// Grammar name → Promise of its loaded Language. Promises (not resolved values)
// are cached so overlapping calls for one grammar share a single read + load.
const languageCache = new Map();
/**
 * loadGrammar loads (and caches) the Language for a vendored grammar.
 *
 * The first call for a name awaits initParser(), reads the wasm file that
 * WASM_FILE maps the name to (resolved relative to this module), and passes the
 * bytes to Language.load. The resulting Promise is cached, so later and
 * concurrent calls for the same name get that same Promise back.
 *
 * A failed load is evicted from the cache so a later call can retry.
 *
 * @param name grammar name; must be a key of WASM_FILE.
 * @returns Promise resolving to the loaded Language. It rejects with an
 *   "unknown grammar" Error when `name` is not a key of WASM_FILE, or with the
 *   underlying error when init, the file read, or Language.load fails.
 */
export function loadGrammar(name) {
    const cached = languageCache.get(name);
    if (cached !== undefined) {
        return cached;
    }
    const pending = (async () => {
        // Reject unknown names up front: without this the path below would be
        // built from `undefined` and fail later with a misleading ENOENT for
        // "grammars/undefined". Thrown inside the async body so the caller
        // still observes a rejected Promise, never a synchronous throw.
        if (!Object.hasOwn(WASM_FILE, name)) {
            throw new Error(`codeq: unknown grammar "${String(name)}"`);
        }
        await initParser();
        const wasmUrl = new URL(`./grammars/${WASM_FILE[name]}`, import.meta.url);
        const bytes = await readFile(fileURLToPath(wasmUrl));
        return Language.load(bytes);
    })();
    languageCache.set(name, pending);
    pending.catch(() => {
        // Evict a failed load so a retry is possible (mirrors the cache.ts
        // policy). The identity check avoids removing a newer entry that
        // replaced this one in the meantime.
        if (languageCache.get(name) === pending) {
            languageCache.delete(name);
        }
    });
    return pending;
}
65
/**
 * compileQuery builds a Query from the S-expression text `src` for `language`.
 *
 * No error handling is done here on purpose: if `new Query` throws on a bad
 * `src`, the exception propagates to the caller. Query sources are hard-coded
 * literals, so a failure is a programming error that should surface when the
 * resolver is constructed — the counterpart of Go's mustQuery panic. The
 * returned Query is meant to be compiled once and reused across trees.
 *
 * @param language Language the query is compiled against.
 * @param src      query source text.
 * @returns the compiled Query.
 */
export function compileQuery(language, src) {
    const compiled = new Query(language, src);
    return compiled;
}
77
/**
 * runQuery runs `query` over `root` and returns one binding-neutral view per
 * match, so resolver code never touches the binding's capture-array shape.
 *
 * Each view has:
 *   - `captures[name]`: the FIRST node captured under `name` in that match;
 *   - `all[name]`: every node captured under `name`, in capture order;
 *   - `patternIndex`: copied from the binding's match object.
 *
 * `captures` and `all` are prototype-less dictionaries. With ordinary `{}`
 * objects a capture whose name collides with an Object.prototype member (for
 * example `@constructor`) would look already-present to the `in` test and would
 * make `all[name]` resolve to the inherited value, so the push would throw.
 * Without a prototype every capture name behaves the same, and looking up a
 * name that was not captured yields `undefined`.
 *
 * @param root  node the query is run over (passed to `query.matches`).
 * @param query compiled query exposing `matches(root)`, whose matches carry a
 *   `captures` list of `{ name, node }` and a `patternIndex`.
 * @returns array of `{ captures, all, patternIndex }` views, in match order.
 */
export function runQuery(root, query) {
    const views = [];
    for (const match of query.matches(root)) {
        const captures = Object.create(null);
        const all = Object.create(null);
        for (const { name, node } of match.captures) {
            // Only the first node per name goes into `captures`.
            if (!(name in captures)) {
                captures[name] = node;
            }
            (all[name] ??= []).push(node);
        }
        views.push({ captures, all, patternIndex: match.patternIndex });
    }
    return views;
}
104
/**
 * parseTree runs `parser.parse(src)` and returns the resulting tree's root
 * node. A null result from parse() is treated as a programming error (callers
 * are expected to have set the parser's language first) and raised as an Error.
 *
 * rootNode.hasError is deliberately NOT inspected here: turning a syntax error
 * into ParseFailedError is left to the resolver, which knows the file path
 * needed for that message.
 *
 * @param parser object exposing `parse(src)`.
 * @param src    source text to parse.
 * @returns the root node of the parsed tree.
 * @throws Error when parse() returns null.
 */
export function parseTree(parser, src) {
    const tree = parser.parse(src);
    if (tree !== null) {
        return tree.rootNode;
    }
    throw new Error("codeq: parser produced no tree (no language set)");
}
119
/**
 * nodeText returns the source text of node `n`, read from its `text` property,
 * so callers never slice the source by offsets themselves.
 */
export function nodeText(n) {
    const { text } = n;
    return text;
}
@@ -0,0 +1,118 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { Parser, compileQuery, loadGrammar, nodeText, parseTree, runQuery, } from "./treesitter.js";
3
+ // WHY: the treesitter helper is the seam every language resolver builds on. If
4
+ // loadGrammar can't read a vendored wasm, compileQuery doesn't surface a bad
5
+ // query, or runQuery's capture-grouping is wrong, EVERY resolver is broken in a
6
+ // way no resolver test would localise. These tests pin: (1) a vendored grammar
7
+ // loads and parses real source, (2) the SAME S-expression query text from the Go
8
+ // package runs verbatim and yields the expected named captures, (3) a bad query
9
+ // throws (the mustQuery-panic contract resolvers rely on), (4) the binding's
10
+ // UTF-16 offsets don't corrupt nodeText, and (5) .ts and .tsx are distinct langs.
11
// parserFor builds a new Parser with `lang` already set. Each call creates its
// own Parser, so every test parses with a parser configured for exactly the
// grammar it asked for.
function parserFor(lang) {
    const parser = new Parser();
    parser.setLanguage(lang);
    return parser;
}
18
describe("treesitter helper", () => {
    // Compile `src` for `lang`, hand the Query to `use`, and always release it
    // with delete() afterwards — even when an assertion inside `use` throws.
    const withQuery = (lang, src, use) => {
        const query = compileQuery(lang, src);
        try {
            return use(query);
        }
        finally {
            query.delete();
        }
    };
    it("loads the Go grammar and parses a sample without errors", async () => {
        const goLang = await loadGrammar("go");
        const source = "package pkg\nfunc Bar() {}\ntype T struct{}\n";
        const tree = parseTree(parserFor(goLang), source);
        // Valid Go must parse without ERROR nodes: hasError is what the
        // resolver maps to ParseFailedError, so a false positive here would
        // reject valid source.
        expect(tree.hasError).toBe(false);
        expect(tree.type).toBe("source_file");
    });
    it("runs the verbatim Go symbol query and returns @name captures", async () => {
        const goLang = await loadGrammar("go");
        // The exact goSymbolQuery text from internal/codeq/go_queries.go. If it
        // runs unchanged, the node-type names match the upstream wasm grammar
        // and the Go queries port verbatim (a core assumption of the port).
        const symbolQuery = `
            (function_declaration name: (identifier) @name)
            (type_declaration (type_spec name: (type_identifier) @name))
            (var_declaration (var_spec name: (identifier) @name))
            (const_declaration (const_spec name: (identifier) @name))
        `;
        withQuery(goLang, symbolQuery, (query) => {
            const source = "package pkg\nfunc Bar() {}\ntype Baz struct{}\nvar Qux = 1\nconst K = 2\n";
            const tree = parseTree(parserFor(goLang), source);
            const found = [];
            for (const match of runQuery(tree, query)) {
                const node = match.captures["name"];
                if (node !== undefined) {
                    found.push(nodeText(node));
                }
            }
            found.sort();
            // All four top-level declaration kinds must be captured by @name.
            expect(found).toEqual(["Bar", "Baz", "K", "Qux"]);
        });
    });
    it("groups multi-capture matches: captures has first, all has every node", async () => {
        const goLang = await loadGrammar("go");
        // The Go member query yields TWO captures per match (@owner and @name);
        // this checks the per-name grouping of runQuery that resolvers use.
        const memberQuery = `(method_declaration receiver: (parameter_list (parameter_declaration type: (type_identifier) @owner)) name: (field_identifier) @name)`;
        withQuery(goLang, memberQuery, (query) => {
            const tree = parseTree(parserFor(goLang), "package pkg\ntype T struct{}\nfunc (t T) Do() {}\n");
            const matches = runQuery(tree, query);
            expect(matches).toHaveLength(1);
            const [only] = matches;
            expect(only).toBeDefined();
            expect(nodeText(only.captures["owner"])).toBe("T");
            expect(nodeText(only.captures["name"])).toBe("Do");
            // all[name] is an array even for a single capture.
            expect(only.all["name"]).toHaveLength(1);
        });
    });
    it("throws on a syntactically invalid query (mustQuery-panic contract)", async () => {
        const goLang = await loadGrammar("go");
        // A hard-coded bad query is a programming error; compileQuery must
        // throw so resolver construction fails loudly rather than silently
        // matching nothing.
        const compileBroken = () => compileQuery(goLang, "(this is not a valid query");
        expect(compileBroken).toThrow();
    });
    it("reports hasError on invalid source (the ParseFailedError trigger)", async () => {
        const goLang = await loadGrammar("go");
        // Invalid Go produces ERROR nodes rather than a null/throwing parse();
        // the resolver checks hasError to decide on ParseFailedError, so it
        // must be true here.
        const tree = parseTree(parserFor(goLang), "package pkg\nfunc Bar( {{{ \n");
        expect(tree.hasError).toBe(true);
    });
    it("returns UTF-16-correct text via nodeText (no byte-offset corruption)", async () => {
        const goLang = await loadGrammar("go");
        // Multibyte characters earlier in the file would desync byte-offset
        // slicing. The source has a non-ASCII string literal ahead of a
        // non-ASCII function name; nodeText must return that name intact.
        const tree = parseTree(parserFor(goLang), 'package pkg\nvar S = "café→ünïcödé"\nfunc Função() {}\n');
        withQuery(goLang, `(function_declaration name: (identifier) @name)`, (query) => {
            const found = runQuery(tree, query).map((match) => nodeText(match.captures["name"]));
            expect(found).toEqual(["Função"]);
        });
    });
    it("loads .ts and .tsx as DISTINCT languages", async () => {
        // WHY: a Query compiled for typescript cannot run on a tsx tree and
        // vice versa; the dispatch routes .ts→typescript and .tsx→tsx. The two
        // must not be the same object, so per-grammar query sets stay separate.
        const pending = [loadGrammar("typescript"), loadGrammar("tsx")];
        const [tsLang, tsxLang] = await Promise.all(pending);
        expect(tsLang).not.toBe(tsxLang);
    });
    it("caches the loaded Language (same grammar → same object)", async () => {
        // WHY: loadGrammar must read+load each wasm once (Go caches the
        // Language at package init); the second call must hand back the cached
        // Language instead of loading again.
        const first = await loadGrammar("java");
        const second = await loadGrammar("java");
        expect(first).toBe(second);
    });
});
@@ -0,0 +1,74 @@
1
+ // Port of internal/config/config.go.
2
+ //
3
+ // Document-config / manifest YAML types and their loader. YAML field spellings
4
+ // are preserved exactly ("boundary", "source"). Validation/parse failures
5
+ // throw; the parsed Config is returned data.
6
+ import { parse as parseYaml } from "yaml";
7
/**
 * decodeScope converts the parsed YAML value found under a section key
 * (`code:` / `doc:` / `api:`) into a Scope `{ boundary, source }`.
 *
 * Anything that is not a mapping — null/undefined (a present-but-empty
 * section), a scalar, or a sequence — yields a Scope with two empty arrays,
 * matching Go's "decode null into zero-value struct" behaviour. For a mapping,
 * `boundary` and `source` are each passed through toStringArray, so a missing
 * or non-list field also ends up as an empty array.
 *
 * @param value parsed YAML value of the section.
 * @returns a new Scope object.
 */
function decodeScope(value) {
    const isMapping = value !== null &&
        value !== undefined &&
        typeof value === "object" &&
        !Array.isArray(value);
    if (!isMapping) {
        return { boundary: [], source: [] };
    }
    return {
        boundary: toStringArray(value["boundary"]),
        source: toStringArray(value["source"]),
    };
}
27
/**
 * toStringArray converts a parsed YAML sequence into an array of strings by
 * applying String() to each item. Any value that is not an array (including an
 * absent field) yields an empty array.
 */
function toStringArray(value) {
    return Array.isArray(value) ? value.map((entry) => String(entry)) : [];
}
34
/**
 * loadConfig reads `.docgov/docgov.yaml` through `fsys`, parses it as YAML and
 * returns the resulting Config.
 *
 * Errors are not caught here: whatever `fsys.readFile` throws for a missing
 * file (a NotExistError, which the CLI maps to an exit code in a later stage)
 * and whatever the YAML parser throws for malformed input both propagate.
 *
 * Only the top-level keys `code`, `doc` and `api` are recognised; they populate
 * `Code`, `Doc` and `API` respectively. A key that is present — even with a
 * null value — produces a defined Scope, while an absent key leaves the field
 * undefined, matching the Go UnmarshalYAML implementation. No section defaults
 * are applied, and unknown top-level keys are ignored. A document whose root is
 * not a mapping (empty, scalar, or sequence) yields an empty Config.
 *
 * @param fsys filesystem exposing an async `readFile(path)` that yields bytes.
 * @returns the parsed Config.
 */
export async function loadConfig(fsys) {
    // A missing file surfaces as readFile's own error; it is not handled here.
    const bytes = await fsys.readFile(".docgov/docgov.yaml");
    // Likewise, a YAML syntax error is left to propagate from the parser.
    const doc = parseYaml(new TextDecoder().decode(bytes));
    const cfg = {};
    const isMapping = doc !== null && typeof doc === "object" && !Array.isArray(doc);
    if (!isMapping) {
        // An empty/null document is valid: no sections, every field undefined.
        return cfg;
    }
    // YAML section key → Config field. A Map keeps unknown keys, whatever
    // their spelling, from ever resolving to a field.
    const fieldByKey = new Map([
        ["code", "Code"],
        ["doc", "Doc"],
        ["api", "API"],
    ]);
    for (const [key, value] of Object.entries(doc)) {
        const field = fieldByKey.get(key);
        if (field === undefined) {
            // Unknown top-level keys are silently ignored per the plan.
            continue;
        }
        cfg[field] = decodeScope(value);
    }
    return cfg;
}
@@ -0,0 +1,98 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { loadConfig } from "./config.js";
3
+ import { isNotExist, MapFS } from "./fs.js";
4
describe("loadConfig", () => {
    // Build an in-memory FS holding only the config file with `yamlText`.
    const fsWithConfig = (yamlText) => new MapFS({ ".docgov/docgov.yaml": yamlText });
    // NOTE: the YAML fixtures below rely on indentation — `boundary`/`source`
    // must be nested under their section key. Written flush-left they would be
    // top-level keys and the scope assertions could not hold.
    // All three sections must load when present — this is the happy path the
    // whole check pipeline depends on.
    it("loads code, doc, and api scopes with their exact glob lists", async () => {
        const fsys = fsWithConfig(`
code:
  boundary: [docs/**]
  source: [internal/**, cmd/**]
doc:
  boundary: [docs/**]
api:
  boundary: [docs/api/contract/**]
  source: [docs/api/openapi/**]
`);
        const cfg = await loadConfig(fsys);
        expect(cfg.Code).toBeDefined();
        expect(cfg.Doc).toBeDefined();
        expect(cfg.API).toBeDefined();
        expect(cfg.Code?.boundary).toEqual(["docs/**"]);
        expect(cfg.Code?.source).toEqual(["internal/**", "cmd/**"]);
        expect(cfg.Doc?.boundary).toEqual(["docs/**"]);
        expect(cfg.API?.boundary).toEqual(["docs/api/contract/**"]);
        expect(cfg.API?.source).toEqual(["docs/api/openapi/**"]);
    });
    // An absent section must stay undefined so the pipeline can skip that guard
    // entirely (undefined vs defined is the "is this guard configured?" signal).
    it("leaves absent sections undefined", async () => {
        const fsys = fsWithConfig(`
doc:
  boundary: [docs/**]
`);
        const cfg = await loadConfig(fsys);
        expect(cfg.Code).toBeUndefined();
        expect(cfg.API).toBeUndefined();
        expect(cfg.Doc).toBeDefined();
        expect(cfg.Doc?.boundary).toEqual(["docs/**"]);
    });
    // A present-but-empty `code:` key must yield a DEFINED scope with empty
    // arrays — this is the load-bearing distinction between "omitted" (skip the
    // guard) and "explicitly configured but empty" (run the guard, match nothing).
    it("yields a defined empty scope for a present-but-null section", async () => {
        const fsys = fsWithConfig(`
code:
`);
        const cfg = await loadConfig(fsys);
        expect(cfg.Code).toBeDefined();
        expect(cfg.Code?.boundary).toEqual([]);
        expect(cfg.Code?.source).toEqual([]);
    });
    // A missing config file must propagate a not-exist error so the CLI can map
    // it to its dedicated exit code rather than crashing opaquely.
    it("throws a not-exist error when the config file is missing", async () => {
        const fsys = new MapFS({});
        // Capture the rejection reason (undefined if loadConfig resolved).
        const thrown = await loadConfig(fsys).then(() => undefined, (err) => err);
        expect(thrown).toBeDefined();
        expect(isNotExist(thrown)).toBe(true);
    });
    // Malformed YAML must throw — a silently-empty config would let governed
    // docs go unchecked.
    it("throws on malformed YAML", async () => {
        const fsys = fsWithConfig(`
code: {boundary: [not closed
`);
        await expect(loadConfig(fsys)).rejects.toThrow();
    });
    // Unknown top-level keys are silently ignored (forward-compat per the plan).
    it("silently ignores unknown top-level keys", async () => {
        const fsys = fsWithConfig(`
future:
  boundary: [x/**]
doc:
  boundary: [docs/**]
`);
        const cfg = await loadConfig(fsys);
        expect(cfg.Doc?.boundary).toEqual(["docs/**"]);
        expect(cfg.Code).toBeUndefined();
        expect(cfg.API).toBeUndefined();
    });
});
@@ -0,0 +1,116 @@
1
+ // Filesystem abstraction for the config package.
2
+ //
3
+ // FS RECONCILIATION (minimize-docgov port): the config package and the repo
4
+ // package originally carried two different FS abstractions — config's was
5
+ // synchronous (readFile + walk), repo's is asynchronous (readFile + readDir +
6
+ // sub). The check orchestrator composes both, so they are unified here onto
7
+ // repo's ASYNC FS: config re-exports repo's `FS`/`DirEntry`/`isNotExist`, and
8
+ // `MapFS` (config's in-memory test FS) now implements that async interface.
9
+ //
10
+ // All paths are forward-slash ("/") relative paths, following the io/fs
11
+ // convention. There is no leading "/" and no "." prefix ("." is the root).
12
+ export { isNotExist } from "../repo/fs.js";
13
/**
 * Error thrown when a requested path is absent from the filesystem; the
 * TypeScript-side counterpart of Go's fs.ErrNotExist.
 *
 * Config loading lets this error propagate for a missing file so the CLI can
 * translate it into its dedicated exit code.
 *
 * The instance carries a Node-style `code` of "ENOENT" so that the shared
 * {@link isNotExist} predicate re-exported by this module (which, per this
 * file's header notes, keys on the Node error code) recognises it as well.
 */
export class NotExistError extends Error {
    /**
     * @param {string} path - The path that could not be found; embedded in the message.
     */
    constructor(path) {
        super(`file does not exist: ${path}`);
        // Assigned before `name` so own-property order matches a class-field declaration.
        this.code = "ENOENT";
        this.name = "NotExistError";
    }
}
30
/**
 * One entry in a {@link MapFS} directory listing: a child name plus a flag
 * saying whether that child is itself a directory.
 */
class MapDirEntry {
    /** Base name of the entry (a single path segment). */
    entryName;
    /** True when the entry is a directory, false when it is a file. */
    dir;
    /**
     * @param {string} label - Base name of the entry.
     * @param {boolean} directory - Whether the entry is a directory.
     */
    constructor(label, directory) {
        Object.assign(this, { entryName: label, dir: directory });
    }
    /** @returns {string} The entry's base name. */
    name() {
        const { entryName } = this;
        return entryName;
    }
    /** @returns {boolean} Whether the entry is a directory. */
    isDir() {
        const { dir } = this;
        return dir;
    }
}
45
/**
 * MapFS is an in-memory filesystem keyed by slash-separated file path,
 * modelled on Go's testing/fstest.MapFS. It exposes the async
 * readFile / readDir / sub operations so it can be driven the same way as a
 * real on-disk repo. Directories are never stored explicitly: they are
 * synthesised from the parent segments of the stored file paths.
 */
export class MapFS {
    /** Backing store: file path -> file contents as bytes. Shared with sub-views. */
    files;
    /** Slash-path of the subtree this view is rooted at ("" for the real root). */
    root;
    /**
     * @param {Record<string, string | Uint8Array>} files - Initial files keyed
     *   by path. String contents are encoded with TextEncoder; any other value
     *   is stored as given.
     * @param {string} [root=""] - Subtree this view is rooted at.
     */
    constructor(files, root = "") {
        this.files = new Map();
        for (const [name, data] of Object.entries(files)) {
            this.files.set(name, typeof data === "string" ? new TextEncoder().encode(data) : data);
        }
        this.root = root;
    }
    /**
     * Joins a name onto this view's root, treating "." and "" as the root itself.
     *
     * @param {string} name - Path relative to this view.
     * @returns {string} The key to look up in the backing map ("" for the real root).
     */
    resolve(name) {
        const rel = name === "." || name === "" ? "" : name;
        if (this.root === "") {
            return rel;
        }
        return rel === "" ? this.root : `${this.root}/${rel}`;
    }
    /**
     * Reads a file's contents.
     *
     * @param {string} name - Path relative to this view.
     * @returns {Promise<Uint8Array>} The stored bytes (the backing value itself, not a copy).
     * @throws {NotExistError} When no file is stored at the resolved path.
     */
    async readFile(name) {
        const key = this.resolve(name);
        const data = this.files.get(key);
        if (data === undefined) {
            throw new NotExistError(key);
        }
        return data;
    }
    /**
     * Lists the immediate children of a directory, sorted by name.
     *
     * Sorting makes the listing independent of the order in which files were
     * added to the map, matching the sorted-by-filename convention of Go's
     * io/fs ReadDir that this class is modelled on.
     *
     * @param {string} name - Directory path relative to this view ("." for its root).
     * @returns {Promise<MapDirEntry[]>} One entry per distinct child name.
     * @throws {NotExistError} When the resolved path is not the real root and
     *   no stored file lives beneath it (this includes a path that names a file).
     */
    async readDir(name) {
        const dir = this.resolve(name);
        const prefix = dir === "" ? "" : `${dir}/`;
        const childNames = new Map(); // child name -> isDir
        // The real root always exists, even when the map is empty.
        let dirExists = dir === "";
        for (const filePath of this.files.keys()) {
            // A key equal to `dir` is a file, not a directory; it can never
            // start with `dir + "/"`, so this single test skips it as well.
            if (!filePath.startsWith(prefix)) {
                continue;
            }
            dirExists = true;
            const rest = filePath.slice(prefix.length);
            const slash = rest.indexOf("/");
            if (slash < 0) {
                // Direct child file.
                childNames.set(rest, false);
            }
            else {
                // Deeper path: its first segment is a child directory.
                const childDir = rest.slice(0, slash);
                if (!childNames.has(childDir)) {
                    childNames.set(childDir, true);
                }
            }
        }
        if (!dirExists) {
            throw new NotExistError(dir);
        }
        // Plain code-unit comparison (not locale-aware) keeps the order stable
        // across environments.
        const byName = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);
        return [...childNames]
            .sort(byName)
            .map(([n, isDir]) => new MapDirEntry(n, isDir));
    }
    /**
     * Returns a view rooted at a subdirectory of this one.
     *
     * @param {string} name - Directory path relative to this view.
     * @returns {MapFS} A view that shares this instance's backing map.
     */
    sub(name) {
        const dir = this.resolve(name);
        const view = new MapFS({}, dir);
        // Share the same backing map so the sub-view reads the same files.
        view.files = this.files;
        return view;
    }
}