docverity 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,7 +69,9 @@ docverity --no-llm
69
69
  | `--format <fmt>` | `pretty` (default), `json`, or `github`. |
70
70
 
71
71
  Docverity exits non-zero when it finds drift above the confidence threshold, so
72
- it fails CI the way a linter would.
72
+ it fails CI the way a linter would. Exit codes: `0` clean, `1` drift found,
73
+ `2` a configuration error (e.g. an invalid `--fail-confidence` or a missing doc
74
+ file) so a typo can never mask real drift with a green build.
73
75
 
74
76
  ## In CI (GitHub Actions)
75
77
 
package/dist/cli.js CHANGED
@@ -1,19 +1,19 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from "commander";
3
3
  import path from "node:path";
4
+ import { existsSync } from "node:fs";
4
5
  import kleur from "kleur";
5
6
  import { extractClaims } from "./extract.js";
6
7
  import { verifyReference } from "./verify-reference.js";
7
- import { verifyLlm } from "./verify-llm.js";
8
8
  import { hasApiKey } from "./llm.js";
9
9
  import { printReport, printGithubAnnotations, toJson, summarize } from "./report.js";
10
10
  import { discoverDocs } from "./discover.js";
11
- import { runMcpServer } from "./mcp.js";
11
+ // verify-llm and mcp pull in heavy SDKs; they are imported lazily, only when used.
12
12
  const program = new Command();
13
13
  program
14
14
  .name("docverity")
15
15
  .description("Catch documentation that lies about your code.")
16
- .version("0.1.0");
16
+ .version("0.2.0");
17
17
  program
18
18
  .command("check", { isDefault: true })
19
19
  .description("Check docs for claims that no longer match the code.")
@@ -26,7 +26,26 @@ program
26
26
  .option("--format <fmt>", "output format: pretty | json | github", "pretty")
27
27
  .action(async (docs, rawOpts) => {
28
28
  const root = path.resolve(rawOpts.root);
29
- const docFiles = docs.length ? docs : discoverDocs(root);
29
+ // A non-numeric threshold must never silently pass CI. Exit 2 = config error
30
+ // (distinct from 1 = drift found).
31
+ const failConfidence = Number(rawOpts.failConfidence);
32
+ if (!Number.isFinite(failConfidence) || failConfidence < 0 || failConfidence > 1) {
33
+ console.error(kleur.red(`Invalid --fail-confidence: ${rawOpts.failConfidence} (expected a number between 0 and 1).`));
34
+ process.exit(2);
35
+ }
36
+ // Resolve explicit doc args against the cwd (path.resolve handles both
37
+ // absolute and cwd-relative inputs); path.relative then makes them
38
+ // root-relative for the extractor and verifier, fixing the old root+arg double-join.
39
+ const docFiles = docs.length
40
+ ? docs.map((d) => path.relative(root, path.resolve(d)))
41
+ : discoverDocs(root);
42
+ if (docs.length) {
43
+ const missing = docFiles.filter((d) => !existsSync(path.join(root, d)));
44
+ if (missing.length) {
45
+ console.error(kleur.red(`Doc file(s) not found: ${missing.join(", ")}`));
46
+ process.exit(2);
47
+ }
48
+ }
30
49
  if (!docFiles.length) {
31
50
  console.error(kleur.yellow("No documentation files found."));
32
51
  process.exit(0);
@@ -40,14 +59,21 @@ program
40
59
  docFiles,
41
60
  useLlm,
42
61
  model: rawOpts.model,
43
- failConfidence: Number(rawOpts.failConfidence),
62
+ failConfidence,
44
63
  strict: Boolean(rawOpts.strict),
45
64
  };
65
+ // Lazy-load the LLM engine (and its SDK) only when actually used.
66
+ const verifyLlm = useLlm ? (await import("./verify-llm.js")).verifyLlm : null;
46
67
  const verdicts = [];
47
68
  for (const doc of docFiles) {
48
- const claims = extractClaims(root, doc);
49
- verdicts.push(...(await verifyReference(root, claims)));
50
- if (useLlm) {
69
+ try {
70
+ verdicts.push(...(await verifyReference(root, extractClaims(root, doc))));
71
+ }
72
+ catch (err) {
73
+ console.error(kleur.yellow(`Cannot check ${doc}: ${err?.message ?? err}`));
74
+ continue;
75
+ }
76
+ if (verifyLlm) {
51
77
  try {
52
78
  verdicts.push(...(await verifyLlm(root, doc, opts.model)));
53
79
  }
@@ -74,6 +100,7 @@ program
74
100
  .command("mcp")
75
101
  .description("Run as an MCP server (stdio) so agents can check docs as a tool.")
76
102
  .action(async () => {
103
+ const { runMcpServer } = await import("./mcp.js");
77
104
  await runMcpServer();
78
105
  });
79
106
  program.parseAsync();
package/dist/extract.js CHANGED
@@ -4,7 +4,9 @@ import path from "node:path";
4
4
  // deterministically against the source tree.
5
5
  const FLAG_RE = /(^|[\s(`"'])(--[a-zA-Z][a-zA-Z0-9-]+)/g;
6
6
  const ENV_RE = /\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+){1,})\b/g;
7
- const PATH_RE = /([\w./-]+\/[\w./-]+\.[a-zA-Z0-9]+|[\w-]+\.[a-zA-Z]{2,4})/g;
7
+ // Either a slash path (a/b.ext) or a (possibly multi-dot) filename (app.config.ts).
8
+ // The multi-dot form keeps whole filenames intact instead of fragmenting them.
9
+ const PATH_RE = /([\w./-]+\/[\w./-]+\.[a-zA-Z0-9]+|[\w-]+(?:\.[\w-]+)*\.[a-zA-Z][a-zA-Z0-9]{0,8})\b/g;
8
10
  // Common English ALL_CAPS that are not env vars.
9
11
  const ENV_STOPWORDS = new Set([
10
12
  "NOTE",
@@ -20,8 +22,19 @@ const ENV_STOPWORDS = new Set([
20
22
  "MIT",
21
23
  "README",
22
24
  ]);
23
- // File-ish tokens that are usually prose, not real paths.
24
- const PATH_STOPWORDS = new Set(["e.g.", "i.e.", "etc.", "vs.", "a.k.a."]);
25
+ // File-ish tokens that are usually prose, not real paths. Both the
26
+ // trailing-dot and bare forms, since PATH_RE can match either.
27
+ const PATH_STOPWORDS = new Set([
28
+ "e.g.",
29
+ "i.e.",
30
+ "etc.",
31
+ "vs.",
32
+ "a.k.a.",
33
+ "e.g",
34
+ "i.e",
35
+ "a.k.a",
36
+ "vs",
37
+ ]);
25
38
  /** Extract deterministically-checkable claims from a single doc file. */
26
39
  export function extractClaims(root, docFile) {
27
40
  const abs = path.isAbsolute(docFile) ? docFile : path.join(root, docFile);
@@ -73,28 +86,29 @@ export function extractClaims(root, docFile) {
73
86
  }
74
87
  continue;
75
88
  }
76
- // Outside code: scan inline code spans plus the raw line for tokens.
89
+ // Flags are distinctive (the -- prefix), so they can be claimed from prose.
90
+ // Env vars, paths, and symbols only count inside inline code spans — raw
91
+ // prose has too many ALL_CAPS words and dotted phrases to scan safely.
77
92
  const inlineSpans = [...line.matchAll(/`([^`]+)`/g)].map((m) => m[1]);
78
- const scanText = line;
79
- for (const m of scanText.matchAll(FLAG_RE)) {
93
+ for (const m of line.matchAll(FLAG_RE)) {
80
94
  const flag = m[2];
81
95
  push("flag", lineNo, flag, `the CLI flag ${flag} exists`, [flag]);
82
96
  }
83
- for (const m of scanText.matchAll(ENV_RE)) {
84
- const env = m[1];
85
- if (ENV_STOPWORDS.has(env))
86
- continue;
87
- push("env", lineNo, env, `the environment variable ${env} is used`, [env]);
88
- }
89
- // Only treat path-looking tokens inside inline code as path claims, to
90
- // avoid matching ordinary prose words with dots.
91
97
  for (const span of inlineSpans) {
98
+ for (const m of span.matchAll(ENV_RE)) {
99
+ const env = m[1];
100
+ if (ENV_STOPWORDS.has(env))
101
+ continue;
102
+ push("env", lineNo, env, `the environment variable ${env} is used`, [env]);
103
+ }
92
104
  for (const m of span.matchAll(PATH_RE)) {
93
105
  const p = m[1];
94
106
  if (PATH_STOPWORDS.has(p))
95
107
  continue;
96
108
  if (p.startsWith("--"))
97
109
  continue;
110
+ if (p.startsWith("/"))
111
+ continue; // absolute/home path, not a repo file
98
112
  push("file", lineNo, p, `the path ${p} exists`, [p]);
99
113
  }
100
114
  // A bare identifier in backticks used like a function call.
package/dist/llm.js CHANGED
@@ -1,11 +1,13 @@
1
- import Anthropic from "@anthropic-ai/sdk";
1
+ // The Anthropic SDK is imported lazily so the default deterministic path (and
2
+ // `npx docverity` cold start) never pays to load it.
2
3
  let client = null;
3
4
  export function hasApiKey() {
4
5
  return Boolean(process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN);
5
6
  }
6
- function getClient() {
7
+ async function getClient() {
7
8
  if (client)
8
9
  return client;
10
+ const { default: Anthropic } = await import("@anthropic-ai/sdk");
9
11
  // Prefer an API key; otherwise fall back to a Bearer/OAuth token (e.g. from
10
12
  // `ant auth login`), which needs the oauth beta header on every request.
11
13
  if (!process.env.ANTHROPIC_API_KEY && process.env.ANTHROPIC_AUTH_TOKEN) {
@@ -21,11 +23,9 @@ function getClient() {
21
23
  }
22
24
  /**
23
25
  * Call the model with a forced JSON schema and return the parsed object.
24
- * Uses output_config.format so the first text block is guaranteed valid JSON.
26
+ * output_config.format guarantees the first text block is valid JSON.
25
27
  */
26
28
  export async function structuredCall(model, system, user, schema) {
27
- // Built as an untyped param: `adaptive` thinking and `output_config` are
28
- // supported by the API but newer than this SDK version's type definitions.
29
29
  const params = {
30
30
  model,
31
31
  max_tokens: 8000,
@@ -34,9 +34,15 @@ export async function structuredCall(model, system, user, schema) {
34
34
  messages: [{ role: "user", content: user }],
35
35
  output_config: { format: { type: "json_schema", schema } },
36
36
  };
37
- const res = await getClient().messages.create(params);
37
+ const c = await getClient();
38
+ const res = await c.messages.create(params);
38
39
  const block = res.content.find((b) => b.type === "text");
39
40
  if (!block)
40
41
  throw new Error("Model returned no text content.");
41
- return JSON.parse(block.text);
42
+ try {
43
+ return JSON.parse(block.text);
44
+ }
45
+ catch {
46
+ throw new Error("LLM returned truncated or non-JSON output (may have exceeded max_tokens).");
47
+ }
42
48
  }
package/dist/mcp.js CHANGED
@@ -45,14 +45,19 @@ async function runCheck(args) {
45
45
  const wantLlm = Boolean(args.llm);
46
46
  const useLlm = wantLlm && hasApiKey();
47
47
  const verdicts = [];
48
+ let llmRan = false;
49
+ let llmError;
48
50
  for (const doc of docFiles) {
49
51
  verdicts.push(...(await verifyReference(root, extractClaims(root, doc))));
50
52
  if (useLlm) {
51
53
  try {
52
54
  verdicts.push(...(await verifyLlm(root, doc, "claude-opus-4-8")));
55
+ llmRan = true;
53
56
  }
54
- catch {
55
- // Surface as a note rather than failing the whole call.
57
+ catch (err) {
58
+ // Don't fail the whole tool call; surface it as a note so the agent
59
+ // knows it got deterministic-only results, not a clean pass.
60
+ llmError = err?.message ?? String(err);
56
61
  }
57
62
  }
58
63
  }
@@ -67,9 +72,9 @@ async function runCheck(args) {
67
72
  drifted++;
68
73
  else
69
74
  unverifiable++;
70
- const include = (v.status === "drifted" && v.confidence >= failConfidence) ||
71
- v.status === "unverifiable";
72
- if (!include || v.status === "ok")
75
+ // Only surface actionable drift; unverifiable claims stay in the counts but
76
+ // would be noise for an agent to act on.
77
+ if (!(v.status === "drifted" && v.confidence >= failConfidence))
73
78
  continue;
74
79
  findings.push({
75
80
  doc: v.claim.docFile,
@@ -93,11 +98,17 @@ async function runCheck(args) {
93
98
  const truncated = findings.length > MAX_FINDINGS;
94
99
  const shown = findings.slice(0, MAX_FINDINGS);
95
100
  let note;
101
+ const addNote = (s) => {
102
+ note = note ? `${note} ${s}` : s;
103
+ };
96
104
  if (wantLlm && !hasApiKey()) {
97
- note = "llm=true was requested but no ANTHROPIC_API_KEY/ANTHROPIC_AUTH_TOKEN is set; ran deterministic checks only.";
105
+ addNote("llm=true was requested but no ANTHROPIC_API_KEY/ANTHROPIC_AUTH_TOKEN is set; ran deterministic checks only.");
106
+ }
107
+ if (llmError) {
108
+ addNote(`LLM prose verifier failed: ${llmError}; reported deterministic results only.`);
98
109
  }
99
110
  if (truncated) {
100
- note = `${note ? note + " " : ""}Showing ${MAX_FINDINGS} of ${findings.length} findings.`;
111
+ addNote(`Showing ${MAX_FINDINGS} of ${findings.length} findings.`);
101
112
  }
102
113
  return {
103
114
  summary: {
@@ -105,7 +116,7 @@ async function runCheck(args) {
105
116
  ok,
106
117
  drifted,
107
118
  unverifiable,
108
- engine: useLlm ? "reference+llm" : "reference",
119
+ engine: useLlm && llmRan ? "reference+llm" : "reference",
109
120
  },
110
121
  findings: shown,
111
122
  note,
@@ -113,7 +124,7 @@ async function runCheck(args) {
113
124
  }
114
125
  /** Start the stdio MCP server. Only protocol messages go to stdout. */
115
126
  export async function runMcpServer() {
116
- const server = new Server({ name: "docverity", version: "0.1.0" }, { capabilities: { tools: {} } });
127
+ const server = new Server({ name: "docverity", version: "0.2.0" }, { capabilities: { tools: {} } });
117
128
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
118
129
  tools: [
119
130
  {
@@ -130,15 +141,25 @@ export async function runMcpServer() {
130
141
  content: [{ type: "text", text: `Unknown tool: ${req.params.name}` }],
131
142
  };
132
143
  }
133
- const result = await runCheck((req.params.arguments ?? {}));
134
- const headline = result.findings.length === 0
135
- ? "No doc drift detected."
136
- : `${result.findings.length} documentation claim(s) need attention.`;
137
- return {
138
- content: [
139
- { type: "text", text: `${headline}\n\n${JSON.stringify(result, null, 2)}` },
140
- ],
141
- };
144
+ try {
145
+ const result = await runCheck((req.params.arguments ?? {}));
146
+ const headline = result.findings.length === 0
147
+ ? "No doc drift detected."
148
+ : `${result.findings.length} documentation claim(s) need attention.`;
149
+ return {
150
+ content: [
151
+ { type: "text", text: `${headline}\n\n${JSON.stringify(result, null, 2)}` },
152
+ ],
153
+ };
154
+ }
155
+ catch (err) {
156
+ return {
157
+ isError: true,
158
+ content: [
159
+ { type: "text", text: `docverity check failed: ${err?.message ?? err}` },
160
+ ],
161
+ };
162
+ }
142
163
  });
143
164
  await server.connect(new StdioServerTransport());
144
165
  }
package/dist/report.js CHANGED
@@ -1,5 +1,10 @@
1
1
  import kleur from "kleur";
2
+ /** A non-finite threshold must never silently pass all drift; fall back to 0.7. */
3
+ export function effectiveFailConfidence(opts) {
4
+ return Number.isFinite(opts.failConfidence) ? opts.failConfidence : 0.7;
5
+ }
2
6
  export function summarize(verdicts, opts) {
7
+ const failConfidence = effectiveFailConfidence(opts);
3
8
  let ok = 0;
4
9
  let drifted = 0;
5
10
  let unverifiable = 0;
@@ -9,7 +14,7 @@ export function summarize(verdicts, opts) {
9
14
  ok++;
10
15
  else if (v.status === "drifted") {
11
16
  drifted++;
12
- if (v.confidence >= opts.failConfidence)
17
+ if (v.confidence >= failConfidence)
13
18
  failures.push(v);
14
19
  }
15
20
  else {
@@ -23,8 +28,9 @@ export function summarize(verdicts, opts) {
23
28
  /** Pretty terminal report. Returns true if the check should fail the build. */
24
29
  export function printReport(verdicts, opts) {
25
30
  const summary = summarize(verdicts, opts);
31
+ const failConfidence = effectiveFailConfidence(opts);
26
32
  const drifts = verdicts
27
- .filter((v) => v.status === "drifted" && v.confidence >= opts.failConfidence)
33
+ .filter((v) => v.status === "drifted" && v.confidence >= failConfidence)
28
34
  .sort((a, b) => b.confidence - a.confidence);
29
35
  if (drifts.length === 0) {
30
36
  console.log(kleur.green(`\n✓ No doc drift detected.`) +
@@ -51,13 +57,18 @@ export function printReport(verdicts, opts) {
51
57
  console.log(kleur.dim(`${summary.ok} ok · ${summary.drifted} drifted · ${summary.unverifiable} unverifiable\n`));
52
58
  return true;
53
59
  }
60
+ // GitHub workflow commands need "%", CR, and LF escaped in data, and additionally
61
+ // ":" and "," escaped in property values, or the annotation truncates or mis-targets.
62
+ const escData = (s) => s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
63
+ const escProp = (s) => escData(s).replace(/:/g, "%3A").replace(/,/g, "%2C");
54
64
  /** GitHub Actions workflow-command annotations. */
55
65
  export function printGithubAnnotations(verdicts, opts) {
66
+ const failConfidence = effectiveFailConfidence(opts);
56
67
  for (const v of verdicts) {
57
- if (v.status !== "drifted" || v.confidence < opts.failConfidence)
68
+ if (v.status !== "drifted" || v.confidence < failConfidence)
58
69
  continue;
59
- const msg = `${v.claim.text} — ${v.explanation}`.replace(/\n/g, " ");
60
- console.log(`::error file=${v.claim.docFile},line=${v.claim.line}::doc drift: ${msg}`);
70
+ const msg = `doc drift: ${v.claim.text} — ${v.explanation}`;
71
+ console.log(`::error file=${escProp(v.claim.docFile)},line=${v.claim.line},title=docverity::${escData(msg)}`);
61
72
  }
62
73
  }
63
74
  export function toJson(verdicts, opts) {
package/dist/search.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { execFile } from "node:child_process";
2
2
  import { promisify } from "node:util";
3
- import { existsSync } from "node:fs";
3
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
4
4
  import path from "node:path";
5
5
  const execFileAsync = promisify(execFile);
6
6
  // Directories never worth searching for evidence of documented behavior.
@@ -23,6 +23,12 @@ function isDocFile(file) {
23
23
  const lower = file.toLowerCase();
24
24
  return DOC_EXTENSIONS.some((ext) => lower.endsWith(ext));
25
25
  }
26
+ function escapeRegex(s) {
27
+ return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
28
+ }
29
+ function flagPattern(token) {
30
+ return `(^|[^A-Za-z0-9-])${escapeRegex(token)}([^A-Za-z0-9-]|$)`;
31
+ }
26
32
  let rgChecked = false;
27
33
  let rgAvailable = false;
28
34
  async function hasRipgrep() {
@@ -38,43 +44,57 @@ async function hasRipgrep() {
38
44
  }
39
45
  return rgAvailable;
40
46
  }
41
- /**
42
- * Search the repo for a literal string. Returns up to `limit` evidence hits.
43
- * Uses ripgrep when available, falling back to a Node-based walk otherwise.
44
- */
45
- export async function searchLiteral(root, needle, limit = 8) {
47
+ /** Plain substring search. Used to gather evidence for the LLM engine. */
48
+ export function searchLiteral(root, needle, limit = 8) {
49
+ return runSearch(root, needle, "literal", limit);
50
+ }
51
+ /** Boundary-aware search used by the deterministic verifier. */
52
+ export function searchToken(root, token, mode, limit = 8) {
53
+ return runSearch(root, token, mode, limit);
54
+ }
55
+ async function runSearch(root, needle, mode, limit) {
46
56
  if (!needle.trim())
47
57
  return [];
48
- if (await hasRipgrep()) {
49
- const args = [
50
- "--fixed-strings",
51
- "--line-number",
52
- "--no-heading",
53
- "--color",
54
- "never",
55
- "--max-count",
56
- String(limit),
57
- ];
58
- for (const dir of IGNORE_DIRS)
59
- args.push("--glob", `!${dir}/`);
60
- for (const ext of DOC_EXTENSIONS)
61
- args.push("--glob", `!*${ext}`);
62
- args.push("--", needle, ".");
63
- try {
64
- const { stdout } = await execFileAsync("rg", args, {
65
- cwd: root,
66
- maxBuffer: 8 * 1024 * 1024,
67
- });
68
- return parseRgOutput(stdout, limit);
69
- }
70
- catch (err) {
71
- // rg exits 1 when there are no matches; that is not an error for us.
72
- if (err?.code === 1)
73
- return [];
74
- throw err;
75
- }
58
+ if (await hasRipgrep())
59
+ return rgSearch(root, needle, mode, limit);
60
+ return fallbackSearch(root, needle, mode, limit);
61
+ }
62
+ async function rgSearch(root, needle, mode, limit) {
63
+ const args = [
64
+ "--line-number",
65
+ "--no-heading",
66
+ "--color",
67
+ "never",
68
+ "--max-count",
69
+ String(limit),
70
+ ];
71
+ for (const dir of IGNORE_DIRS)
72
+ args.push("--glob", `!${dir}/`);
73
+ for (const ext of DOC_EXTENSIONS)
74
+ args.push("--glob", `!*${ext}`);
75
+ if (mode === "flag") {
76
+ args.push("--regexp", flagPattern(needle));
77
+ }
78
+ else if (mode === "word") {
79
+ args.push("--fixed-strings", "--word-regexp", "--regexp", needle);
80
+ }
81
+ else {
82
+ args.push("--fixed-strings", "--regexp", needle);
83
+ }
84
+ args.push("--", ".");
85
+ try {
86
+ const { stdout } = await execFileAsync("rg", args, {
87
+ cwd: root,
88
+ maxBuffer: 8 * 1024 * 1024,
89
+ });
90
+ return parseRgOutput(stdout, limit);
91
+ }
92
+ catch (err) {
93
+ // rg exits 1 when there are no matches; that is not an error for us.
94
+ if (err?.code === 1)
95
+ return [];
96
+ throw err;
76
97
  }
77
- return fallbackSearch(root, needle, limit);
78
98
  }
79
99
  function parseRgOutput(stdout, limit) {
80
100
  const out = [];
@@ -86,7 +106,7 @@ function parseRgOutput(stdout, limit) {
86
106
  const second = raw.indexOf(":", first + 1);
87
107
  if (first < 0 || second < 0)
88
108
  continue;
89
- const file = raw.slice(0, first);
109
+ const file = raw.slice(0, first).replace(/^\.\//, "");
90
110
  const line = Number(raw.slice(first + 1, second));
91
111
  const snippet = raw.slice(second + 1).trim();
92
112
  out.push({ file, line, snippet: snippet.slice(0, 200) });
@@ -95,8 +115,16 @@ function parseRgOutput(stdout, limit) {
95
115
  }
96
116
  return out;
97
117
  }
98
- import { readdirSync, readFileSync, statSync } from "node:fs";
99
- function fallbackSearch(root, needle, limit) {
118
+ function matcherFor(needle, mode) {
119
+ if (mode === "literal")
120
+ return (line) => line.includes(needle);
121
+ const re = mode === "flag"
122
+ ? new RegExp(flagPattern(needle))
123
+ : new RegExp(`\\b${escapeRegex(needle)}\\b`);
124
+ return (line) => re.test(line);
125
+ }
126
+ function fallbackSearch(root, needle, mode, limit) {
127
+ const matches = matcherFor(needle, mode);
100
128
  const out = [];
101
129
  const walk = (dir) => {
102
130
  if (out.length >= limit)
@@ -134,7 +162,7 @@ function fallbackSearch(root, needle, limit) {
134
162
  }
135
163
  const lines = content.split("\n");
136
164
  for (let i = 0; i < lines.length; i++) {
137
- if (lines[i].includes(needle)) {
165
+ if (matches(lines[i])) {
138
166
  out.push({
139
167
  file: path.relative(root, full),
140
168
  line: i + 1,
@@ -150,8 +178,13 @@ function fallbackSearch(root, needle, limit) {
150
178
  walk(root);
151
179
  return out;
152
180
  }
153
- /** Resolve a documented path claim against the filesystem. */
181
+ /** Resolve a documented path claim against the filesystem, contained to the repo. */
154
182
  export function fileExists(root, relPath) {
155
183
  const clean = relPath.replace(/^\.\//, "").replace(/[`*]/g, "");
156
- return existsSync(path.join(root, clean));
184
+ const base = path.resolve(root);
185
+ const target = path.resolve(base, clean);
186
+ // Don't let "../../etc/passwd" style references probe outside the repo.
187
+ if (target !== base && !target.startsWith(base + path.sep))
188
+ return false;
189
+ return existsSync(target);
157
190
  }
@@ -6,6 +6,8 @@ const EXTRACT_SYSTEM = `You extract verifiable factual claims that a documentati
6
6
 
7
7
  A claim is a specific, checkable assertion: a default value, a return type, a parameter name, a config key, an install step, a behavior ("by default X happens"), an output shape. Ignore marketing copy, aspirational statements, and anything not checkable against source code.
8
8
 
9
+ Do NOT emit bare flag/env-var/file-path/function-name existence claims (e.g. "the --json flag exists", "see src/foo.ts") — a separate deterministic engine already checks those, and re-reporting them causes duplicates. Focus on semantic prose claims: values, behaviors, types, defaults.
10
+
9
11
  For each claim, provide search terms (identifiers, strings, file names) that would help locate the relevant code. Be precise; prefer fewer high-quality claims over many vague ones.`;
10
12
  const VERIFY_SYSTEM = `You verify whether documentation claims still match the codebase, given source-code evidence.
11
13
 
@@ -14,7 +16,7 @@ For each claim, decide:
14
16
  - "drifted": the evidence contradicts the claim (the docs are now wrong).
15
17
  - "unverifiable": the evidence is insufficient to decide.
16
18
 
17
- Be conservative. Only mark "drifted" when the evidence clearly contradicts the claim. When unsure, choose "unverifiable". A false "drifted" verdict is worse than a missed one. Give the specific contradiction and, when drifted, a concrete suggested doc fix.`;
19
+ Be conservative. Only mark "drifted" when the evidence affirmatively shows a DIFFERENT value or behavior than the claim states. Absence of evidence is NEVER drift: if the evidence array is empty, or does not actually mention the claim's subject, you MUST return "unverifiable". A false "drifted" verdict is worse than a missed one. Give the specific contradiction and, when drifted, a concrete suggested doc fix.`;
18
20
  const EXTRACT_SCHEMA = {
19
21
  type: "object",
20
22
  additionalProperties: false,
@@ -88,15 +90,36 @@ export async function verifyLlm(root, docFile, model) {
88
90
  }
89
91
  evidenceByClaim.set(claim.id, dedupeEvidence(found).slice(0, 8));
90
92
  }
91
- const verifyPayload = claims.map((c) => ({
93
+ const out = [];
94
+ // Claims with no located evidence are NOT sent to the model: handing it an
95
+ // empty evidence array invites "absent, therefore drifted" hallucinations.
96
+ // Without evidence the claim is unverifiable by definition.
97
+ const grounded = [];
98
+ for (const claim of claims) {
99
+ if ((evidenceByClaim.get(claim.id) ?? []).length === 0) {
100
+ out.push({
101
+ claim,
102
+ status: "unverifiable",
103
+ confidence: 0.3,
104
+ explanation: "No code evidence located for this claim.",
105
+ evidence: [],
106
+ engine: "llm",
107
+ });
108
+ }
109
+ else {
110
+ grounded.push(claim);
111
+ }
112
+ }
113
+ if (grounded.length === 0)
114
+ return out;
115
+ const verifyPayload = grounded.map((c) => ({
92
116
  id: c.id,
93
117
  claim: c.assertion,
94
118
  docText: c.text,
95
119
  evidence: evidenceByClaim.get(c.id) ?? [],
96
120
  }));
97
121
  const { verdicts: rawVerdicts } = await structuredCall(model, VERIFY_SYSTEM, `Verify these claims against the evidence:\n\n${JSON.stringify(verifyPayload, null, 2)}`, VERIFY_SCHEMA);
98
- const byId = new Map(claims.map((c) => [c.id, c]));
99
- const out = [];
122
+ const byId = new Map(grounded.map((c) => [c.id, c]));
100
123
  for (const v of rawVerdicts) {
101
124
  const claim = byId.get(v.id);
102
125
  if (!claim)
@@ -1,4 +1,4 @@
1
- import { searchLiteral, fileExists } from "./search.js";
1
+ import { searchToken, fileExists } from "./search.js";
2
2
  /**
3
3
  * The deterministic engine: verify each claim by looking for hard evidence in
4
4
  * the source tree. No model, no API key. High precision by design — when in
@@ -15,26 +15,38 @@ async function verifyOne(root, claim) {
15
15
  const base = { claim, evidence: [], engine: "reference" };
16
16
  switch (claim.kind) {
17
17
  case "file": {
18
- const exists = fileExists(root, claim.text);
19
- return exists
20
- ? {
18
+ if (fileExists(root, claim.text)) {
19
+ return {
21
20
  ...base,
22
21
  status: "ok",
23
22
  confidence: 0.95,
24
23
  explanation: `${claim.text} exists on disk.`,
25
- }
26
- : {
24
+ };
25
+ }
26
+ // A bare filename with no path separator is often a library or framework
27
+ // name in code formatting (Node.js, config.js), not a repo file. Don't
28
+ // assert drift on those; downgrade to unverifiable.
29
+ if (!claim.text.includes("/")) {
30
+ return {
27
31
  ...base,
28
- status: "drifted",
29
- confidence: 0.9,
30
- explanation: `The docs reference ${claim.text}, but no such file or directory exists.`,
31
- suggestedFix: `Update or remove the reference to ${claim.text}.`,
32
+ status: "unverifiable",
33
+ confidence: 0.3,
34
+ explanation: `${claim.text} is not a file in the repo; it may be a library or framework name rather than a path.`,
32
35
  };
36
+ }
37
+ return {
38
+ ...base,
39
+ status: "drifted",
40
+ confidence: 0.9,
41
+ explanation: `The docs reference ${claim.text}, but no such file or directory exists.`,
42
+ suggestedFix: `Update or remove the reference to ${claim.text}.`,
43
+ };
33
44
  }
34
45
  case "flag":
35
46
  case "env":
36
47
  case "symbol": {
37
- const hits = await searchLiteral(root, claim.text);
48
+ const mode = claim.kind === "flag" ? "flag" : "word";
49
+ const hits = await searchToken(root, claim.text, mode);
38
50
  if (hits.length > 0) {
39
51
  return {
40
52
  ...base,
@@ -49,10 +61,12 @@ async function verifyOne(root, claim) {
49
61
  : claim.kind === "env"
50
62
  ? "environment variable"
51
63
  : "symbol";
64
+ // Boundary-aware search (above) means a hit is a real, whole-token match,
65
+ // so a miss is trustworthy enough to fail CI on, symbols included.
52
66
  return {
53
67
  ...base,
54
68
  status: "drifted",
55
- confidence: claim.kind === "symbol" ? 0.6 : 0.8,
69
+ confidence: 0.8,
56
70
  explanation: `The docs mention the ${noun} ${claim.text}, but it does not appear anywhere in the source.`,
57
71
  suggestedFix: `Verify ${claim.text} still exists; it may have been renamed or removed.`,
58
72
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docverity",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Catch documentation that lies about your code. Verify that your docs' claims still match the source, in CI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -44,7 +44,7 @@
44
44
  "url": "https://github.com/deveshagarwal/docverity/issues"
45
45
  },
46
46
  "dependencies": {
47
- "@anthropic-ai/sdk": "^0.70.0",
47
+ "@anthropic-ai/sdk": "^0.106.0",
48
48
  "@modelcontextprotocol/sdk": "^1.29.0",
49
49
  "commander": "^12.1.0",
50
50
  "kleur": "^4.1.5"