docverity 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/cli.js +35 -8
- package/dist/extract.js +28 -14
- package/dist/llm.js +13 -7
- package/dist/mcp.js +39 -18
- package/dist/report.js +16 -5
- package/dist/search.js +74 -41
- package/dist/verify-llm.js +27 -4
- package/dist/verify-reference.js +26 -12
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -69,7 +69,9 @@ docverity --no-llm
|
|
|
69
69
|
| `--format <fmt>` | `pretty` (default), `json`, or `github`. |
|
|
70
70
|
|
|
71
71
|
Docverity exits non-zero when it finds drift above the confidence threshold, so
|
|
72
|
-
it fails CI the way a linter would.
|
|
72
|
+
it fails CI the way a linter would. Exit codes: `0` clean, `1` drift found,
|
|
73
|
+
`2` a configuration error (e.g. an invalid `--fail-confidence` or a missing doc
|
|
74
|
+
file) so a typo can never mask real drift with a green build.
|
|
73
75
|
|
|
74
76
|
## In CI (GitHub Actions)
|
|
75
77
|
|
package/dist/cli.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from "commander";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import { existsSync } from "node:fs";
|
|
4
5
|
import kleur from "kleur";
|
|
5
6
|
import { extractClaims } from "./extract.js";
|
|
6
7
|
import { verifyReference } from "./verify-reference.js";
|
|
7
|
-
import { verifyLlm } from "./verify-llm.js";
|
|
8
8
|
import { hasApiKey } from "./llm.js";
|
|
9
9
|
import { printReport, printGithubAnnotations, toJson, summarize } from "./report.js";
|
|
10
10
|
import { discoverDocs } from "./discover.js";
|
|
11
|
-
|
|
11
|
+
// verify-llm and mcp pull in heavy SDKs; they are imported lazily, only when used.
|
|
12
12
|
const program = new Command();
|
|
13
13
|
program
|
|
14
14
|
.name("docverity")
|
|
15
15
|
.description("Catch documentation that lies about your code.")
|
|
16
|
-
.version("0.
|
|
16
|
+
.version("0.2.0");
|
|
17
17
|
program
|
|
18
18
|
.command("check", { isDefault: true })
|
|
19
19
|
.description("Check docs for claims that no longer match the code.")
|
|
@@ -26,7 +26,26 @@ program
|
|
|
26
26
|
.option("--format <fmt>", "output format: pretty | json | github", "pretty")
|
|
27
27
|
.action(async (docs, rawOpts) => {
|
|
28
28
|
const root = path.resolve(rawOpts.root);
|
|
29
|
-
|
|
29
|
+
// A non-numeric threshold must never silently pass CI. Exit 2 = config error
|
|
30
|
+
// (distinct from 1 = drift found).
|
|
31
|
+
const failConfidence = Number(rawOpts.failConfidence);
|
|
32
|
+
if (!Number.isFinite(failConfidence) || failConfidence < 0 || failConfidence > 1) {
|
|
33
|
+
console.error(kleur.red(`Invalid --fail-confidence: ${rawOpts.failConfidence} (expected a number between 0 and 1).`));
|
|
34
|
+
process.exit(2);
|
|
35
|
+
}
|
|
36
|
+
// Resolve explicit doc args relative to root (path.resolve handles
|
|
37
|
+
// absolute/cwd-relative); path.relative makes them root-relative for the
|
|
38
|
+
// extractor and verifier, fixing the old root+arg double-join.
|
|
39
|
+
const docFiles = docs.length
|
|
40
|
+
? docs.map((d) => path.relative(root, path.resolve(d)))
|
|
41
|
+
: discoverDocs(root);
|
|
42
|
+
if (docs.length) {
|
|
43
|
+
const missing = docFiles.filter((d) => !existsSync(path.join(root, d)));
|
|
44
|
+
if (missing.length) {
|
|
45
|
+
console.error(kleur.red(`Doc file(s) not found: ${missing.join(", ")}`));
|
|
46
|
+
process.exit(2);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
30
49
|
if (!docFiles.length) {
|
|
31
50
|
console.error(kleur.yellow("No documentation files found."));
|
|
32
51
|
process.exit(0);
|
|
@@ -40,14 +59,21 @@ program
|
|
|
40
59
|
docFiles,
|
|
41
60
|
useLlm,
|
|
42
61
|
model: rawOpts.model,
|
|
43
|
-
failConfidence
|
|
62
|
+
failConfidence,
|
|
44
63
|
strict: Boolean(rawOpts.strict),
|
|
45
64
|
};
|
|
65
|
+
// Lazy-load the LLM engine (and its SDK) only when actually used.
|
|
66
|
+
const verifyLlm = useLlm ? (await import("./verify-llm.js")).verifyLlm : null;
|
|
46
67
|
const verdicts = [];
|
|
47
68
|
for (const doc of docFiles) {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
69
|
+
try {
|
|
70
|
+
verdicts.push(...(await verifyReference(root, extractClaims(root, doc))));
|
|
71
|
+
}
|
|
72
|
+
catch (err) {
|
|
73
|
+
console.error(kleur.yellow(`Cannot check ${doc}: ${err?.message ?? err}`));
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
if (verifyLlm) {
|
|
51
77
|
try {
|
|
52
78
|
verdicts.push(...(await verifyLlm(root, doc, opts.model)));
|
|
53
79
|
}
|
|
@@ -74,6 +100,7 @@ program
|
|
|
74
100
|
.command("mcp")
|
|
75
101
|
.description("Run as an MCP server (stdio) so agents can check docs as a tool.")
|
|
76
102
|
.action(async () => {
|
|
103
|
+
const { runMcpServer } = await import("./mcp.js");
|
|
77
104
|
await runMcpServer();
|
|
78
105
|
});
|
|
79
106
|
program.parseAsync();
|
package/dist/extract.js
CHANGED
|
@@ -4,7 +4,9 @@ import path from "node:path";
|
|
|
4
4
|
// deterministically against the source tree.
|
|
5
5
|
const FLAG_RE = /(^|[\s(`"'])(--[a-zA-Z][a-zA-Z0-9-]+)/g;
|
|
6
6
|
const ENV_RE = /\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+){1,})\b/g;
|
|
7
|
-
|
|
7
|
+
// Either a slash path (a/b.ext) or a (possibly multi-dot) filename (app.config.ts).
|
|
8
|
+
// The multi-dot form keeps whole filenames intact instead of fragmenting them.
|
|
9
|
+
const PATH_RE = /([\w./-]+\/[\w./-]+\.[a-zA-Z0-9]+|[\w-]+(?:\.[\w-]+)*\.[a-zA-Z][a-zA-Z0-9]{0,8})\b/g;
|
|
8
10
|
// Common English ALL_CAPS that are not env vars.
|
|
9
11
|
const ENV_STOPWORDS = new Set([
|
|
10
12
|
"NOTE",
|
|
@@ -20,8 +22,19 @@ const ENV_STOPWORDS = new Set([
|
|
|
20
22
|
"MIT",
|
|
21
23
|
"README",
|
|
22
24
|
]);
|
|
23
|
-
// File-ish tokens that are usually prose, not real paths.
|
|
24
|
-
|
|
25
|
+
// File-ish tokens that are usually prose, not real paths. Both the
|
|
26
|
+
// trailing-dot and bare forms, since PATH_RE can match either.
|
|
27
|
+
const PATH_STOPWORDS = new Set([
|
|
28
|
+
"e.g.",
|
|
29
|
+
"i.e.",
|
|
30
|
+
"etc.",
|
|
31
|
+
"vs.",
|
|
32
|
+
"a.k.a.",
|
|
33
|
+
"e.g",
|
|
34
|
+
"i.e",
|
|
35
|
+
"a.k.a",
|
|
36
|
+
"vs",
|
|
37
|
+
]);
|
|
25
38
|
/** Extract deterministically-checkable claims from a single doc file. */
|
|
26
39
|
export function extractClaims(root, docFile) {
|
|
27
40
|
const abs = path.isAbsolute(docFile) ? docFile : path.join(root, docFile);
|
|
@@ -73,28 +86,29 @@ export function extractClaims(root, docFile) {
|
|
|
73
86
|
}
|
|
74
87
|
continue;
|
|
75
88
|
}
|
|
76
|
-
//
|
|
89
|
+
// Flags are distinctive (the -- prefix), so they can be claimed from prose.
|
|
90
|
+
// Env vars, paths, and symbols only count inside inline code spans — raw
|
|
91
|
+
// prose has too many ALL_CAPS words and dotted phrases to scan safely.
|
|
77
92
|
const inlineSpans = [...line.matchAll(/`([^`]+)`/g)].map((m) => m[1]);
|
|
78
|
-
const
|
|
79
|
-
for (const m of scanText.matchAll(FLAG_RE)) {
|
|
93
|
+
for (const m of line.matchAll(FLAG_RE)) {
|
|
80
94
|
const flag = m[2];
|
|
81
95
|
push("flag", lineNo, flag, `the CLI flag ${flag} exists`, [flag]);
|
|
82
96
|
}
|
|
83
|
-
for (const m of scanText.matchAll(ENV_RE)) {
|
|
84
|
-
const env = m[1];
|
|
85
|
-
if (ENV_STOPWORDS.has(env))
|
|
86
|
-
continue;
|
|
87
|
-
push("env", lineNo, env, `the environment variable ${env} is used`, [env]);
|
|
88
|
-
}
|
|
89
|
-
// Only treat path-looking tokens inside inline code as path claims, to
|
|
90
|
-
// avoid matching ordinary prose words with dots.
|
|
91
97
|
for (const span of inlineSpans) {
|
|
98
|
+
for (const m of span.matchAll(ENV_RE)) {
|
|
99
|
+
const env = m[1];
|
|
100
|
+
if (ENV_STOPWORDS.has(env))
|
|
101
|
+
continue;
|
|
102
|
+
push("env", lineNo, env, `the environment variable ${env} is used`, [env]);
|
|
103
|
+
}
|
|
92
104
|
for (const m of span.matchAll(PATH_RE)) {
|
|
93
105
|
const p = m[1];
|
|
94
106
|
if (PATH_STOPWORDS.has(p))
|
|
95
107
|
continue;
|
|
96
108
|
if (p.startsWith("--"))
|
|
97
109
|
continue;
|
|
110
|
+
if (p.startsWith("/"))
|
|
111
|
+
continue; // absolute/home path, not a repo file
|
|
98
112
|
push("file", lineNo, p, `the path ${p} exists`, [p]);
|
|
99
113
|
}
|
|
100
114
|
// A bare identifier in backticks used like a function call.
|
package/dist/llm.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
// The Anthropic SDK is imported lazily so the default deterministic path (and
|
|
2
|
+
// `npx docverity` cold start) never pays to load it.
|
|
2
3
|
let client = null;
|
|
3
4
|
/** Report whether Anthropic credentials (an API key or an auth token) are set in the environment. */
export function hasApiKey() {
  const { ANTHROPIC_API_KEY: apiKey, ANTHROPIC_AUTH_TOKEN: authToken } = process.env;
  return Boolean(apiKey || authToken);
}
|
|
6
|
-
function getClient() {
|
|
7
|
+
async function getClient() {
|
|
7
8
|
if (client)
|
|
8
9
|
return client;
|
|
10
|
+
const { default: Anthropic } = await import("@anthropic-ai/sdk");
|
|
9
11
|
// Prefer an API key; otherwise fall back to a Bearer/OAuth token (e.g. from
|
|
10
12
|
// `ant auth login`), which needs the oauth beta header on every request.
|
|
11
13
|
if (!process.env.ANTHROPIC_API_KEY && process.env.ANTHROPIC_AUTH_TOKEN) {
|
|
@@ -21,11 +23,9 @@ function getClient() {
|
|
|
21
23
|
}
|
|
22
24
|
/**
|
|
23
25
|
* Call the model with a forced JSON schema and return the parsed object.
|
|
24
|
-
*
|
|
26
|
+
* output_config.format guarantees the first text block is valid JSON.
|
|
25
27
|
*/
|
|
26
28
|
export async function structuredCall(model, system, user, schema) {
|
|
27
|
-
// Built as an untyped param: `adaptive` thinking and `output_config` are
|
|
28
|
-
// supported by the API but newer than this SDK version's type definitions.
|
|
29
29
|
const params = {
|
|
30
30
|
model,
|
|
31
31
|
max_tokens: 8000,
|
|
@@ -34,9 +34,15 @@ export async function structuredCall(model, system, user, schema) {
|
|
|
34
34
|
messages: [{ role: "user", content: user }],
|
|
35
35
|
output_config: { format: { type: "json_schema", schema } },
|
|
36
36
|
};
|
|
37
|
-
const
|
|
37
|
+
const c = await getClient();
|
|
38
|
+
const res = await c.messages.create(params);
|
|
38
39
|
const block = res.content.find((b) => b.type === "text");
|
|
39
40
|
if (!block)
|
|
40
41
|
throw new Error("Model returned no text content.");
|
|
41
|
-
|
|
42
|
+
try {
|
|
43
|
+
return JSON.parse(block.text);
|
|
44
|
+
}
|
|
45
|
+
catch {
|
|
46
|
+
throw new Error("LLM returned truncated or non-JSON output (may have exceeded max_tokens).");
|
|
47
|
+
}
|
|
42
48
|
}
|
package/dist/mcp.js
CHANGED
|
@@ -45,14 +45,19 @@ async function runCheck(args) {
|
|
|
45
45
|
const wantLlm = Boolean(args.llm);
|
|
46
46
|
const useLlm = wantLlm && hasApiKey();
|
|
47
47
|
const verdicts = [];
|
|
48
|
+
let llmRan = false;
|
|
49
|
+
let llmError;
|
|
48
50
|
for (const doc of docFiles) {
|
|
49
51
|
verdicts.push(...(await verifyReference(root, extractClaims(root, doc))));
|
|
50
52
|
if (useLlm) {
|
|
51
53
|
try {
|
|
52
54
|
verdicts.push(...(await verifyLlm(root, doc, "claude-opus-4-8")));
|
|
55
|
+
llmRan = true;
|
|
53
56
|
}
|
|
54
|
-
catch {
|
|
55
|
-
//
|
|
57
|
+
catch (err) {
|
|
58
|
+
// Don't fail the whole tool call; surface it as a note so the agent
|
|
59
|
+
// knows it got deterministic-only results, not a clean pass.
|
|
60
|
+
llmError = err?.message ?? String(err);
|
|
56
61
|
}
|
|
57
62
|
}
|
|
58
63
|
}
|
|
@@ -67,9 +72,9 @@ async function runCheck(args) {
|
|
|
67
72
|
drifted++;
|
|
68
73
|
else
|
|
69
74
|
unverifiable++;
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if (!
|
|
75
|
+
// Only surface actionable drift; unverifiable claims stay in the counts but
|
|
76
|
+
// would be noise for an agent to act on.
|
|
77
|
+
if (!(v.status === "drifted" && v.confidence >= failConfidence))
|
|
73
78
|
continue;
|
|
74
79
|
findings.push({
|
|
75
80
|
doc: v.claim.docFile,
|
|
@@ -93,11 +98,17 @@ async function runCheck(args) {
|
|
|
93
98
|
const truncated = findings.length > MAX_FINDINGS;
|
|
94
99
|
const shown = findings.slice(0, MAX_FINDINGS);
|
|
95
100
|
let note;
|
|
101
|
+
const addNote = (s) => {
|
|
102
|
+
note = note ? `${note} ${s}` : s;
|
|
103
|
+
};
|
|
96
104
|
if (wantLlm && !hasApiKey()) {
|
|
97
|
-
|
|
105
|
+
addNote("llm=true was requested but no ANTHROPIC_API_KEY/ANTHROPIC_AUTH_TOKEN is set; ran deterministic checks only.");
|
|
106
|
+
}
|
|
107
|
+
if (llmError) {
|
|
108
|
+
addNote(`LLM prose verifier failed: ${llmError}; reported deterministic results only.`);
|
|
98
109
|
}
|
|
99
110
|
if (truncated) {
|
|
100
|
-
|
|
111
|
+
addNote(`Showing ${MAX_FINDINGS} of ${findings.length} findings.`);
|
|
101
112
|
}
|
|
102
113
|
return {
|
|
103
114
|
summary: {
|
|
@@ -105,7 +116,7 @@ async function runCheck(args) {
|
|
|
105
116
|
ok,
|
|
106
117
|
drifted,
|
|
107
118
|
unverifiable,
|
|
108
|
-
engine: useLlm ? "reference+llm" : "reference",
|
|
119
|
+
engine: useLlm && llmRan ? "reference+llm" : "reference",
|
|
109
120
|
},
|
|
110
121
|
findings: shown,
|
|
111
122
|
note,
|
|
@@ -113,7 +124,7 @@ async function runCheck(args) {
|
|
|
113
124
|
}
|
|
114
125
|
/** Start the stdio MCP server. Only protocol messages go to stdout. */
|
|
115
126
|
export async function runMcpServer() {
|
|
116
|
-
const server = new Server({ name: "docverity", version: "0.
|
|
127
|
+
const server = new Server({ name: "docverity", version: "0.2.0" }, { capabilities: { tools: {} } });
|
|
117
128
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
118
129
|
tools: [
|
|
119
130
|
{
|
|
@@ -130,15 +141,25 @@ export async function runMcpServer() {
|
|
|
130
141
|
content: [{ type: "text", text: `Unknown tool: ${req.params.name}` }],
|
|
131
142
|
};
|
|
132
143
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
144
|
+
try {
|
|
145
|
+
const result = await runCheck((req.params.arguments ?? {}));
|
|
146
|
+
const headline = result.findings.length === 0
|
|
147
|
+
? "No doc drift detected."
|
|
148
|
+
: `${result.findings.length} documentation claim(s) need attention.`;
|
|
149
|
+
return {
|
|
150
|
+
content: [
|
|
151
|
+
{ type: "text", text: `${headline}\n\n${JSON.stringify(result, null, 2)}` },
|
|
152
|
+
],
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
catch (err) {
|
|
156
|
+
return {
|
|
157
|
+
isError: true,
|
|
158
|
+
content: [
|
|
159
|
+
{ type: "text", text: `docverity check failed: ${err?.message ?? err}` },
|
|
160
|
+
],
|
|
161
|
+
};
|
|
162
|
+
}
|
|
142
163
|
});
|
|
143
164
|
await server.connect(new StdioServerTransport());
|
|
144
165
|
}
|
package/dist/report.js
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import kleur from "kleur";
|
|
2
|
+
/**
 * Resolve the confidence threshold at or above which a drifted claim fails the check.
 *
 * An unusable threshold must never silently pass all drift:
 * - missing options, a missing/non-numeric/non-finite value, or a value above 1
 *   fall back to 0.7 (the CLI documents the valid range as 0–1, so a larger
 *   threshold would let every drifted claim through);
 * - a negative value is clamped to 0, which keeps its "fail on any drift" effect.
 *
 * @param {{ failConfidence?: number } | null | undefined} opts - Check options.
 * @returns {number} A finite threshold in the range [0, 1].
 */
export function effectiveFailConfidence(opts) {
  const FALLBACK = 0.7;
  const value = opts?.failConfidence;
  if (!Number.isFinite(value) || value > 1)
    return FALLBACK;
  return value < 0 ? 0 : value;
}
|
|
2
6
|
export function summarize(verdicts, opts) {
|
|
7
|
+
const failConfidence = effectiveFailConfidence(opts);
|
|
3
8
|
let ok = 0;
|
|
4
9
|
let drifted = 0;
|
|
5
10
|
let unverifiable = 0;
|
|
@@ -9,7 +14,7 @@ export function summarize(verdicts, opts) {
|
|
|
9
14
|
ok++;
|
|
10
15
|
else if (v.status === "drifted") {
|
|
11
16
|
drifted++;
|
|
12
|
-
if (v.confidence >=
|
|
17
|
+
if (v.confidence >= failConfidence)
|
|
13
18
|
failures.push(v);
|
|
14
19
|
}
|
|
15
20
|
else {
|
|
@@ -23,8 +28,9 @@ export function summarize(verdicts, opts) {
|
|
|
23
28
|
/** Pretty terminal report. Returns true if the check should fail the build. */
|
|
24
29
|
export function printReport(verdicts, opts) {
|
|
25
30
|
const summary = summarize(verdicts, opts);
|
|
31
|
+
const failConfidence = effectiveFailConfidence(opts);
|
|
26
32
|
const drifts = verdicts
|
|
27
|
-
.filter((v) => v.status === "drifted" && v.confidence >=
|
|
33
|
+
.filter((v) => v.status === "drifted" && v.confidence >= failConfidence)
|
|
28
34
|
.sort((a, b) => b.confidence - a.confidence);
|
|
29
35
|
if (drifts.length === 0) {
|
|
30
36
|
console.log(kleur.green(`\n✓ No doc drift detected.`) +
|
|
@@ -51,13 +57,18 @@ export function printReport(verdicts, opts) {
|
|
|
51
57
|
console.log(kleur.dim(`${summary.ok} ok · ${summary.drifted} drifted · ${summary.unverifiable} unverifiable\n`));
|
|
52
58
|
return true;
|
|
53
59
|
}
|
|
60
|
+
// GitHub workflow commands need %/CR/LF escaped in data, and additionally
// :/, escaped in property values, or the annotation truncates or mis-targets.
// Values are stringified first (null/undefined become "") so a missing or
// non-string field cannot throw while annotations are being printed.
/** Escape the message part of a workflow command (the text after `::`). */
const escData = (s) => (s == null ? "" : String(s))
  .replace(/%/g, "%25")
  .replace(/\r/g, "%0D")
  .replace(/\n/g, "%0A");
/** Escape a workflow-command property value such as `file=`. */
const escProp = (s) => escData(s).replace(/:/g, "%3A").replace(/,/g, "%2C");
|
|
54
64
|
/** GitHub Actions workflow-command annotations. */
|
|
55
65
|
export function printGithubAnnotations(verdicts, opts) {
|
|
66
|
+
const failConfidence = effectiveFailConfidence(opts);
|
|
56
67
|
for (const v of verdicts) {
|
|
57
|
-
if (v.status !== "drifted" || v.confidence <
|
|
68
|
+
if (v.status !== "drifted" || v.confidence < failConfidence)
|
|
58
69
|
continue;
|
|
59
|
-
const msg =
|
|
60
|
-
console.log(`::error file=${v.claim.docFile},line=${v.claim.line}
|
|
70
|
+
const msg = `doc drift: ${v.claim.text} — ${v.explanation}`;
|
|
71
|
+
console.log(`::error file=${escProp(v.claim.docFile)},line=${v.claim.line},title=docverity::${escData(msg)}`);
|
|
61
72
|
}
|
|
62
73
|
}
|
|
63
74
|
export function toJson(verdicts, opts) {
|
package/dist/search.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { execFile } from "node:child_process";
|
|
2
2
|
import { promisify } from "node:util";
|
|
3
|
-
import { existsSync } from "node:fs";
|
|
3
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
const execFileAsync = promisify(execFile);
|
|
6
6
|
// Directories never worth searching for evidence of documented behavior.
|
|
@@ -23,6 +23,12 @@ function isDocFile(file) {
|
|
|
23
23
|
const lower = file.toLowerCase();
|
|
24
24
|
return DOC_EXTENSIONS.some((ext) => lower.endsWith(ext));
|
|
25
25
|
}
|
|
26
|
+
/**
 * Backslash-escape every regular-expression metacharacter in `s`, so the
 * result matches the original text literally when embedded in a pattern.
 */
function escapeRegex(s) {
  const metachars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metachars, (ch) => `\\${ch}`);
}
|
|
29
|
+
/**
 * Build a regex source that matches `token` only as a whole CLI flag: it must
 * sit at the start/end of the searched text or be bordered by a character
 * other than a letter, digit, or hyphen.
 */
function flagPattern(token) {
  const leading = "(^|[^A-Za-z0-9-])";
  const trailing = "([^A-Za-z0-9-]|$)";
  return [leading, escapeRegex(token), trailing].join("");
}
|
|
26
32
|
let rgChecked = false;
|
|
27
33
|
let rgAvailable = false;
|
|
28
34
|
async function hasRipgrep() {
|
|
@@ -38,43 +44,57 @@ async function hasRipgrep() {
|
|
|
38
44
|
}
|
|
39
45
|
return rgAvailable;
|
|
40
46
|
}
|
|
41
|
-
/**
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
47
|
+
/**
 * Plain substring search (the "literal" mode of the shared search routine).
 * Used to gather evidence for the LLM engine.
 */
export function searchLiteral(root, needle, limit = 8) {
  const mode = "literal";
  return runSearch(root, needle, mode, limit);
}
|
|
51
|
+
/**
 * Boundary-aware search used by the deterministic verifier. `mode` is passed
 * straight through to the shared search routine to select the matching rule.
 */
export function searchToken(root, token, mode, limit = 8) {
  const pending = runSearch(root, token, mode, limit);
  return pending;
}
|
|
55
|
+
async function runSearch(root, needle, mode, limit) {
|
|
46
56
|
if (!needle.trim())
|
|
47
57
|
return [];
|
|
48
|
-
if (await hasRipgrep())
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
args.push("--",
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
58
|
+
if (await hasRipgrep())
|
|
59
|
+
return rgSearch(root, needle, mode, limit);
|
|
60
|
+
return fallbackSearch(root, needle, mode, limit);
|
|
61
|
+
}
|
|
62
|
+
async function rgSearch(root, needle, mode, limit) {
|
|
63
|
+
const args = [
|
|
64
|
+
"--line-number",
|
|
65
|
+
"--no-heading",
|
|
66
|
+
"--color",
|
|
67
|
+
"never",
|
|
68
|
+
"--max-count",
|
|
69
|
+
String(limit),
|
|
70
|
+
];
|
|
71
|
+
for (const dir of IGNORE_DIRS)
|
|
72
|
+
args.push("--glob", `!${dir}/`);
|
|
73
|
+
for (const ext of DOC_EXTENSIONS)
|
|
74
|
+
args.push("--glob", `!*${ext}`);
|
|
75
|
+
if (mode === "flag") {
|
|
76
|
+
args.push("--regexp", flagPattern(needle));
|
|
77
|
+
}
|
|
78
|
+
else if (mode === "word") {
|
|
79
|
+
args.push("--fixed-strings", "--word-regexp", "--regexp", needle);
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
args.push("--fixed-strings", "--regexp", needle);
|
|
83
|
+
}
|
|
84
|
+
args.push("--", ".");
|
|
85
|
+
try {
|
|
86
|
+
const { stdout } = await execFileAsync("rg", args, {
|
|
87
|
+
cwd: root,
|
|
88
|
+
maxBuffer: 8 * 1024 * 1024,
|
|
89
|
+
});
|
|
90
|
+
return parseRgOutput(stdout, limit);
|
|
91
|
+
}
|
|
92
|
+
catch (err) {
|
|
93
|
+
// rg exits 1 when there are no matches; that is not an error for us.
|
|
94
|
+
if (err?.code === 1)
|
|
95
|
+
return [];
|
|
96
|
+
throw err;
|
|
76
97
|
}
|
|
77
|
-
return fallbackSearch(root, needle, limit);
|
|
78
98
|
}
|
|
79
99
|
function parseRgOutput(stdout, limit) {
|
|
80
100
|
const out = [];
|
|
@@ -86,7 +106,7 @@ function parseRgOutput(stdout, limit) {
|
|
|
86
106
|
const second = raw.indexOf(":", first + 1);
|
|
87
107
|
if (first < 0 || second < 0)
|
|
88
108
|
continue;
|
|
89
|
-
const file = raw.slice(0, first);
|
|
109
|
+
const file = raw.slice(0, first).replace(/^\.\//, "");
|
|
90
110
|
const line = Number(raw.slice(first + 1, second));
|
|
91
111
|
const snippet = raw.slice(second + 1).trim();
|
|
92
112
|
out.push({ file, line, snippet: snippet.slice(0, 200) });
|
|
@@ -95,8 +115,16 @@ function parseRgOutput(stdout, limit) {
|
|
|
95
115
|
}
|
|
96
116
|
return out;
|
|
97
117
|
}
|
|
98
|
-
|
|
99
|
-
|
|
118
|
+
/**
 * Build the per-line predicate used by the no-ripgrep fallback search.
 *
 * - "literal": plain substring containment.
 * - "flag": the whole-flag pattern produced by flagPattern().
 * - any other mode ("word"): the needle must not touch a word character
 *   ([A-Za-z0-9_]) on either side.
 *
 * @param {string} needle - Text to look for.
 * @param {string} mode - "literal", "flag", or "word".
 * @returns {(line: string) => boolean} Predicate applied to each source line.
 */
function matcherFor(needle, mode) {
  if (mode === "literal")
    return (line) => line.includes(needle);
  // Look-arounds instead of \b…\b: \b only matches where a word character
  // meets a non-word one, so a needle that itself starts or ends with a
  // non-word character (e.g. `$scope`) would be missed after whitespace and
  // wrongly matched after a letter. ripgrep's --word-regexp has no such
  // problem, and the fallback should agree with it. For needles whose edges
  // are word characters this is equivalent to \b.
  const re = mode === "flag"
    ? new RegExp(flagPattern(needle))
    : new RegExp(`(?<![A-Za-z0-9_])${escapeRegex(needle)}(?![A-Za-z0-9_])`);
  return (line) => re.test(line);
}
|
|
126
|
+
function fallbackSearch(root, needle, mode, limit) {
|
|
127
|
+
const matches = matcherFor(needle, mode);
|
|
100
128
|
const out = [];
|
|
101
129
|
const walk = (dir) => {
|
|
102
130
|
if (out.length >= limit)
|
|
@@ -134,7 +162,7 @@ function fallbackSearch(root, needle, limit) {
|
|
|
134
162
|
}
|
|
135
163
|
const lines = content.split("\n");
|
|
136
164
|
for (let i = 0; i < lines.length; i++) {
|
|
137
|
-
if (lines[i]
|
|
165
|
+
if (matches(lines[i])) {
|
|
138
166
|
out.push({
|
|
139
167
|
file: path.relative(root, full),
|
|
140
168
|
line: i + 1,
|
|
@@ -150,8 +178,13 @@ function fallbackSearch(root, needle, limit) {
|
|
|
150
178
|
walk(root);
|
|
151
179
|
return out;
|
|
152
180
|
}
|
|
153
|
-
/** Resolve a documented path claim against the filesystem. */
|
|
181
|
+
/**
 * Resolve a documented path claim against the filesystem, contained to the repo.
 *
 * @param {string} root - Repository root the claim is resolved against.
 * @param {string} relPath - Path as written in the docs; a leading "./" and any
 *   backtick/asterisk characters are stripped before resolving.
 * @returns {boolean} True only when the resolved path stays inside `root` and exists.
 */
export function fileExists(root, relPath) {
  const clean = relPath.replace(/^\.\//, "").replace(/[`*]/g, "");
  const base = path.resolve(root);
  const target = path.resolve(base, clean);
  // Don't let "../../etc/passwd" style references probe outside the repo.
  // Containment is decided from the relative path rather than a
  // `base + path.sep` prefix test: when the root is a filesystem root that
  // prefix becomes "//", which no resolved path starts with, so every claim
  // was rejected. A cross-drive target on Windows comes back absolute.
  const rel = path.relative(base, target);
  const escapesRoot = rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
  if (escapesRoot)
    return false;
  return existsSync(target);
}
|
package/dist/verify-llm.js
CHANGED
|
@@ -6,6 +6,8 @@ const EXTRACT_SYSTEM = `You extract verifiable factual claims that a documentati
|
|
|
6
6
|
|
|
7
7
|
A claim is a specific, checkable assertion: a default value, a return type, a parameter name, a config key, an install step, a behavior ("by default X happens"), an output shape. Ignore marketing copy, aspirational statements, and anything not checkable against source code.
|
|
8
8
|
|
|
9
|
+
Do NOT emit bare flag/env-var/file-path/function-name existence claims (e.g. "the --json flag exists", "see src/foo.ts") — a separate deterministic engine already checks those, and re-reporting them causes duplicates. Focus on semantic prose claims: values, behaviors, types, defaults.
|
|
10
|
+
|
|
9
11
|
For each claim, provide search terms (identifiers, strings, file names) that would help locate the relevant code. Be precise; prefer fewer high-quality claims over many vague ones.`;
|
|
10
12
|
const VERIFY_SYSTEM = `You verify whether documentation claims still match the codebase, given source-code evidence.
|
|
11
13
|
|
|
@@ -14,7 +16,7 @@ For each claim, decide:
|
|
|
14
16
|
- "drifted": the evidence contradicts the claim (the docs are now wrong).
|
|
15
17
|
- "unverifiable": the evidence is insufficient to decide.
|
|
16
18
|
|
|
17
|
-
Be conservative. Only mark "drifted" when the evidence
|
|
19
|
+
Be conservative. Only mark "drifted" when the evidence affirmatively shows a DIFFERENT value or behavior than the claim states. Absence of evidence is NEVER drift: if the evidence array is empty, or does not actually mention the claim's subject, you MUST return "unverifiable". A false "drifted" verdict is worse than a missed one. Give the specific contradiction and, when drifted, a concrete suggested doc fix.`;
|
|
18
20
|
const EXTRACT_SCHEMA = {
|
|
19
21
|
type: "object",
|
|
20
22
|
additionalProperties: false,
|
|
@@ -88,15 +90,36 @@ export async function verifyLlm(root, docFile, model) {
|
|
|
88
90
|
}
|
|
89
91
|
evidenceByClaim.set(claim.id, dedupeEvidence(found).slice(0, 8));
|
|
90
92
|
}
|
|
91
|
-
const
|
|
93
|
+
const out = [];
|
|
94
|
+
// Claims with no located evidence are NOT sent to the model: handing it an
|
|
95
|
+
// empty evidence array invites "absent, therefore drifted" hallucinations.
|
|
96
|
+
// Without evidence the claim is unverifiable by definition.
|
|
97
|
+
const grounded = [];
|
|
98
|
+
for (const claim of claims) {
|
|
99
|
+
if ((evidenceByClaim.get(claim.id) ?? []).length === 0) {
|
|
100
|
+
out.push({
|
|
101
|
+
claim,
|
|
102
|
+
status: "unverifiable",
|
|
103
|
+
confidence: 0.3,
|
|
104
|
+
explanation: "No code evidence located for this claim.",
|
|
105
|
+
evidence: [],
|
|
106
|
+
engine: "llm",
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
else {
|
|
110
|
+
grounded.push(claim);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
if (grounded.length === 0)
|
|
114
|
+
return out;
|
|
115
|
+
const verifyPayload = grounded.map((c) => ({
|
|
92
116
|
id: c.id,
|
|
93
117
|
claim: c.assertion,
|
|
94
118
|
docText: c.text,
|
|
95
119
|
evidence: evidenceByClaim.get(c.id) ?? [],
|
|
96
120
|
}));
|
|
97
121
|
const { verdicts: rawVerdicts } = await structuredCall(model, VERIFY_SYSTEM, `Verify these claims against the evidence:\n\n${JSON.stringify(verifyPayload, null, 2)}`, VERIFY_SCHEMA);
|
|
98
|
-
const byId = new Map(
|
|
99
|
-
const out = [];
|
|
122
|
+
const byId = new Map(grounded.map((c) => [c.id, c]));
|
|
100
123
|
for (const v of rawVerdicts) {
|
|
101
124
|
const claim = byId.get(v.id);
|
|
102
125
|
if (!claim)
|
package/dist/verify-reference.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { searchToken, fileExists } from "./search.js";
|
|
2
2
|
/**
|
|
3
3
|
* The deterministic engine: verify each claim by looking for hard evidence in
|
|
4
4
|
* the source tree. No model, no API key. High precision by design — when in
|
|
@@ -15,26 +15,38 @@ async function verifyOne(root, claim) {
|
|
|
15
15
|
const base = { claim, evidence: [], engine: "reference" };
|
|
16
16
|
switch (claim.kind) {
|
|
17
17
|
case "file": {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
? {
|
|
18
|
+
if (fileExists(root, claim.text)) {
|
|
19
|
+
return {
|
|
21
20
|
...base,
|
|
22
21
|
status: "ok",
|
|
23
22
|
confidence: 0.95,
|
|
24
23
|
explanation: `${claim.text} exists on disk.`,
|
|
25
|
-
}
|
|
26
|
-
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
// A bare filename with no path separator is often a library or framework
|
|
27
|
+
// name in code formatting (Node.js, config.js), not a repo file. Don't
|
|
28
|
+
// assert drift on those; downgrade to unverifiable.
|
|
29
|
+
if (!claim.text.includes("/")) {
|
|
30
|
+
return {
|
|
27
31
|
...base,
|
|
28
|
-
status: "
|
|
29
|
-
confidence: 0.
|
|
30
|
-
explanation:
|
|
31
|
-
suggestedFix: `Update or remove the reference to ${claim.text}.`,
|
|
32
|
+
status: "unverifiable",
|
|
33
|
+
confidence: 0.3,
|
|
34
|
+
explanation: `${claim.text} is not a file in the repo; it may be a library or framework name rather than a path.`,
|
|
32
35
|
};
|
|
36
|
+
}
|
|
37
|
+
return {
|
|
38
|
+
...base,
|
|
39
|
+
status: "drifted",
|
|
40
|
+
confidence: 0.9,
|
|
41
|
+
explanation: `The docs reference ${claim.text}, but no such file or directory exists.`,
|
|
42
|
+
suggestedFix: `Update or remove the reference to ${claim.text}.`,
|
|
43
|
+
};
|
|
33
44
|
}
|
|
34
45
|
case "flag":
|
|
35
46
|
case "env":
|
|
36
47
|
case "symbol": {
|
|
37
|
-
const
|
|
48
|
+
const mode = claim.kind === "flag" ? "flag" : "word";
|
|
49
|
+
const hits = await searchToken(root, claim.text, mode);
|
|
38
50
|
if (hits.length > 0) {
|
|
39
51
|
return {
|
|
40
52
|
...base,
|
|
@@ -49,10 +61,12 @@ async function verifyOne(root, claim) {
|
|
|
49
61
|
: claim.kind === "env"
|
|
50
62
|
? "environment variable"
|
|
51
63
|
: "symbol";
|
|
64
|
+
// Boundary-aware search (below) means a hit is a real, whole-token match,
|
|
65
|
+
// so a miss is trustworthy enough to fail CI on, symbols included.
|
|
52
66
|
return {
|
|
53
67
|
...base,
|
|
54
68
|
status: "drifted",
|
|
55
|
-
confidence:
|
|
69
|
+
confidence: 0.8,
|
|
56
70
|
explanation: `The docs mention the ${noun} ${claim.text}, but it does not appear anywhere in the source.`,
|
|
57
71
|
suggestedFix: `Verify ${claim.text} still exists; it may have been renamed or removed.`,
|
|
58
72
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "docverity",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Catch documentation that lies about your code. Verify that your docs' claims still match the source, in CI.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"url": "https://github.com/deveshagarwal/docverity/issues"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@anthropic-ai/sdk": "^0.
|
|
47
|
+
"@anthropic-ai/sdk": "^0.106.0",
|
|
48
48
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
49
49
|
"commander": "^12.1.0",
|
|
50
50
|
"kleur": "^4.1.5"
|