claudemd-cli 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1386 -0
- package/LICENSE +21 -0
- package/README.md +298 -0
- package/bin/claudemd-lint.js +148 -0
- package/hooks/banned-vocab.patterns +98 -0
- package/package.json +18 -0
- package/scripts/lib/lint.js +128 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// lint.js — pure-Node scanning functions for §10-V banned-vocab patterns.
|
|
2
|
+
// Shared by bin/claudemd-lint.js (CLI) and any future Node-side enforcement
|
|
3
|
+
// that doesn't want to shell out to bash. Mirrors the substantive matching
|
|
4
|
+
// rules of hooks/banned-vocab-check.sh + hooks/transcript-vocab-scan.sh
|
|
5
|
+
// without the shell-specific quoting + jq plumbing.
|
|
6
|
+
//
|
|
7
|
+
// The patterns file is the authoritative source — hooks/banned-vocab.patterns.
|
|
8
|
+
// One regex per non-blank, non-comment line. Format:
|
|
9
|
+
// <extended-regex>|<reason>
|
|
10
|
+
// <extended-regex>|@ratio <reason> ← ratio class, exempt under baseline
|
|
11
|
+
//
|
|
12
|
+
// JS regex notes:
|
|
13
|
+
// * Patterns were authored for grep -iE (POSIX ERE). Most carry over to
|
|
14
|
+
// JS regex unchanged. `\b` and `[0-9]` are equivalent. POSIX char classes
|
|
15
|
+
// like [[:space:]] are NOT in the .patterns file.
|
|
16
|
+
// * `\s` means whitespace in JS — also fine.
|
|
17
|
+
// * Case-insensitive matching is the contract; we always pass /i flag.
|
|
18
|
+
// * Invalid regex (a future bad pattern checked in by mistake) is skipped
|
|
19
|
+
// silently rather than crashing the scan — fail-open consistent with
|
|
20
|
+
// the bash hooks' design.
|
|
21
|
+
|
|
22
|
+
import fs from 'node:fs';
|
|
23
|
+
import path from 'node:path';
|
|
24
|
+
import { fileURLToPath } from 'node:url';
|
|
25
|
+
|
|
26
|
+
// Directory containing this module file, derived from import.meta.url.
const HERE = path.dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
// Default patterns file: hooks/banned-vocab.patterns, resolved two directory
// levels above this module's directory. Used by readPatterns() when no path is given.
export const DEFAULT_PATTERNS_FILE = path.resolve(HERE, '../../hooks/banned-vocab.patterns');
|
|
28
|
+
|
|
29
|
+
/**
 * Load and parse the banned-vocab patterns file.
 *
 * Each line is trimmed; blank lines, lines starting with `#`, and lines that
 * contain no `|` are ignored. Every remaining line is split at its LAST `|`
 * into a regex source (left) and a reason (right). A reason that begins with
 * `@ratio ` marks the entry as ratio-class and the tag is stripped.
 *
 * @param {string} [patternsFile=DEFAULT_PATTERNS_FILE] Path of the patterns file.
 * @returns {{regex: string, reason: string, isRatio: boolean}[]} Entries in
 *   file order; an empty array when the file does not exist.
 */
export function readPatterns(patternsFile = DEFAULT_PATTERNS_FILE) {
  const ratioTag = '@ratio ';

  if (!fs.existsSync(patternsFile)) {
    return [];
  }

  return fs
    .readFileSync(patternsFile, 'utf8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .filter((entry) => entry.includes('|'))
    .map((entry) => {
      // The right-most `|` separates regex from reason: regex bodies may use
      // `|` for alternation, reason text does not. Same convention as
      // banned-vocab-check.sh (`${line%|*}` / `${line##*|}`).
      const cut = entry.lastIndexOf('|');
      const tail = entry.slice(cut + 1);
      const isRatio = tail.startsWith(ratioTag);
      return {
        regex: entry.slice(0, cut),
        reason: isRatio ? tail.slice(ratioTag.length) : tail,
        isRatio,
      };
    });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Scan `text` against the banned-vocab patterns and report the first match
 * of every pattern that hits.
 *
 * Patterns are compiled case-insensitively. A pattern whose regex source does
 * not compile is skipped (fail-open) instead of aborting the scan.
 *
 * @param {string} text Text to scan; a falsy value yields no hits.
 * @param {object} [opts]
 * @param {boolean} [opts.excludeRatio=false] Skip `@ratio`-tagged patterns.
 *   The bash transcript scan does this because chat prose uses ratios with
 *   different baseline conventions than commit messages. CLI `lint` defaults
 *   to no exclusion; CLI `audit` defaults to excludeRatio=true to mirror
 *   transcript-vocab-scan.sh.
 * @param {{regex: string, reason: string, isRatio: boolean}[]} [opts.patterns]
 *   Pre-loaded patterns (lets callers cache the read); when omitted they are
 *   loaded with readPatterns().
 * @returns {{match: string, regex: string, reason: string, isRatio: boolean}[]}
 *   One entry per matching pattern, in pattern order.
 */
export function scan(text, { excludeRatio = false, patterns } = {}) {
  if (!text) {
    return [];
  }

  const found = [];
  for (const entry of patterns || readPatterns()) {
    const skipAsRatio = excludeRatio ? entry.isRatio : false;
    if (skipAsRatio) {
      continue;
    }

    let matcher = null;
    try {
      matcher = new RegExp(entry.regex, 'i');
    } catch {
      // Bad regex: leave matcher unset so the entry is skipped (fail-open).
      matcher = null;
    }
    if (matcher === null) {
      continue;
    }

    const result = text.match(matcher);
    if (!result) {
      continue;
    }
    found.push({
      match: result[0],
      regex: entry.regex,
      reason: entry.reason,
      isRatio: entry.isRatio,
    });
  }
  return found;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Extract assistant text turns from a JSONL transcript.
 *
 * Every non-blank line is parsed as JSON. A row contributes a turn only when
 * it is an object with `type === 'assistant'` whose `message.content` is an
 * array holding at least one `{type: 'text', text: <string>}` block; the text
 * of all such blocks is joined with a single space.
 *
 * Corrupt rows are skipped silently, matching transcript-vocab-scan.sh's
 * `try fromjson catch empty` design. That covers unparseable JSON as well as
 * valid JSON of the wrong shape: a bare `null`, a non-array `content`, or
 * null/primitive entries inside `content`.
 *
 * @param {string} jsonlText Full transcript, one JSON document per line.
 * @returns {{turnIndex: number, line: number, text: string}[]} One entry per
 *   assistant turn that has text. `turnIndex` is 0-based and counts only the
 *   returned turns; `line` is the 1-based line number within `jsonlText`.
 */
export function parseTranscript(jsonlText) {
  const lines = jsonlText.split('\n');
  const turns = [];

  for (let i = 0; i < lines.length; i++) {
    const raw = lines[i];
    if (!raw.trim()) continue;

    let row;
    try {
      row = JSON.parse(raw);
    } catch {
      continue; // unparseable row, skip
    }

    // JSON.parse can legitimately return null or a primitive; only objects
    // can carry the fields read below.
    if (row === null || typeof row !== 'object') continue;
    if (row.type !== 'assistant') continue;

    // A missing or non-array content holds no text blocks.
    const content = row.message?.content;
    if (!Array.isArray(content)) continue;

    const texts = content
      .filter(
        (block) =>
          block !== null &&
          typeof block === 'object' &&
          block.type === 'text' &&
          typeof block.text === 'string',
      )
      .map((block) => block.text);
    if (texts.length === 0) continue;

    // turns.length is the number of turns emitted so far, i.e. the next index.
    turns.push({ turnIndex: turns.length, line: i + 1, text: texts.join(' ') });
  }

  return turns;
}
|
|
104
|
+
|
|
105
|
+
// Format helpers — keep the CLI thin.
|
|
106
|
+
/**
 * Render scan results as a human-readable, newline-joined report.
 *
 * @param {object} args
 * @param {string} args.scope `'lint'` formats `hits`; `'audit'` formats
 *   `turns`; any other value produces an empty string.
 * @param {{match: string, reason: string}[]} [args.hits] Hits to list
 *   (read only for the `'lint'` scope).
 * @param {{line: number, turnIndex: number, hits: {match: string, reason: string}[]}[]} [args.turns]
 *   Turns, each carrying its own `hits` array (read only for the `'audit'`
 *   scope). Turns without hits are counted but not listed.
 * @returns {string} The report text, or an "OK" line when nothing was flagged.
 */
export function formatHumanReadable({ scope, hits, turns }) {
  switch (scope) {
    case 'lint': {
      const count = hits.length;
      if (count === 0) {
        return 'OK: no §10-V hits';
      }
      const plural = count === 1 ? '' : 's';
      const report = [
        `§10-V drift detected (${count} hit${plural}):`,
        ...hits.map((hit) => ` - "${hit.match}" (${hit.reason})`),
      ];
      return report.join('\n');
    }

    case 'audit': {
      const total = turns.length;
      const flagged = turns.filter((turn) => turn.hits.length > 0);
      if (flagged.length === 0) {
        return `OK: no §10-V hits across ${total} assistant turn(s)`;
      }
      const sections = flagged.flatMap((turn) => [
        ` line ${turn.line} (turn #${turn.turnIndex}):`,
        ...turn.hits.map((hit) => ` - "${hit.match}" (${hit.reason})`),
      ]);
      const header = `§10-V drift detected in ${flagged.length} of ${total} assistant turn(s):`;
      return [header, ...sections].join('\n');
    }

    default:
      return '';
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Serialize a payload as pretty-printed JSON with two-space indentation.
 *
 * @param {*} payload Value to serialize.
 * @returns {string|undefined} The JSON text, or whatever `JSON.stringify`
 *   yields for the value (e.g. `undefined` for an `undefined` payload).
 */
export function formatJSON(payload) {
  const replacer = null;
  const indentWidth = 2;
  return JSON.stringify(payload, replacer, indentWidth);
}
|