patina-cli 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +211 -0
- package/CHANGELOG.md +265 -0
- package/LICENSE +21 -0
- package/README.md +319 -0
- package/README_JA.md +254 -0
- package/README_KR.md +253 -0
- package/README_ZH.md +254 -0
- package/SKILL-MAX.md +455 -0
- package/SKILL.md +730 -0
- package/assets/brand/patina-icon.svg +9 -0
- package/assets/brand/patina-logo.svg +17 -0
- package/assets/social/patina-before-after.svg +46 -0
- package/assets/social/patina-og.svg +31 -0
- package/bin/patina.js +9 -0
- package/core/scoring.md +657 -0
- package/core/standalone-prompt.md +364 -0
- package/core/stylometry.md +754 -0
- package/core/voice.md +163 -0
- package/docs/AUTHENTICATION.md +105 -0
- package/docs/AUTHENTICATION_KR.md +105 -0
- package/docs/BRANDING.md +37 -0
- package/docs/CLI.md +80 -0
- package/docs/COMPARISON.md +38 -0
- package/docs/COOKBOOK.md +173 -0
- package/docs/DEMO.md +40 -0
- package/docs/ETHICS.md +27 -0
- package/docs/EXAMPLES.md +130 -0
- package/docs/EXAMPLES_KR.md +130 -0
- package/docs/EXIT-CODES.md +25 -0
- package/docs/FAQ.md +67 -0
- package/docs/FAQ_KR.md +65 -0
- package/docs/FLAG-PARITY.md +53 -0
- package/docs/GLOSSARY.md +123 -0
- package/docs/PATTERNS-EN.md +718 -0
- package/docs/PATTERNS-JA.md +706 -0
- package/docs/PATTERNS-KO.md +707 -0
- package/docs/PATTERNS-ZH.md +706 -0
- package/docs/PATTERNS.md +22 -0
- package/docs/ROADMAP.md +315 -0
- package/docs/audits/2026-05-deep-research.md +290 -0
- package/docs/benchmarks/detector-comparison.json +442 -0
- package/docs/benchmarks/detector-comparison.md +65 -0
- package/docs/benchmarks/latest.json +988 -0
- package/docs/benchmarks/latest.md +112 -0
- package/docs/integrations/docker.md +19 -0
- package/docs/integrations/github-action.md +59 -0
- package/docs/integrations/pre-commit.md +77 -0
- package/docs/integrations/release.md +43 -0
- package/docs/internal/HARNESS.md +14 -0
- package/docs/internal/README.md +14 -0
- package/docs/internal/WARP.md +23 -0
- package/docs/research/2025-rebaseline-plan.md +89 -0
- package/docs/research/ai-human-metrics.md +380 -0
- package/docs/social/gstack-cardnews.html +236 -0
- package/docs/social/gstack-cardnews.md +88 -0
- package/docs/social/gstack-thread.md +106 -0
- package/docs/social/patina-launch-copy.md +227 -0
- package/docs/superpowers/specs/2026-04-03-meaning-preservation-design.md +299 -0
- package/lexicon/ai-en.md +162 -0
- package/lexicon/ai-ko.md +159 -0
- package/package.json +100 -0
- package/patina-max/SKILL.md +523 -0
- package/patina-max/composite.py +457 -0
- package/patterns/en-communication.md +89 -0
- package/patterns/en-content.md +133 -0
- package/patterns/en-filler.md +113 -0
- package/patterns/en-language.md +163 -0
- package/patterns/en-structure.md +173 -0
- package/patterns/en-style.md +139 -0
- package/patterns/en-viral-hook.md +211 -0
- package/patterns/ja-communication.md +101 -0
- package/patterns/ja-content.md +153 -0
- package/patterns/ja-filler.md +123 -0
- package/patterns/ja-language.md +190 -0
- package/patterns/ja-structure.md +142 -0
- package/patterns/ja-style.md +147 -0
- package/patterns/ja-viral-hook.md +216 -0
- package/patterns/ko-communication.md +98 -0
- package/patterns/ko-content.md +154 -0
- package/patterns/ko-filler.md +105 -0
- package/patterns/ko-language.md +182 -0
- package/patterns/ko-structure.md +147 -0
- package/patterns/ko-style.md +146 -0
- package/patterns/ko-viral-hook.md +211 -0
- package/patterns/zh-communication.md +101 -0
- package/patterns/zh-content.md +153 -0
- package/patterns/zh-filler.md +118 -0
- package/patterns/zh-language.md +173 -0
- package/patterns/zh-structure.md +145 -0
- package/patterns/zh-style.md +159 -0
- package/patterns/zh-viral-hook.md +216 -0
- package/profiles/academic.md +53 -0
- package/profiles/blog.md +81 -0
- package/profiles/casual-conversation.md +105 -0
- package/profiles/code-comment.md +104 -0
- package/profiles/commit-message.md +99 -0
- package/profiles/default.md +62 -0
- package/profiles/email.md +52 -0
- package/profiles/formal.md +98 -0
- package/profiles/instructional.md +80 -0
- package/profiles/legal.md +57 -0
- package/profiles/marketing.md +56 -0
- package/profiles/medical.md +53 -0
- package/profiles/narrative.md +79 -0
- package/profiles/release-notes.md +98 -0
- package/profiles/social.md +56 -0
- package/profiles/technical.md +53 -0
- package/scripts/benchmark-report.mjs +252 -0
- package/scripts/check-release-metadata.mjs +48 -0
- package/scripts/detector-comparison.mjs +267 -0
- package/scripts/lint.mjs +40 -0
- package/scripts/precommit-score.mjs +31 -0
- package/scripts/prose-score.mjs +186 -0
- package/scripts/update-benchmark-ranges.mjs +108 -0
- package/src/api.js +330 -0
- package/src/auth.js +105 -0
- package/src/backends/claude-cli.js +112 -0
- package/src/backends/codex-cli.js +121 -0
- package/src/backends/contract.js +21 -0
- package/src/backends/gemini-cli.js +135 -0
- package/src/backends/index.js +159 -0
- package/src/cache.js +106 -0
- package/src/cli.js +1280 -0
- package/src/commands/doctor.js +229 -0
- package/src/commands/init.js +208 -0
- package/src/config.js +126 -0
- package/src/errors.js +53 -0
- package/src/features/index.js +96 -0
- package/src/features/lexicon.js +90 -0
- package/src/features/segment.js +49 -0
- package/src/features/stylometry.js +50 -0
- package/src/loader.js +103 -0
- package/src/logger.js +70 -0
- package/src/manifest.js +162 -0
- package/src/max-mode.js +207 -0
- package/src/ouroboros.js +233 -0
- package/src/output.js +480 -0
- package/src/prompt-builder.js +409 -0
- package/src/providers.js +100 -0
- package/src/scoring.js +531 -0
- package/src/security.js +133 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-01.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-03.md +17 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +939 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-03.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-01.md +14 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-04.md +14 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-03.md +11 -0
- package/tests/quality/README.md +121 -0
- package/tests/quality/benchmark.mjs +306 -0
- package/tests/quality/detectors.manual.example.json +31 -0
- package/tests/quality/dogfood.mjs +44 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Quality benchmark for the deterministic stylometry layer.
|
|
3
|
+
//
|
|
4
|
+
// Iterates every fixture under tests/fixtures/suspect-zones/{lang}/{class}/*.md,
|
|
5
|
+
// runs the in-tree analyzer (no LLM), and compares the predicted hot/cold
|
|
6
|
+
// decision against the fixture's expected_hot label. Emits a per-language
|
|
7
|
+
// confusion matrix + accuracy and writes the full per-fixture log to
|
|
8
|
+
// tests/quality/results.json.
|
|
9
|
+
//
|
|
10
|
+
// Usage: node tests/quality/benchmark.mjs [--quiet]
|
|
11
|
+
|
|
12
|
+
import { readFileSync, readdirSync, writeFileSync, statSync } from 'node:fs';
|
|
13
|
+
import { resolve, dirname } from 'node:path';
|
|
14
|
+
import { fileURLToPath } from 'node:url';
|
|
15
|
+
import yaml from 'js-yaml';
|
|
16
|
+
|
|
17
|
+
import { analyzeText } from '../../src/features/index.js';
|
|
18
|
+
import { loadLexicon } from '../../src/features/lexicon.js';
|
|
19
|
+
|
|
20
|
+
// Directory containing this script (ES modules do not provide __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Two directories above this script; treated as the repository root.
const REPO_ROOT = resolve(__dirname, '../..');
// Root of the {lang}/{class}/*.md fixture tree that listFixtures() walks.
const FIXTURES_ROOT = resolve(REPO_ROOT, 'tests/fixtures/suspect-zones');
// Destination of the full per-fixture log, written next to this script.
const RESULTS_PATH = resolve(__dirname, 'results.json');
// JSON file holding the pinned regression expectations, looked up by fixture_id.
const EXPECTED_RANGES_PATH = resolve(FIXTURES_ROOT, 'expected-ranges.json');
// Emitted verbatim as `fixtureSchemaVersion` in results.json.
const FIXTURE_SCHEMA_VERSION = 1;

// Splits a fixture into its YAML frontmatter (capture 1) and the remaining
// body (capture 2). The opening fence must be `---` immediately followed by
// a line feed at the very start of the file.
const FRONTMATTER_RE = /^---\n([\s\S]*?)\n---\s*\n([\s\S]*)$/;
|
|
28
|
+
|
|
29
|
+
/**
 * Reads one fixture file and splits it into YAML frontmatter and prose body.
 *
 * The raw text is normalised before matching: a leading UTF-8 BOM is dropped
 * and CRLF line endings are converted to LF. Without this, a checkout with
 * line-ending conversion (for example git `core.autocrlf` on Windows) fails
 * with "Missing frontmatter" because the frontmatter pattern requires the
 * opening fence to be followed directly by a line feed. Files that already
 * use LF and have no BOM are parsed exactly as before.
 *
 * @param {string} path - Path of the fixture file to read.
 * @returns {{ meta: object, body: string, path: string }} The parsed
 *   frontmatter mapping, the trimmed body text, and the input path.
 * @throws {Error} If the file has no frontmatter block, or the frontmatter
 *   does not parse to a YAML mapping (e.g. it is empty or a bare scalar).
 */
function parseFixture(path) {
  const raw = readFileSync(path, 'utf8')
    .replace(/^\uFEFF/, '')
    .replace(/\r\n/g, '\n');
  const m = raw.match(FRONTMATTER_RE);
  if (!m) throw new Error(`Missing frontmatter: ${path}`);
  const meta = yaml.load(m[1]);
  // Fail here with the file name rather than later with an opaque TypeError
  // when a caller dereferences `meta`.
  if (meta === null || typeof meta !== 'object' || Array.isArray(meta)) {
    throw new Error(`Frontmatter is not a YAML mapping: ${path}`);
  }
  return { meta, body: m[2].trim(), path };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Collects every Markdown fixture stored two directory levels below
 * FIXTURES_ROOT (language directory, then class directory).
 *
 * Non-directory entries at either level are ignored, as are files whose
 * names do not end in `.md`.
 *
 * @returns {string[]} Absolute fixture paths in default sort order.
 */
function listFixtures() {
  // Resolved paths of the entries of `parent` that are directories.
  const subdirectories = (parent) =>
    readdirSync(parent)
      .map((entry) => resolve(parent, entry))
      .filter((candidate) => statSync(candidate).isDirectory());

  const fixturePaths = [];
  for (const langDir of subdirectories(FIXTURES_ROOT)) {
    for (const clsDir of subdirectories(langDir)) {
      const markdownNames = readdirSync(clsDir).filter((name) => name.endsWith('.md'));
      fixturePaths.push(...markdownNames.map((name) => resolve(clsDir, name)));
    }
  }
  return fixturePaths.sort();
}
|
|
52
|
+
|
|
53
|
+
/**
 * Loads the pinned regression expectations from EXPECTED_RANGES_PATH.
 *
 * @returns {object} The `metrics` object of the parsed file.
 * @throws {Error} If the file does not declare `schemaVersion` 1 or lacks a
 *   `metrics` object. Read and JSON parse errors propagate unchanged.
 */
function loadExpectedRanges() {
  const parsed = JSON.parse(readFileSync(EXPECTED_RANGES_PATH, 'utf8'));
  const versionOk = parsed?.schemaVersion === 1;
  const metricsOk = Boolean(parsed?.metrics) && typeof parsed.metrics === 'object';
  if (versionOk && metricsOk) {
    return parsed.metrics;
  }
  throw new Error(`${EXPECTED_RANGES_PATH}: expected schemaVersion=1 and metrics object`);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Creates a fresh confusion-matrix tally with every counter at zero.
 *
 * @returns {{ tp: number, fp: number, fn: number, tn: number, total: number }}
 *   A new object on every call.
 */
function emptyMetrics() {
  const counterNames = ['tp', 'fp', 'fn', 'tn', 'total'];
  return Object.fromEntries(counterNames.map((counter) => [counter, 0]));
}
|
|
65
|
+
|
|
66
|
+
/**
 * Records one prediction in a confusion-matrix tally, mutating it in place.
 *
 * @param {{ tp: number, fp: number, fn: number, tn: number, total: number }} m
 *   Tally to update.
 * @param {*} predicted - Truthy when the analyzer flagged the sample.
 * @param {*} expected - Truthy when the sample is labelled as flagged.
 */
function updateMetrics(m, predicted, expected) {
  m.total += 1;
  let cell;
  if (predicted) {
    cell = expected ? 'tp' : 'fp';
  } else {
    cell = expected ? 'fn' : 'tn';
  }
  m[cell] += 1;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Derives reporting statistics from a confusion-matrix tally.
 *
 * Every ratio falls back to 0 when its denominator is 0. The confidence
 * bounds come from wilsonInterval() applied to the correct-prediction count.
 *
 * @param {{ tp: number, fp: number, fn: number, tn: number, total: number }} m
 *   Tally to summarise; it is not modified.
 * @returns {object} The original counters followed by rounded `accuracy`,
 *   `precision`, `recall`, `f1`, the sample size `n`, `ci_low`, `ci_high`,
 *   and `confidence_method`.
 */
function summarize(m) {
  const correct = m.tp + m.tn;
  const flagged = m.tp + m.fp;
  const labelledHot = m.tp + m.fn;

  const accuracy = m.total ? correct / m.total : 0;
  const precision = flagged ? m.tp / flagged : 0;
  const recall = labelledHot ? m.tp / labelledHot : 0;
  const f1 = precision + recall ? (2 * precision * recall) / (precision + recall) : 0;
  const { low, high } = wilsonInterval(correct, m.total);

  return {
    ...m,
    accuracy: round(accuracy),
    precision: round(precision),
    recall: round(recall),
    f1: round(f1),
    n: m.total,
    ci_low: round(low),
    ci_high: round(high),
    confidence_method: 'Wilson score interval, 95%',
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Rounds a number to a fixed count of decimal places using Math.round.
 *
 * @param {number} n - Value to round.
 * @param {number} [digits=3] - Decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(n, digits = 3) {
  const scale = 10 ** digits;
  return Math.round(n * scale) / scale;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Computes a Wilson score confidence interval for a binomial proportion.
 *
 * @param {number} successes - Number of successful trials.
 * @param {number} n - Total number of trials; a falsy value yields a
 *   zero-width interval at 0.
 * @param {number} [z=1.959963984540054] - Normal quantile for the desired
 *   confidence level (the default is the two-sided 95% value).
 * @returns {{ low: number, high: number }} Bounds clamped to [0, 1].
 */
function wilsonInterval(successes, n, z = 1.959963984540054) {
  if (!n) return { low: 0, high: 0 };

  const zSquared = z ** 2;
  const phat = successes / n;
  const denom = 1 + zSquared / n;
  const center = (phat + zSquared / (2 * n)) / denom;
  const spread = Math.sqrt((phat * (1 - phat) + zSquared / (4 * n)) / n);
  const margin = (z * spread) / denom;

  const low = Math.max(0, center - margin);
  const high = Math.min(1, center + margin);
  return { low, high };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Reduces an analysis result to one flag per detector.
 *
 * A detector counts as hot when at least one paragraph trips it: a `low`
 * burstiness band, a `low` MATTR band, or a truthy `lexicon.hot`.
 *
 * @param {{ paragraphs: object[] }} result - Analyzer output.
 * @returns {{ burstiness: boolean, mattr: boolean, lexicon: boolean }}
 */
function detectorHot(result) {
  const isLowBand = (signal) => signal?.band === 'low';

  const burstiness = result.paragraphs.some((para) => isLowBand(para.burstiness));
  const mattr = result.paragraphs.some((para) => isLowBand(para.mattr));
  const lexicon = result.paragraphs.some((para) => para.lexicon?.hot);

  return { burstiness, mattr, lexicon };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Creates one independent zeroed tally per detector.
 *
 * @returns {{ burstiness: object, mattr: object, lexicon: object }} Fresh
 *   tallies from emptyMetrics(), keyed by detector name.
 */
function emptyDetectorMetrics() {
  const detectorNames = ['burstiness', 'mattr', 'lexicon'];
  return Object.fromEntries(detectorNames.map((detector) => [detector, emptyMetrics()]));
}
|
|
123
|
+
|
|
124
|
+
/**
 * Checks observed fixture metrics against a set of expectations and throws a
 * single error listing every mismatch.
 *
 * Only expectations that are present are checked: band names, the
 * lexicon-density minimum and maximum, `[min, max]` ranges for cv, mattr and
 * lexicon density, per-detector flags, and the overall `predicted_hot` flag.
 *
 * @param {string} path - Fixture path, used as the error message prefix.
 * @param {object} [expected={}] - Expectations to enforce.
 * @param {object} [observed={}] - Values measured for the fixture.
 * @throws {Error} If at least one expectation is not met.
 */
function validateExpectedMetrics(path, expected = {}, observed = {}) {
  const failures = [];

  // Band labels must match exactly when an expectation is given.
  for (const bandKey of ['cv_band', 'mattr_band']) {
    const wanted = expected[bandKey];
    if (wanted && observed[bandKey] !== wanted) {
      failures.push(`${bandKey} expected ${wanted}, got ${observed[bandKey]}`);
    }
  }

  // One-sided limits on lexicon density.
  const densityLimits = [
    ['lexicon_density_min', '>=', (value, limit) => value < limit],
    ['lexicon_density_max', '<=', (value, limit) => value > limit],
  ];
  for (const [limitKey, symbol, violates] of densityLimits) {
    const limit = expected[limitKey];
    if (typeof limit === 'number' && violates(observed.lexicon_density, limit)) {
      failures.push(`lexicon_density expected ${symbol} ${limit}, got ${observed.lexicon_density}`);
    }
  }

  // Closed [min, max] ranges.
  for (const metric of ['cv', 'mattr', 'lexicon_density']) {
    const range = expected[`${metric}_range`];
    if (Array.isArray(range) && !inRange(observed[metric], range)) {
      failures.push(`${metric} expected ${formatRange(range)}, got ${observed[metric]}`);
    }
  }

  // Per-detector flags.
  if (expected.detectors) {
    Object.entries(expected.detectors).forEach(([name, expectedHot]) => {
      const seen = observed.detectors?.[name];
      if (seen !== expectedHot) {
        failures.push(`detector.${name} expected ${expectedHot}, got ${seen}`);
      }
    });
  }

  // Overall decision.
  const wantsDecision = typeof expected.predicted_hot === 'boolean';
  if (wantsDecision && observed.predicted_hot !== expected.predicted_hot) {
    failures.push(`predicted_hot expected ${expected.predicted_hot}, got ${observed.predicted_hot}`);
  }

  if (failures.length === 0) return;
  throw new Error(`${path}: expected_metrics regression failed: ${failures.join('; ')}`);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Tests whether a value is a number inside a closed interval.
 *
 * @param {*} value - Candidate value; anything that is not of type
 *   `number` is rejected.
 * @param {[number, number]} range - Inclusive `[min, max]` bounds.
 * @returns {boolean} True when `min <= value <= max`.
 */
function inRange(value, [min, max]) {
  if (typeof value !== 'number') return false;
  const atOrAboveMin = value >= min;
  const atOrBelowMax = value <= max;
  return atOrAboveMin && atOrBelowMax;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Renders a two-element range as `[min, max]` for use in error messages.
 *
 * @param {[*, *]} range - The bounds to display.
 * @returns {string} Bracketed, comma-separated text.
 */
function formatRange([min, max]) {
  return '['.concat(min, ', ', max, ']');
}
|
|
169
|
+
|
|
170
|
+
/**
 * Runs the whole benchmark.
 *
 * For every fixture it runs the analyzer, tallies the overall and
 * per-detector decisions against the fixture's `expected_hot` label, and
 * enforces the pinned regression expectations (plus any `expected_metrics`
 * in the fixture's own frontmatter). It then writes the aggregated results
 * to RESULTS_PATH and, unless `--quiet` was passed, prints a Markdown report.
 *
 * Exits with status 2 when no fixtures exist. Sets the process exit code to
 * 1 when any fixture is misclassified. Throws when a fixture is malformed,
 * has no pinned expectations, or violates them.
 */
function main() {
  const quiet = process.argv.includes('--quiet');
  const fixturePaths = listFixtures();
  const pinnedRanges = loadExpectedRanges();

  if (fixturePaths.length === 0) {
    console.error('No fixtures found under', FIXTURES_ROOT);
    process.exit(2);
  }

  const lexiconCache = {};
  const languageTallies = {};
  const detectorTallies = {};
  const fixtureLog = [];

  for (const path of fixturePaths) {
    const { meta, body } = parseFixture(path);
    const lang = meta.language;
    const expected = meta.expected_hot;
    if (typeof expected !== 'boolean') {
      throw new Error(
        `${path}: \`expected_hot\` must be a literal boolean (got ${typeof expected}: ${JSON.stringify(expected)})`
      );
    }

    // Per-language state is created the first time a language is seen.
    lexiconCache[lang] = lexiconCache[lang] || loadLexicon(lang, REPO_ROOT);
    languageTallies[lang] = languageTallies[lang] || emptyMetrics();
    detectorTallies[lang] = detectorTallies[lang] || emptyDetectorMetrics();

    const result = analyzeText(body, { lang, lexicon: lexiconCache[lang] });
    const predicted = result.hot;
    updateMetrics(languageTallies[lang], predicted, expected);

    const detectors = detectorHot(result);
    Object.entries(detectors).forEach(([name, hot]) => {
      updateMetrics(detectorTallies[lang][name], hot, expected);
    });

    // The numeric metrics come from the first paragraph only.
    const firstParagraph = result.paragraphs[0] || {};
    const observed = {
      cv: round(firstParagraph.burstiness?.cv ?? 0),
      cv_band: firstParagraph.burstiness?.band,
      mattr: round(firstParagraph.mattr?.value ?? 0),
      mattr_band: firstParagraph.mattr?.band,
      lexicon_density: round(firstParagraph.lexicon?.density ?? 0),
      lexicon_hits: firstParagraph.lexicon?.hits ?? [],
    };

    const pinned = pinnedRanges[meta.fixture_id];
    if (!pinned) {
      throw new Error(
        `${path}: missing benchmark regression range. Run node scripts/update-benchmark-ranges.mjs after reviewing the fixture.`
      );
    }
    observed.detectors = detectors;
    observed.predicted_hot = predicted;
    validateExpectedMetrics(path, pinned, observed);
    if (meta.expected_metrics) {
      validateExpectedMetrics(path, meta.expected_metrics, observed);
    }

    fixtureLog.push({
      fixture_id: meta.fixture_id,
      lang,
      class: meta.class,
      expected_hot: expected,
      predicted_hot: predicted,
      correct: predicted === expected,
      detectors,
      ...observed,
      expected_metrics: meta.expected_metrics ?? null,
    });
  }

  const summary = {};
  let totalCorrect = 0;
  let totalCount = 0;
  for (const [lang, tally] of Object.entries(languageTallies)) {
    const languageSummary = summarize(tally);
    languageSummary.byDetector = Object.fromEntries(
      Object.entries(detectorTallies[lang]).map(([name, metrics]) => [name, summarize(metrics)])
    );
    summary[lang] = languageSummary;
    totalCorrect += tally.tp + tally.tn;
    totalCount += tally.total;
  }
  const overallAccuracy = totalCount ? totalCorrect / totalCount : 0;
  const overallCi = wilsonInterval(totalCorrect, totalCount);

  const overall = {
    accuracy: round(overallAccuracy),
    n: totalCount,
    ci_low: round(overallCi.low),
    ci_high: round(overallCi.high),
    confidence_method: 'Wilson score interval, 95%',
  };
  const results = {
    schemaVersion: 2,
    fixtureSchemaVersion: FIXTURE_SCHEMA_VERSION,
    nodeVersion: process.version,
    generatedAt: new Date().toISOString(),
    fixtureCount: fixtureLog.length,
    overallAccuracy: round(overallAccuracy),
    overall,
    perLanguage: summary,
    fixtures: fixtureLog,
  };

  writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2) + '\n');

  const wrong = fixtureLog.filter((entry) => !entry.correct);

  if (!quiet) {
    const percent = (ratio) => `${(ratio * 100).toFixed(1)}%`;

    console.log(`# Quality benchmark — ${fixtureLog.length} fixtures`);
    console.log(`Overall accuracy: ${percent(overallAccuracy)}`);
    console.log();
    console.log('| lang | n | accuracy | precision | recall | f1 | TP | FP | FN | TN |');
    console.log('|------|---|----------|-----------|--------|----|----|----|----|----|');
    for (const [lang, s] of Object.entries(summary)) {
      const cells = [
        lang,
        s.total,
        percent(s.accuracy),
        percent(s.precision),
        percent(s.recall),
        s.f1.toFixed(2),
        s.tp,
        s.fp,
        s.fn,
        s.tn,
      ];
      console.log(`| ${cells.join(' | ')} |`);
    }
    console.log();
    if (wrong.length === 0) {
      console.log('All fixtures classified correctly.');
    } else {
      console.log(`Misclassified (${wrong.length}):`);
      wrong.forEach((f) => {
        console.log(
          ` ${f.fixture_id} (${f.class}) → predicted=${f.predicted_hot}, expected=${f.expected_hot} | cv=${f.cv} ${f.cv_band}, mattr=${f.mattr} ${f.mattr_band}, lex=${f.lexicon_density}/1000`
        );
      });
    }
    console.log(`\nFull log: ${RESULTS_PATH}`);
  }

  // Signal failure through the exit code so CI notices regressions even when
  // the report is suppressed with --quiet.
  if (wrong.length > 0) process.exitCode = 1;
}
|
|
305
|
+
|
|
306
|
+
// Script entry point: the benchmark runs as soon as this module is evaluated.
main();
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"runId": "manual-example-2026-05-20",
|
|
4
|
+
"collectedAt": "2026-05-20",
|
|
5
|
+
"note": "Example only. Replace with manually collected third-party detector labels before drawing conclusions.",
|
|
6
|
+
"detectors": [
|
|
7
|
+
{
|
|
8
|
+
"id": "example-third-party",
|
|
9
|
+
"name": "Example Third-Party Detector",
|
|
10
|
+
"kind": "manual-third-party",
|
|
11
|
+
"scoreScale": "0..1",
|
|
12
|
+
"threshold": 0.5
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"results": [
|
|
16
|
+
{
|
|
17
|
+
"fixture_id": "en-ai-01",
|
|
18
|
+
"detector": "example-third-party",
|
|
19
|
+
"label": "hot",
|
|
20
|
+
"score": 0.87,
|
|
21
|
+
"notes": "Example row; do not treat as real vendor evidence."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"fixture_id": "en-nat-01",
|
|
25
|
+
"detector": "example-third-party",
|
|
26
|
+
"label": "cold",
|
|
27
|
+
"score": 0.12,
|
|
28
|
+
"notes": "Example row; do not treat as real vendor evidence."
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Deterministic dogfood guard for public docs.
|
|
3
|
+
//
|
|
4
|
+
// This deliberately avoids live LLM calls in CI. It uses patina's in-tree
|
|
5
|
+
// stylometry/lexicon analyzer and reports the percentage of prose paragraphs
|
|
6
|
+
// that trip a hot signal. The threshold is a regression guard, not an
|
|
7
|
+
// authorship verdict.
|
|
8
|
+
|
|
9
|
+
import { readFileSync } from 'node:fs';
|
|
10
|
+
import { dirname, resolve } from 'node:path';
|
|
11
|
+
import { fileURLToPath } from 'node:url';
|
|
12
|
+
|
|
13
|
+
import { scoreText } from '../../scripts/prose-score.mjs';
|
|
14
|
+
|
|
15
|
+
// Directory containing this script (ES modules do not provide __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Two directories above this script; target files are resolved against it.
const REPO_ROOT = resolve(__dirname, '../..');
// Highest score a file may reach before the guard fails. It is also passed
// to scoreText() as `gate`. Per the header comment, the score is the
// percentage of prose paragraphs that trip a hot signal.
const THRESHOLD = 30;
// Documents to score, each paired with the language code handed to the scorer.
const TARGETS = [
  { file: 'README.md', lang: 'en' },
  { file: 'README_KR.md', lang: 'ko' },
  { file: 'README_ZH.md', lang: 'zh' },
  { file: 'README_JA.md', lang: 'ja' },
  { file: 'docs/FAQ.md', lang: 'en' },
  { file: 'SKILL.md', lang: 'ko' },
];
|
|
26
|
+
|
|
27
|
+
/**
 * Scores one target document with the in-tree prose scorer.
 *
 * @param {{ file: string, lang: string }} target - Repo-relative file path
 *   and the language code to score it as.
 * @returns {*} Whatever scoreText() returns for the file's contents.
 */
function scoreFile({ file, lang }) {
  const absolutePath = resolve(REPO_ROOT, file);
  const contents = readFileSync(absolutePath, 'utf8');
  const options = { file, lang, gate: THRESHOLD, repoRoot: REPO_ROOT };
  return scoreText(contents, options);
}
|
|
31
|
+
|
|
32
|
+
// Score every target, print the results as a Markdown table, then fail the
// run (exit code 1) if any file scores above THRESHOLD.
const rows = TARGETS.map((target) => scoreFile(target));

console.log('# Dogfood docs score');
console.log('| file | lang | paragraphs | hot | score | threshold |');
console.log('|---|---|---:|---:|---:|---:|');
rows.forEach((r) => {
  const cells = `| ${r.file} | ${r.lang} | ${r.paragraphCount} | ${r.hotCount} | ${r.score.toFixed(1)} | ${THRESHOLD} |`;
  console.log(cells);
});

const failures = rows.filter(({ score }) => score > THRESHOLD);
if (failures.length > 0) {
  const offenders = failures.map((f) => `${f.file}=${f.score.toFixed(1)}`).join(', ');
  console.error(`\nDogfood score exceeded ${THRESHOLD}: ${offenders}`);
  process.exitCode = 1;
}
|