patina-cli 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +211 -0
- package/CHANGELOG.md +265 -0
- package/LICENSE +21 -0
- package/README.md +319 -0
- package/README_JA.md +254 -0
- package/README_KR.md +253 -0
- package/README_ZH.md +254 -0
- package/SKILL-MAX.md +455 -0
- package/SKILL.md +730 -0
- package/assets/brand/patina-icon.svg +9 -0
- package/assets/brand/patina-logo.svg +17 -0
- package/assets/social/patina-before-after.svg +46 -0
- package/assets/social/patina-og.svg +31 -0
- package/bin/patina.js +9 -0
- package/core/scoring.md +657 -0
- package/core/standalone-prompt.md +364 -0
- package/core/stylometry.md +754 -0
- package/core/voice.md +163 -0
- package/docs/AUTHENTICATION.md +105 -0
- package/docs/AUTHENTICATION_KR.md +105 -0
- package/docs/BRANDING.md +37 -0
- package/docs/CLI.md +80 -0
- package/docs/COMPARISON.md +38 -0
- package/docs/COOKBOOK.md +173 -0
- package/docs/DEMO.md +40 -0
- package/docs/ETHICS.md +27 -0
- package/docs/EXAMPLES.md +130 -0
- package/docs/EXAMPLES_KR.md +130 -0
- package/docs/EXIT-CODES.md +25 -0
- package/docs/FAQ.md +67 -0
- package/docs/FAQ_KR.md +65 -0
- package/docs/FLAG-PARITY.md +53 -0
- package/docs/GLOSSARY.md +123 -0
- package/docs/PATTERNS-EN.md +718 -0
- package/docs/PATTERNS-JA.md +706 -0
- package/docs/PATTERNS-KO.md +707 -0
- package/docs/PATTERNS-ZH.md +706 -0
- package/docs/PATTERNS.md +22 -0
- package/docs/ROADMAP.md +315 -0
- package/docs/audits/2026-05-deep-research.md +290 -0
- package/docs/benchmarks/detector-comparison.json +442 -0
- package/docs/benchmarks/detector-comparison.md +65 -0
- package/docs/benchmarks/latest.json +988 -0
- package/docs/benchmarks/latest.md +112 -0
- package/docs/integrations/docker.md +19 -0
- package/docs/integrations/github-action.md +59 -0
- package/docs/integrations/pre-commit.md +77 -0
- package/docs/integrations/release.md +43 -0
- package/docs/internal/HARNESS.md +14 -0
- package/docs/internal/README.md +14 -0
- package/docs/internal/WARP.md +23 -0
- package/docs/research/2025-rebaseline-plan.md +89 -0
- package/docs/research/ai-human-metrics.md +380 -0
- package/docs/social/gstack-cardnews.html +236 -0
- package/docs/social/gstack-cardnews.md +88 -0
- package/docs/social/gstack-thread.md +106 -0
- package/docs/social/patina-launch-copy.md +227 -0
- package/docs/superpowers/specs/2026-04-03-meaning-preservation-design.md +299 -0
- package/lexicon/ai-en.md +162 -0
- package/lexicon/ai-ko.md +159 -0
- package/package.json +100 -0
- package/patina-max/SKILL.md +523 -0
- package/patina-max/composite.py +457 -0
- package/patterns/en-communication.md +89 -0
- package/patterns/en-content.md +133 -0
- package/patterns/en-filler.md +113 -0
- package/patterns/en-language.md +163 -0
- package/patterns/en-structure.md +173 -0
- package/patterns/en-style.md +139 -0
- package/patterns/en-viral-hook.md +211 -0
- package/patterns/ja-communication.md +101 -0
- package/patterns/ja-content.md +153 -0
- package/patterns/ja-filler.md +123 -0
- package/patterns/ja-language.md +190 -0
- package/patterns/ja-structure.md +142 -0
- package/patterns/ja-style.md +147 -0
- package/patterns/ja-viral-hook.md +216 -0
- package/patterns/ko-communication.md +98 -0
- package/patterns/ko-content.md +154 -0
- package/patterns/ko-filler.md +105 -0
- package/patterns/ko-language.md +182 -0
- package/patterns/ko-structure.md +147 -0
- package/patterns/ko-style.md +146 -0
- package/patterns/ko-viral-hook.md +211 -0
- package/patterns/zh-communication.md +101 -0
- package/patterns/zh-content.md +153 -0
- package/patterns/zh-filler.md +118 -0
- package/patterns/zh-language.md +173 -0
- package/patterns/zh-structure.md +145 -0
- package/patterns/zh-style.md +159 -0
- package/patterns/zh-viral-hook.md +216 -0
- package/profiles/academic.md +53 -0
- package/profiles/blog.md +81 -0
- package/profiles/casual-conversation.md +105 -0
- package/profiles/code-comment.md +104 -0
- package/profiles/commit-message.md +99 -0
- package/profiles/default.md +62 -0
- package/profiles/email.md +52 -0
- package/profiles/formal.md +98 -0
- package/profiles/instructional.md +80 -0
- package/profiles/legal.md +57 -0
- package/profiles/marketing.md +56 -0
- package/profiles/medical.md +53 -0
- package/profiles/narrative.md +79 -0
- package/profiles/release-notes.md +98 -0
- package/profiles/social.md +56 -0
- package/profiles/technical.md +53 -0
- package/scripts/benchmark-report.mjs +252 -0
- package/scripts/check-release-metadata.mjs +48 -0
- package/scripts/detector-comparison.mjs +267 -0
- package/scripts/lint.mjs +40 -0
- package/scripts/precommit-score.mjs +31 -0
- package/scripts/prose-score.mjs +186 -0
- package/scripts/update-benchmark-ranges.mjs +108 -0
- package/src/api.js +330 -0
- package/src/auth.js +105 -0
- package/src/backends/claude-cli.js +112 -0
- package/src/backends/codex-cli.js +121 -0
- package/src/backends/contract.js +21 -0
- package/src/backends/gemini-cli.js +135 -0
- package/src/backends/index.js +159 -0
- package/src/cache.js +106 -0
- package/src/cli.js +1280 -0
- package/src/commands/doctor.js +229 -0
- package/src/commands/init.js +208 -0
- package/src/config.js +126 -0
- package/src/errors.js +53 -0
- package/src/features/index.js +96 -0
- package/src/features/lexicon.js +90 -0
- package/src/features/segment.js +49 -0
- package/src/features/stylometry.js +50 -0
- package/src/loader.js +103 -0
- package/src/logger.js +70 -0
- package/src/manifest.js +162 -0
- package/src/max-mode.js +207 -0
- package/src/ouroboros.js +233 -0
- package/src/output.js +480 -0
- package/src/prompt-builder.js +409 -0
- package/src/providers.js +100 -0
- package/src/scoring.js +531 -0
- package/src/security.js +133 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-01.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-03.md +17 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +939 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-03.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-01.md +14 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-04.md +14 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-03.md +11 -0
- package/tests/quality/README.md +121 -0
- package/tests/quality/benchmark.mjs +306 -0
- package/tests/quality/detectors.manual.example.json +31 -0
- package/tests/quality/dogfood.mjs +44 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
---
|
|
2
|
+
profile: social
|
|
3
|
+
name: SNS/소셜미디어 프로필
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
scope: 트위터/X, 인스타그램, 스레드, 블루스카이, 카카오스토리, 페이스북
|
|
6
|
+
voice-overrides:
|
|
7
|
+
first-person: amplify # 1인칭 적극 사용
|
|
8
|
+
opinions: amplify # 의견 강하게
|
|
9
|
+
rhythm-variation: amplify # 리듬 변화 극대화
|
|
10
|
+
humor: amplify # 유머 적극 허용
|
|
11
|
+
messiness: amplify # 불완전 구조, 파편 문장 적극 허용
|
|
12
|
+
concrete-emotions: amplify # 구체적 감정 표현 강화
|
|
13
|
+
pattern-overrides:
|
|
14
|
+
ko:
|
|
15
|
+
17: suppress # 이모지 — SNS에서는 표준
|
|
16
|
+
22: reduce # 채움 표현 — 일부 캐주얼 필러 허용
|
|
17
|
+
14: suppress # 볼드체 — SNS에서는 비해당 (플랫폼 지원 불일치)
|
|
18
|
+
21: reduce # 아첨 — 약간의 친근한 표현은 허용
|
|
19
|
+
en:
|
|
20
|
+
17: suppress # Emojis — standard in social media
|
|
21
|
+
22: reduce # Filler — some casual filler is natural
|
|
22
|
+
14: suppress # Boldface — not relevant for most social platforms
|
|
23
|
+
21: reduce # Sycophantic — some friendly tone allowed
|
|
24
|
+
zh:
|
|
25
|
+
17: suppress # 表情符号 — SNS 표준
|
|
26
|
+
22: reduce # 填充表达 — 캐주얼 필러 허용
|
|
27
|
+
14: suppress # 加粗 — 비해당
|
|
28
|
+
ja:
|
|
29
|
+
17: suppress # 絵文字 — SNS 표준
|
|
30
|
+
22: reduce # フィラー — 캐주얼 필러 허용
|
|
31
|
+
14: suppress # 太字 — 비해당
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
# SNS/소셜미디어 프로필
|
|
35
|
+
|
|
36
|
+
소셜미디어 특유의 캐주얼한 어조를 유지하면서 AI 패턴을 제거한다. 이모지, 구어체, 파편 문장을 허용하되, 여전히 AI 티가 나는 구조적 패턴은 교정한다.
|
|
37
|
+
|
|
38
|
+
## 범위
|
|
39
|
+
|
|
40
|
+
트위터/X, 인스타그램, 스레드, 블루스카이, 카카오스토리, 페이스북 포스트. 기업 공식 계정은 `default` 프로필이 더 적합.
|
|
41
|
+
|
|
42
|
+
## 어조 지침
|
|
43
|
+
|
|
44
|
+
- **짧게 쓴다.** SNS의 핵심은 간결함. 한두 문장이면 충분할 때가 많다.
|
|
45
|
+
- **구어체가 기본이다.** "~거든", "~잖아", "ㅋㅋ", "ㅠㅠ" 등 자연스러운 SNS 언어 허용.
|
|
46
|
+
- **이모지는 자유롭게.** 교정 대상이 아니다.
|
|
47
|
+
- **의견을 과감하게.** "이거 진짜 별로다", "개꿀팁" — 태도가 뚜렷해야 사람다움.
|
|
48
|
+
- **파편 문장 OK.** "진짜로.", "이게 맞나.", "아 글쎄." — SNS에서는 자연스럽다.
|
|
49
|
+
|
|
50
|
+
## 적극 교정 대상
|
|
51
|
+
|
|
52
|
+
- **AI 고빈도 어휘 (#7):** "다양한", "혁신적인" 등은 SNS에서 특히 어색. 구어로 교체.
|
|
53
|
+
- **과도한 중요성 부여 (#1):** SNS에서 "획기적인 성과"는 풍자가 아닌 이상 부적절.
|
|
54
|
+
- **구조적 반복 (#25):** 모든 트윗이 같은 구조면 봇처럼 보인다.
|
|
55
|
+
- **과제와 전망 공식 (#6):** "도전과 기회가 공존" 같은 표현은 SNS에서 극도로 부자연스럽다.
|
|
56
|
+
- **챗봇 표현 (#19):** "도움이 되셨으면 좋겠습니다"는 SNS에서도 여전히 교정 대상.
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
---
|
|
2
|
+
profile: technical
|
|
3
|
+
name: 기술 문서/API 문서 프로필
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
scope: 기술 문서, API 문서, README, 가이드, 튜토리얼, 사양서
|
|
6
|
+
voice-overrides:
|
|
7
|
+
first-person: suppress # 1인칭 억제
|
|
8
|
+
opinions: suppress # 의견 억제, 사실 기반 서술
|
|
9
|
+
rhythm-variation: normal # 리듬 변화 유지
|
|
10
|
+
humor: suppress # 유머 억제
|
|
11
|
+
messiness: suppress # 불완전 구조 억제
|
|
12
|
+
concrete-emotions: suppress # 감정 표현 억제
|
|
13
|
+
pattern-overrides:
|
|
14
|
+
ko:
|
|
15
|
+
14: suppress # 볼드체 — 기술 문서에서 키워드/파라미터 볼드 표준
|
|
16
|
+
15: suppress # 인라인 헤더 — API 문서의 표준 형식
|
|
17
|
+
27: reduce # 수동태 — 사양서에서 허용
|
|
18
|
+
en:
|
|
19
|
+
14: suppress # Boldface — standard in technical docs for keywords
|
|
20
|
+
15: suppress # Inline-header lists — standard API doc format
|
|
21
|
+
18: suppress # Curly quotes — actively fix in code/config contexts
|
|
22
|
+
26: reduce # Passive — acceptable in specifications
|
|
23
|
+
zh:
|
|
24
|
+
14: suppress # 加粗 — 기술 문서 표준
|
|
25
|
+
15: suppress # 内联标题列表 — API 문서 표준
|
|
26
|
+
27: reduce # 被字句 — 사양서에서 허용
|
|
27
|
+
ja:
|
|
28
|
+
14: suppress # 太字 — 기술 문서 표준
|
|
29
|
+
15: suppress # インラインヘッダー — API 문서 표준
|
|
30
|
+
16: suppress # 敬語 — 기술 문서에서는 비해당 (である調 기본)
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
# 기술 문서/API 문서 프로필
|
|
34
|
+
|
|
35
|
+
기술 문서의 형식적 관행을 존중하면서 AI 패턴을 제거한다. 볼드 키워드, 파라미터 테이블, 인라인 헤더 리스트 등 기술 문서의 표준 요소는 건드리지 않는다.
|
|
36
|
+
|
|
37
|
+
## 범위
|
|
38
|
+
|
|
39
|
+
API 문서, README, 가이드, 튜토리얼, 기술 사양서. 기술 블로그 글은 `blog` 프로필이 더 적합.
|
|
40
|
+
|
|
41
|
+
## 어조 지침
|
|
42
|
+
|
|
43
|
+
- **명확하고 직접적으로 쓴다.** 기술 문서의 목표는 정확한 정보 전달.
|
|
44
|
+
- **1인칭은 쓰지 않는다.** "Set the variable" (O), "I set the variable" (X).
|
|
45
|
+
- **의견이나 유머는 넣지 않는다.** 사실과 절차만.
|
|
46
|
+
- **일관된 용어를 유지한다.** 같은 개념을 같은 단어로 부른다 (유의어 순환 금지).
|
|
47
|
+
|
|
48
|
+
## 적극 교정 대상
|
|
49
|
+
|
|
50
|
+
- **AI 고빈도 어휘 (#7):** "활용하여", "체계적으로" 등 → "사용하여", "순서대로" 등으로 교체.
|
|
51
|
+
- **채움 표현 (#22):** "주목할 만한 점은 ~라는 것이다" → 직접 서술.
|
|
52
|
+
- **과도한 중요성 부여 (#1):** 기술 문서에서 "획기적인 기능"은 부적절. 기능 설명으로 대체.
|
|
53
|
+
- **유의어 순환 (#11):** 기술 문서에서 같은 것을 다른 이름으로 부르면 혼란. 적극 교정.
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Generate a publishable benchmark report from the deterministic quality suite.
|
|
3
|
+
//
|
|
4
|
+
// Default behavior reruns tests/quality/benchmark.mjs first so docs/benchmarks/*
|
|
5
|
+
// reflects the current fixture set. Use --no-run to render from an existing
|
|
6
|
+
// tests/quality/results.json file.
|
|
7
|
+
|
|
8
|
+
import { spawnSync } from 'node:child_process';
|
|
9
|
+
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
10
|
+
import { dirname, relative, resolve } from 'node:path';
|
|
11
|
+
import { fileURLToPath } from 'node:url';
|
|
12
|
+
|
|
13
|
+
// All locations are anchored to this script's own directory, so the report
// lands in the same place regardless of the caller's working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = resolve(__dirname, '..');

// Input: raw results read by this script (see --no-run in the header comment).
const RESULTS_PATH = resolve(REPO_ROOT, 'tests/quality/results.json');

// Outputs: the JSON and Markdown flavours of the published report.
const REPORT_DIR = resolve(REPO_ROOT, 'docs/benchmarks');
const JSON_PATH = resolve(REPORT_DIR, 'latest.json');
const MARKDOWN_PATH = resolve(REPORT_DIR, 'latest.md');

// The benchmark is rerun unless the caller opts out with --no-run.
const runBenchmarkFirst = process.argv.includes('--no-run') === false;
// Human-readable form of the benchmark invocation, recorded in the JSON report.
const benchmarkCommand = ['node', 'tests/quality/benchmark.mjs', '--quiet'];
|
|
22
|
+
|
|
23
|
+
/**
 * Run the quality benchmark script in a child process using the current
 * Node binary, with the repository root as working directory and the
 * child's stdio wired to this process.
 *
 * @returns {number} The child's exit status, or 1 when no status is reported.
 * @throws {Error} The spawn error, if the child could not be started.
 */
function runBenchmark() {
  const scriptArgs = ['tests/quality/benchmark.mjs', '--quiet'];
  const { error, status } = spawnSync(process.execPath, scriptArgs, {
    cwd: REPO_ROOT,
    stdio: 'inherit',
  });
  if (error) {
    throw error;
  }
  return status ?? 1;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Load and validate the benchmark results file.
 *
 * Any failure (missing file, invalid JSON, schema mismatch) is rethrown as a
 * single user-facing error that names the file and the recovery command. The
 * underlying error is kept as `cause` so its stack is not lost.
 *
 * @returns {object} The parsed, schema-checked results.
 * @throws {Error} When the file cannot be read, parsed, or validated.
 */
function readResults() {
  try {
    const results = JSON.parse(readFileSync(RESULTS_PATH, 'utf8'));
    validateResultsSchema(results);
    return results;
  } catch (error) {
    throw new Error(
      `Cannot read ${relative(REPO_ROOT, RESULTS_PATH)}. Run npm run benchmark first. ${error.message}`,
      { cause: error }
    );
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Check that a parsed results object carries the fields the report renders.
 *
 * Every problem is collected before throwing, so one run lists all missing
 * fields instead of only the first one.
 *
 * @param {object} results - Parsed contents of the benchmark results file.
 * @throws {Error} Listing each missing or mistyped field.
 */
function validateResultsSchema(results) {
  const missing = [];
  if (results?.schemaVersion !== 2) missing.push('schemaVersion=2');
  if (typeof results?.fixtureSchemaVersion !== 'number') missing.push('fixtureSchemaVersion');
  if (typeof results?.nodeVersion !== 'string') missing.push('nodeVersion');
  if (typeof results?.overall?.ci_low !== 'number') missing.push('overall.ci_low');
  if (typeof results?.overall?.ci_high !== 'number') missing.push('overall.ci_high');
  if (typeof results?.overall?.n !== 'number') missing.push('overall.n');
  for (const [lang, summary] of Object.entries(results?.perLanguage || {})) {
    for (const detector of ['burstiness', 'mattr', 'lexicon']) {
      // `summary?.` guards a null language entry: it is reported as a missing
      // field rather than surfacing as an unrelated TypeError.
      if (!summary?.byDetector?.[detector]) missing.push(`perLanguage.${lang}.byDetector.${detector}`);
    }
  }
  if (missing.length) {
    throw new Error(`Benchmark results schema is stale or invalid; missing ${missing.join(', ')}. Re-run tests/quality/benchmark.mjs.`);
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Format a 0..1 ratio as a percentage with one decimal place.
 * `null`/`undefined` are rendered as 0.
 *
 * @param {number|null|undefined} value
 * @returns {string} e.g. "87.5%"
 */
function pct(value) {
  const ratio = value ?? 0;
  const percent = (ratio * 100).toFixed(1);
  return `${percent}%`;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Format a number with at most `digits` decimals, dropping trailing zeros
 * (and the decimal point when nothing remains after it).
 * `null`/`undefined` are rendered as 0.
 *
 * @param {number|null|undefined} value
 * @param {number} [digits=3] - Maximum number of decimals.
 * @returns {string}
 */
function num(value, digits = 3) {
  const fixed = Number(value ?? 0).toFixed(digits);
  // First drop an all-zero fraction ("1.000" -> "1") ...
  const withoutZeroFraction = fixed.replace(/\.0+$/, '');
  // ... then trim zeros that trail a non-zero fraction ("0.500" -> "0.5").
  return withoutZeroFraction.replace(/(\.\d*?)0+$/, '$1');
}
|
|
69
|
+
|
|
70
|
+
/**
 * Render a truthy/falsy flag with the report's vocabulary.
 *
 * @param {*} value
 * @returns {'hot'|'cold'} 'hot' for truthy input, 'cold' otherwise.
 */
function bool(value) {
  if (value) {
    return 'hot';
  }
  return 'cold';
}
|
|
73
|
+
|
|
74
|
+
/**
 * Render a pass/fail flag as a check mark or a cross.
 *
 * @param {*} value
 * @returns {string} '✓' for truthy input, '✗' otherwise.
 */
function resultMark(value) {
  if (value) {
    return '✓';
  }
  return '✗';
}
|
|
77
|
+
|
|
78
|
+
/**
 * Make a value safe to place inside a Markdown table cell.
 *
 * Pipes are backslash-escaped, every whitespace run (including newlines)
 * collapses to one space, and the result is trimmed. A nullish or
 * effectively empty value becomes an em dash.
 *
 * @param {*} value
 * @returns {string}
 */
function cell(value) {
  const text = String(value ?? '—');
  const escaped = text.replace(/\|/g, '\\|');
  const singleLine = escaped.replace(/\s+/g, ' ').trim();
  return singleLine || '—';
}
|
|
81
|
+
|
|
82
|
+
/**
 * Derive an exit-style status from stored results.
 *
 * @param {{fixtures?: Array<{correct?: boolean}>}} results
 * @returns {0|1} 1 when any fixture is not marked correct, 0 otherwise
 *   (including when there are no fixtures).
 */
function statusFromResults(results) {
  const fixtures = results.fixtures || [];
  const allCorrect = fixtures.every((fixture) => Boolean(fixture.correct));
  return allCorrect ? 0 : 1;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Build one Markdown table row per language, ordered by language code.
 *
 * @param {Object<string, object>} [perLanguage={}] - Per-language summaries.
 * @returns {string[]} Table rows matching the "Language breakdown" header.
 */
function languageRows(perLanguage = {}) {
  const sorted = Object.entries(perLanguage).sort(([left], [right]) => left.localeCompare(right));
  const rows = [];
  for (const [lang, stats] of sorted) {
    const interval = `${pct(stats.ci_low)}–${pct(stats.ci_high)}`;
    rows.push(
      `| ${lang} | ${stats.total} | ${pct(stats.accuracy)} | ${interval} | ${pct(stats.precision)} | ${pct(stats.recall)} | ${num(stats.f1, 2)} | ${stats.tp} | ${stats.fp} | ${stats.fn} | ${stats.tn} |`
    );
  }
  return rows;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Build one Markdown table row per (language, detector) pair, ordered by
 * language code and then by detector name. Languages without a `byDetector`
 * map contribute no rows.
 *
 * @param {Object<string, object>} [perLanguage={}] - Per-language summaries.
 * @returns {string[]} Table rows matching the "Detector breakdown" header.
 */
function detectorRows(perLanguage = {}) {
  const byName = ([left], [right]) => left.localeCompare(right);
  return Object.entries(perLanguage)
    .sort(byName)
    .flatMap(([lang, summary]) =>
      Object.entries(summary.byDetector || {})
        .sort(byName)
        .map(
          ([detector, stats]) =>
            `| ${lang} | ${detector} | ${stats.total} | ${pct(stats.accuracy)} | ${pct(stats.ci_low)}–${pct(stats.ci_high)} | ${pct(stats.precision)} | ${pct(stats.recall)} | ${num(stats.f1, 2)} | ${stats.tp} | ${stats.fp} | ${stats.fn} | ${stats.tn} |`
        )
    );
}
|
|
105
|
+
|
|
106
|
+
/**
 * Count fixtures per (language, class) pair and render the counts as
 * Markdown table rows, sorted by the combined key.
 *
 * A fixture without `lang` or `class` is bucketed under 'unknown', the same
 * fallback sampleSizeSummary uses, so the Markdown table and the JSON
 * `sampleSizes` agree instead of one of them showing "undefined".
 *
 * @param {Array<{lang?: string, class?: string}>} [fixtures=[]]
 * @returns {string[]} Table rows matching the "Sample sizes" header.
 */
function classRows(fixtures = []) {
  const counts = new Map();
  for (const f of fixtures) {
    // NUL separates the two parts: it cannot be confused with a character of
    // an ordinary language code or class name when splitting the key again.
    const key = `${f.lang || 'unknown'}\0${f.class || 'unknown'}`;
    counts.set(key, (counts.get(key) || 0) + 1);
  }
  return [...counts.entries()]
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([key, count]) => {
      const [lang, klass] = key.split('\0');
      return `| ${cell(lang)} | ${cell(klass)} | ${count} |`;
    });
}
|
|
119
|
+
|
|
120
|
+
/**
 * Count fixtures per language and class as a nested plain object:
 * `{ [lang]: { [class]: count } }`. A missing language or class is counted
 * under 'unknown'.
 *
 * @param {Array<{lang?: string, class?: string}>} [fixtures=[]]
 * @returns {Object<string, Object<string, number>>}
 */
function sampleSizeSummary(fixtures = []) {
  const summary = {};
  for (const fixture of fixtures) {
    const lang = fixture.lang || 'unknown';
    const klass = fixture.class || 'unknown';
    if (!summary[lang]) {
      summary[lang] = {};
    }
    const bucket = summary[lang];
    bucket[klass] = (bucket[klass] || 0) + 1;
  }
  return summary;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Render every fixture as one row of the "Fixture log" table, in input
 * order. At most the first four lexicon hits are shown per fixture.
 *
 * @param {object[]} [fixtures=[]] - Per-fixture benchmark records.
 * @returns {string[]} Markdown table rows.
 */
function fixtureRows(fixtures = []) {
  const rows = [];
  for (const fx of fixtures) {
    const sampleHits = (fx.lexicon_hits || []).slice(0, 4).join(', ');
    const columns = [
      cell(fx.fixture_id),
      cell(fx.lang),
      cell(fx.class),
      bool(fx.expected_hot),
      bool(fx.predicted_hot),
      resultMark(fx.correct),
      `${num(fx.cv)} ${cell(fx.cv_band)}`,
      `${num(fx.mattr)} ${cell(fx.mattr_band)}`,
      num(fx.lexicon_density),
      cell(sampleHits),
    ];
    rows.push(`| ${columns.join(' | ')} |`);
  }
  return rows;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Render the "Misclassifications" section: a fixed sentence when every
 * fixture is correct, otherwise a Markdown table of the wrong ones.
 *
 * Fixture id, language and class go through cell(), as they do in
 * fixtureRows, so a pipe or newline in those values cannot break the table.
 *
 * @param {object[]} [fixtures=[]] - Per-fixture benchmark records.
 * @returns {string} Markdown for the section body.
 */
function misclassificationSection(fixtures = []) {
  const wrong = fixtures.filter((f) => !f.correct);
  if (wrong.length === 0) return 'All fixtures classified correctly.';
  return [
    '| fixture | lang | class | expected | predicted | cv | mattr | lexicon density |',
    '|---|---|---|---|---|---:|---:|---:|',
    ...wrong.map(
      (f) =>
        `| ${cell(f.fixture_id)} | ${cell(f.lang)} | ${cell(f.class)} | ${bool(f.expected_hot)} | ${bool(f.predicted_hot)} | ${num(f.cv)} ${f.cv_band || ''} | ${num(f.mattr)} ${f.mattr_band || ''} | ${num(f.lexicon_density)} |`
    ),
  ].join('\n');
}
|
|
149
|
+
|
|
150
|
+
/**
 * Render the full Markdown benchmark report.
 *
 * @param {object} results - Parsed benchmark results.
 * @param {number} benchmarkStatus - 0 renders as "passing", anything else as "failing".
 * @returns {string} The complete Markdown document, ending with a newline.
 */
function renderMarkdown(results, benchmarkStatus) {
  const generatedAt = results.generatedAt || new Date().toISOString();

  const languages = Object.keys(results.perLanguage || {}).sort();
  const languageCount = languages.length;
  const languageList = languages.join(', ');

  let status = 'failing';
  if (benchmarkStatus === 0) {
    status = 'passing';
  }

  // Fallback for results that carry no `overall` block: reuse the flat
  // accuracy/count fields and mark the interval as unavailable.
  const overall = results.overall || {
    accuracy: results.overallAccuracy,
    ci_low: null,
    ci_high: null,
    n: results.fixtureCount,
    confidence_method: 'unavailable',
  };

  // Section bodies are built up front so the template below reads as a document.
  const languageTable = languageRows(results.perLanguage).join('\n');
  const detectorTable = detectorRows(results.perLanguage).join('\n');
  const sampleSizeTable = classRows(results.fixtures).join('\n');
  const misclassified = misclassificationSection(results.fixtures);
  const fixtureTable = fixtureRows(results.fixtures).join('\n');
  const overallInterval = `${pct(overall.ci_low)}–${pct(overall.ci_high)}`;

  return `# Benchmark Report

This is the latest checked-in report for patina's deterministic suspect-zone benchmark.

> Scope: this benchmark measures whether patina's stylometry layer flags fixture paragraphs as AI-like editing hotspots. It does **not** prove whether a real document was written by a human or by AI.

## Current result

- Status: **${status}**
- Generated at: ${generatedAt}
- Node: ${results.nodeVersion}
- Fixture schema: v${results.fixtureSchemaVersion}
- Fixtures: ${results.fixtureCount}
- Languages: ${languageCount} (${languageList})
- Overall accuracy: **${pct(overall.accuracy)}** [${overallInterval}] (n=${overall.n}, ${overall.confidence_method})
- Source fixtures: \`tests/fixtures/suspect-zones/**\`
- Regression ranges: \`tests/fixtures/suspect-zones/expected-ranges.json\` (refresh with \`npm run benchmark:ranges\`)
- Reproduce: \`npm run benchmark:report\`
- Raw JSON: [latest.json](latest.json)
- Detector comparison harness: [detector-comparison.md](detector-comparison.md)
- 2025+ re-baseline plan: [docs/research/2025-rebaseline-plan.md](../research/2025-rebaseline-plan.md)

## Language breakdown

| lang | fixtures | accuracy | 95% CI | precision | recall | f1 | TP | FP | FN | TN |
|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
${languageTable}

## Detector breakdown

| lang | detector | fixtures | accuracy | 95% CI | precision | recall | f1 | TP | FP | FN | TN |
|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
${detectorTable}

## Sample sizes

| lang | class | fixtures |
|---|---|---:|
${sampleSizeTable}

## Misclassifications

${misclassified}

## Fixture log

| fixture | lang | class | expected | predicted | ok | CV band | MATTR band | lexicon/1k | sample lexicon hits |
|---|---|---|---|---|---:|---:|---:|---:|---|
${fixtureTable}

## How to read this

- **Hot** means at least one deterministic signal crossed the benchmark threshold: low burstiness CV, low MATTR, or AI-lexicon density.
- **Cold** means the fixture did not cross those thresholds.
- The report is meant for regression tracking and contributor discussion, not for authorship accusation.
- This deterministic corpus is intentionally small (${results.fixtureCount} fixtures across ${languageList}); do not treat 100% fixture accuracy as generalization to new models, genres, or edited AI text.
- Confidence intervals use Wilson score intervals for the checked-in fixture set; external threshold sweeps and 2025+ model rebaselines are separate research follow-ups tracked in [2025+ Re-baseline Plan](../research/2025-rebaseline-plan.md).
- Broader methodology notes live in [AI/Human Metrics Research](../research/ai-human-metrics.md) and [Quality Checks](../../tests/quality/README.md).
`;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Entry point: optionally rerun the benchmark, load its results, and write
 * the JSON and Markdown reports.
 *
 * The status comes from the benchmark's exit code when it was rerun, and is
 * derived from the stored fixtures otherwise. A non-zero status becomes this
 * process's exit code, but both reports are still written first.
 */
function main() {
  // Run (if requested) before reading, so the results file is fresh.
  const freshStatus = runBenchmarkFirst ? runBenchmark() : 0;
  const results = readResults();
  const benchmarkStatus = runBenchmarkFirst ? freshStatus : statusFromResults(results);

  // Key order is the order of the published JSON; report metadata comes
  // first, then everything from the raw results, then the derived counts.
  const report = {
    reportVersion: 2,
    benchmarkCommand: benchmarkCommand.join(' '),
    benchmarkStatus,
    note: 'Deterministic suspect-zone benchmark; not an authorship detector.',
    regressionRanges: 'tests/fixtures/suspect-zones/expected-ranges.json',
    ...results,
    sampleSizes: sampleSizeSummary(results.fixtures),
  };
  const reportJson = `${JSON.stringify(report, null, 2)}\n`;
  const reportMarkdown = renderMarkdown(results, benchmarkStatus);

  mkdirSync(REPORT_DIR, { recursive: true });
  writeFileSync(JSON_PATH, reportJson);
  writeFileSync(MARKDOWN_PATH, reportMarkdown);

  for (const written of [MARKDOWN_PATH, JSON_PATH]) {
    console.log(`Wrote ${relative(REPO_ROOT, written)}`);
  }

  if (benchmarkStatus !== 0) {
    process.exitCode = benchmarkStatus;
  }
}
|
|
251
|
+
|
|
252
|
+
main();
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
|
|
4
|
+
// Release gate: collect every metadata mismatch, print them all, and exit 1
// if there is at least one. Paths are relative to the current working
// directory.
const pkg = readJson('package.json');
const { version } = pkg;
const checks = [];

// Root package wiring.
expect(pkg.private === false, 'package.json private must be false');
expect(pkg.bin?.patina === 'bin/patina.js', 'package.json bin.patina must point to bin/patina.js');
expect(pkg.bin?.['patina-score'] === 'scripts/precommit-score.mjs', 'package.json bin.patina-score must point to scripts/precommit-score.mjs');
expect(existsSync('bin/patina.js'), 'bin/patina.js must exist');

// Files that carry a `version:` field which must track package.json.
for (const file of ['SKILL.md', 'SKILL-MAX.md', 'patina-max/SKILL.md', '.patina.default.yaml']) {
  expect(readVersionField(file) === version, `${file} version must match package.json`);
}

// README config example and CHANGELOG release heading.
const readme = readFileSync('README.md', 'utf8');
expect(readme.includes(`version: "${version}"`), 'README.md config example version must match package.json');
const releaseHeading = new RegExp(`^## ${escapeRegex(version)} — \\d{4}-\\d{2}-\\d{2}`, 'm');
const changelog = readFileSync('CHANGELOG.md', 'utf8');
expect(releaseHeading.test(changelog), 'CHANGELOG.md must contain a release heading for package.json version');

// The patina-humanizer alias package must be pinned to this exact release.
const aliasPkg = readJson('packages/patina-humanizer/package.json');
expect(aliasPkg.name === 'patina-humanizer', 'alias package name must be patina-humanizer');
expect(aliasPkg.version === version, 'patina-humanizer version must match package.json');
expect(aliasPkg.dependencies?.['patina-cli'] === version, 'patina-humanizer must depend on exact patina-cli version');
expect(aliasPkg.bin?.['patina-humanizer'] === 'bin/patina-humanizer.js', 'patina-humanizer bin must point to bin/patina-humanizer.js');
expect(existsSync('packages/patina-humanizer/bin/patina-humanizer.js'), 'patina-humanizer bin file must exist');

if (checks.length > 0) {
  const failureList = checks.map((msg) => `- ${msg}`).join('\n');
  console.error(failureList);
  process.exit(1);
}
console.log(`Release metadata OK for ${version}`);
|
|
31
|
+
|
|
32
|
+
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 */
function readJson(path) {
  const raw = readFileSync(path, 'utf8');
  return JSON.parse(raw);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Extract the value of the first line-leading `version:` field in a file.
 * Surrounding single or double quotes are not part of the result, and the
 * value is trimmed.
 *
 * @param {string} path - File to scan.
 * @returns {string|undefined} The version text, or undefined when no
 *   `version:` field is found.
 */
function readVersionField(path) {
  const contents = readFileSync(path, 'utf8');
  const found = /^version:\s*["']?([^"'\n]+)["']?/m.exec(contents);
  if (!found) {
    return undefined;
  }
  return found[1].trim();
}
|
|
41
|
+
|
|
42
|
+
/**
 * Record a failed release check. Nothing is thrown; failures accumulate in
 * the module-level `checks` list and are reported together.
 *
 * @param {*} condition - Truthy when the check passed.
 * @param {string} message - Failure text recorded when it did not.
 */
function expect(condition, message) {
  if (condition) {
    return;
  }
  checks.push(message);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Backslash-escape regular-expression metacharacters so the value can be
 * embedded in a RegExp source and matched literally.
 *
 * @param {*} value - Converted to a string before escaping.
 * @returns {string}
 */
function escapeRegex(value) {
  const text = String(value);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (metachar) => `\\${metachar}`);
}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Offline detector-comparison harness for the suspect-zone benchmark.
|
|
3
|
+
//
|
|
4
|
+
// Default mode compares Patina's deterministic in-tree analyzer against the
|
|
5
|
+
// checked-in fixture labels. Pass --input <manual-results.json> to merge scores
|
|
6
|
+
// copied manually from third-party tools. The script never scrapes websites,
|
|
7
|
+
// never sends text to external services, and never reads secrets.
|
|
8
|
+
|
|
9
|
+
import { spawnSync } from 'node:child_process';
|
|
10
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
11
|
+
import { dirname, relative, resolve } from 'node:path';
|
|
12
|
+
import { fileURLToPath } from 'node:url';
|
|
13
|
+
|
|
14
|
+
// All locations are anchored to this script's own directory, so output
// paths do not depend on the caller's working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = resolve(__dirname, '..');

// Input: benchmark results read by this script.
const RESULTS_PATH = resolve(REPO_ROOT, 'tests/quality/results.json');

// Outputs: JSON and Markdown flavours of the detector comparison.
const REPORT_DIR = resolve(REPO_ROOT, 'docs/benchmarks');
const JSON_PATH = resolve(REPORT_DIR, 'detector-comparison.json');
const MARKDOWN_PATH = resolve(REPORT_DIR, 'detector-comparison.md');

// Command-line options, parsed once at module load (argv minus node + script).
const cliArguments = process.argv.slice(2);
const args = parseArgs(cliArguments);
|
|
22
|
+
|
|
23
|
+
/**
 * Parse the script's command-line flags.
 *
 * Supported flags:
 *   --no-run        skip rerunning the benchmark
 *   --input <path>  manual detector results to merge
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{runBenchmark: boolean, input: string|null}}
 * @throws {Error} On an unknown flag, or when --input has no value.
 */
function parseArgs(argv) {
  const out = { runBenchmark: true, input: null };
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--no-run') {
      out.runBenchmark = false;
    } else if (arg === '--input') {
      // Consume the next token as the path. A trailing `--input` used to
      // store `undefined`, which was then silently treated as "no input".
      const value = argv[++i];
      if (value === undefined) {
        throw new Error('Missing value for --input: expected a path to a manual results JSON file');
      }
      out.input = value;
    } else {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }
  return out;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run the quality benchmark script in a child process using the current
 * Node binary, with the repository root as working directory and the
 * child's stdio wired to this process.
 *
 * If the benchmark does not exit with status 0, this process exits
 * immediately with the child's status (or 1 when none is reported).
 *
 * @throws {Error} The spawn error, if the child could not be started.
 */
function runBenchmark() {
  const scriptArgs = ['tests/quality/benchmark.mjs', '--quiet'];
  const { error, status } = spawnSync(process.execPath, scriptArgs, {
    cwd: REPO_ROOT,
    stdio: 'inherit',
  });
  if (error) {
    throw error;
  }
  const exitStatus = status ?? 1;
  if (exitStatus !== 0) {
    process.exit(exitStatus);
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Load the benchmark results file and make sure it is a schema v2 result
 * with a `fixtures` array.
 *
 * @returns {object} The parsed results.
 * @throws {Error} When the schema version or the fixtures array is not as expected.
 */
function readBenchmarkResults() {
  const raw = readFileSync(RESULTS_PATH, 'utf8');
  const results = JSON.parse(raw);

  const isSchemaV2 = results?.schemaVersion === 2;
  const hasFixtureList = Array.isArray(results?.fixtures);
  if (!(isSchemaV2 && hasFixtureList)) {
    throw new Error(`${relative(REPO_ROOT, RESULTS_PATH)} is not a benchmark schema v2 result`);
  }
  return results;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Turn one manual detector result into a hot/cold boolean.
 *
 * A boolean `predicted_hot` is used as-is. Otherwise a string `label` is
 * matched case-insensitively against the known hot and cold synonyms.
 *
 * @param {{predicted_hot?: boolean, label?: string, fixture_id?: string, detector?: string}} row
 * @returns {boolean} true for hot, false for cold.
 * @throws {Error} When neither field yields a verdict.
 */
function normalizeLabel(row) {
  // An explicit boolean always wins over a textual label.
  if (typeof row.predicted_hot === 'boolean') {
    return row.predicted_hot;
  }

  if (typeof row.label === 'string') {
    switch (row.label.toLowerCase()) {
      case 'hot':
      case 'ai':
      case 'ai-like':
      case 'suspect':
      case 'generated':
        return true;
      case 'cold':
      case 'human':
      case 'natural':
      case 'not-ai':
      case 'clean':
        return false;
      default:
        break; // unrecognized label: reported by the error below
    }
  }

  throw new Error(`Manual result for ${row.fixture_id}/${row.detector} needs predicted_hot boolean or label hot|cold`);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Load an optional file of manually collected detector results.
 *
 * The path is resolved against the repository root. The file must be JSON
 * with `schemaVersion` 1 and array-valued `detectors` and `results`.
 *
 * @param {string|null|undefined} path - Path given on the command line, if any.
 * @returns {object|null} null when no path was given; otherwise the file's
 *   contents, with `path` set to the resolved absolute path unless the file
 *   defines its own `path` key.
 * @throws {Error} When the file is missing or does not have the expected shape.
 */
function readManualInput(path) {
  if (!path) {
    return null;
  }

  const absolutePath = resolve(REPO_ROOT, path);
  if (!existsSync(absolutePath)) {
    throw new Error(`Manual detector input not found: ${path}`);
  }

  const manual = JSON.parse(readFileSync(absolutePath, 'utf8'));
  if (manual?.schemaVersion !== 1) {
    throw new Error(`${path}: expected schemaVersion=1`);
  }
  const { detectors, results } = manual;
  if (!Array.isArray(detectors)) {
    throw new Error(`${path}: detectors must be an array`);
  }
  if (!Array.isArray(results)) {
    throw new Error(`${path}: results must be an array`);
  }

  return { path: absolutePath, ...manual };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Create a fresh confusion-matrix accumulator with every counter at zero.
 *
 * @returns {{ tp: number, fp: number, fn: number, tn: number, total: number }}
 */
function emptyMetrics() {
  const counters = {};
  for (const key of ['tp', 'fp', 'fn', 'tn', 'total']) {
    counters[key] = 0;
  }
  return counters;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Record one prediction in a confusion-matrix accumulator (mutates `m`).
 *
 * @param {{ tp: number, fp: number, fn: number, tn: number, total: number }} m
 * @param {*} predicted - Truthy when the detector flagged the fixture as hot.
 * @param {*} expected - Truthy when the fixture is expected to be hot.
 */
function updateMetrics(m, predicted, expected) {
  m.total++;
  let bucket;
  if (predicted) {
    bucket = expected ? 'tp' : 'fp';
  } else {
    bucket = expected ? 'fn' : 'tn';
  }
  m[bucket]++;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Derive headline statistics from a confusion-matrix accumulator.
 *
 * Every ratio falls back to 0 when its denominator is 0. F1 is computed from
 * the unrounded precision and recall; all ratios are then rounded via round().
 *
 * @param {{ tp: number, fp: number, fn: number, tn: number, total: number }} m
 * @param {number} fixtureCount - Denominator used for `coverage`.
 * @returns {object} The counters plus fixtureCount, coverage, accuracy,
 *   precision, recall and f1.
 */
function summarize(m, fixtureCount) {
  const ratio = (numerator, denominator) => (denominator ? numerator / denominator : 0);
  const precision = ratio(m.tp, m.tp + m.fp);
  const recall = ratio(m.tp, m.tp + m.fn);
  const accuracy = ratio(m.tp + m.tn, m.total);
  const f1 = ratio(2 * precision * recall, precision + recall);
  const coverage = fixtureCount ? round(m.total / fixtureCount) : 0;
  return {
    ...m,
    fixtureCount,
    coverage,
    accuracy: round(accuracy),
    precision: round(precision),
    recall: round(recall),
    f1: round(f1),
  };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param {number} n - Value to round.
 * @param {number} [digits=3] - Decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(n, digits = 3) {
  const scaled = Math.round(n * 10 ** digits);
  return scaled / 10 ** digits;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Format a 0–1 fraction as a percentage string with one decimal place.
 *
 * @param {number | null | undefined} n - Fraction; null/undefined count as 0.
 * @returns {string} For example `"66.7%"`.
 */
function pct(n) {
  const fraction = n ?? 0;
  const percent = fraction * 100;
  return `${percent.toFixed(1)}%`;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Render a value as the text of a Markdown table cell.
 *
 * Pipes are backslash-escaped, whitespace runs collapse to a single space,
 * and null/undefined or otherwise empty text becomes an em dash.
 *
 * @param {*} value - Value to display.
 * @returns {string} Cell-safe text.
 */
function cell(value) {
  const text = String(value ?? '—');
  const escaped = text.replace(/\|/g, '\\|');
  const singleSpaced = escaped.replace(/\s+/g, ' ').trim();
  return singleSpaced || '—';
}
|
|
111
|
+
|
|
112
|
+
/**
 * Map a truthy/falsy value to the report's hot/cold wording.
 *
 * @param {*} value
 * @returns {'hot' | 'cold'}
 */
function bool(value) {
  if (value) {
    return 'hot';
  }
  return 'cold';
}
|
|
115
|
+
|
|
116
|
+
/**
 * Describe the in-tree detector and turn each benchmark fixture into one
 * prediction row attributed to it.
 *
 * @param {{ fixtures: Array<object> }} results - Benchmark results.
 * @returns {{ detectors: object[], rows: object[] }} One detector descriptor
 *   and one row per fixture; `score` is 1 when `predicted_hot` is truthy,
 *   otherwise 0.
 */
function builtInDetector(results) {
  const descriptor = {
    id: 'patina-deterministic',
    name: 'Patina deterministic suspect-zone analyzer',
    kind: 'in-tree',
    mode: 'offline',
    threshold: 'burstiness low OR MATTR low OR lexicon density > threshold',
  };
  const toRow = (fixture) => {
    const predictedHot = fixture.predicted_hot;
    return {
      fixture_id: fixture.fixture_id,
      detector: 'patina-deterministic',
      predicted_hot: predictedHot,
      score: predictedHot ? 1 : 0,
      source: 'tests/quality/benchmark.mjs',
    };
  };
  return {
    detectors: [descriptor],
    rows: results.fixtures.map(toRow),
  };
}
|
|
136
|
+
|
|
137
|
+
/**
 * Combine the built-in detector's rows with optional manual detector rows.
 *
 * Manual detectors whose id is not already registered are appended with
 * `kind` defaulting to 'manual-third-party' and `mode` forced to
 * 'manual-offline'. Each manual result row has its prediction normalised via
 * normalizeLabel() and is tagged with the manual run id, or with the input
 * file's path relative to REPO_ROOT when no run id is present.
 *
 * @param {{ fixtures: Array<object> }} results - Benchmark results.
 * @param {object | null} manual - Output of readManualInput(), or null.
 * @returns {{ detectors: object[], rows: object[] }} Merged detectors and rows.
 * @throws {Error} When a manual detector has no id, or a manual row names a
 *   fixture_id that is not in `results.fixtures`.
 */
function mergeRows(results, manual) {
  // A Set, not a plain object: ids such as "constructor" or "__proto__" would
  // otherwise resolve to inherited members and pass the unknown-fixture check.
  // String() mirrors the key coercion a plain-object lookup performed.
  const knownFixtureIds = new Set(results.fixtures.map((f) => String(f.fixture_id)));
  const builtIn = builtInDetector(results);
  const detectors = [...builtIn.detectors];
  const rows = [...builtIn.rows];
  if (!manual) return { detectors, rows };

  const detectorIds = new Set(detectors.map((d) => d.id));
  for (const detector of manual.detectors) {
    if (!detector?.id) throw new Error('Manual detector missing id');
    if (!detectorIds.has(detector.id)) {
      detectors.push({ ...detector, kind: detector.kind || 'manual-third-party', mode: 'manual-offline' });
      detectorIds.add(detector.id);
    }
  }
  for (const row of manual.results) {
    if (!knownFixtureIds.has(String(row.fixture_id))) {
      throw new Error(`Unknown fixture_id in manual input: ${row.fixture_id}`);
    }
    rows.push({
      ...row,
      predicted_hot: normalizeLabel(row),
      source: manual.runId || relative(REPO_ROOT, manual.path),
    });
  }
  return { detectors, rows };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Score every prediction row against its fixture's expected label and
 * aggregate per-detector statistics.
 *
 * Rows whose fixture_id is not in `results.fixtures` are skipped. Every
 * detector in `detectors` gets a summary even with no rows; a row naming a
 * detector outside that list still gets its own summary entry.
 *
 * @param {{ fixtures: Array<object>, fixtureCount: number }} results
 * @param {Array<{ id: string }>} detectors - Detectors to pre-register.
 * @param {Array<object>} rows - Prediction rows (fixture_id, detector, predicted_hot, ...).
 * @returns {{ summaries: Object<string, object>, rows: object[] }} Summaries
 *   keyed by detector id, and the rows enriched with fixture metadata and a
 *   `correct` flag.
 */
function computeSummaries(results, detectors, rows) {
  // Maps instead of plain objects: a fixture or detector id such as
  // "constructor" or "toString" must not resolve to an inherited member of
  // Object.prototype. String() mirrors plain-object key coercion.
  const fixtureById = new Map(results.fixtures.map((f) => [String(f.fixture_id), f]));
  const byDetector = new Map();
  for (const detector of detectors) byDetector.set(String(detector.id), emptyMetrics());

  const expandedRows = [];
  for (const row of rows) {
    const fixture = fixtureById.get(String(row.fixture_id));
    if (!fixture) continue;
    const expected = fixture.expected_hot;
    const predicted = Boolean(row.predicted_hot);

    const detectorKey = String(row.detector);
    let metrics = byDetector.get(detectorKey);
    if (!metrics) {
      // Tolerate rows for a detector that was never declared.
      metrics = emptyMetrics();
      byDetector.set(detectorKey, metrics);
    }
    updateMetrics(metrics, predicted, expected);

    expandedRows.push({
      fixture_id: row.fixture_id,
      lang: fixture.lang,
      class: fixture.class,
      detector: row.detector,
      expected_hot: expected,
      predicted_hot: predicted,
      correct: predicted === expected,
      score: typeof row.score === 'number' ? row.score : null,
      source: row.source || null,
      notes: row.notes || null,
    });
  }

  return {
    summaries: Object.fromEntries(
      [...byDetector].map(([id, metrics]) => [id, summarize(metrics, results.fixtureCount)])
    ),
    rows: expandedRows,
  };
}
|
|
195
|
+
|
|
196
|
+
/**
 * Build the Markdown summary-table rows, one per detector.
 *
 * A detector with no entry in `summaries` is rendered with all-zero metrics.
 *
 * @param {Array<object>} detectors - Detector descriptors (id, name, kind).
 * @param {Object<string, object>} summaries - Summaries keyed by detector id.
 * @returns {string[]} One pipe-delimited table row per detector.
 */
function detectorRows(detectors, summaries) {
  const rows = [];
  for (const detector of detectors) {
    const stats = summaries[detector.id] || summarize(emptyMetrics(), 0);
    const columns = [
      cell(detector.id),
      cell(detector.name),
      cell(detector.kind),
      `${stats.total}/${stats.fixtureCount}`,
      pct(stats.coverage),
      pct(stats.accuracy),
      pct(stats.precision),
      pct(stats.recall),
      stats.tp,
      stats.fp,
      stats.fn,
      stats.tn,
    ];
    rows.push(`| ${columns.join(' | ')} |`);
  }
  return rows;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Build the Markdown fixture-level table rows, one per scored prediction.
 *
 * @param {Array<object>} rows - Scored rows (fixture_id, lang, class,
 *   detector, expected_hot, predicted_hot, correct, score, source).
 * @returns {string[]} One pipe-delimited table row per input row.
 */
function fixtureRows(rows) {
  return rows.map((r) => {
    const columns = [
      cell(r.fixture_id),
      cell(r.lang),
      cell(r.class),
      cell(r.detector),
      bool(r.expected_hot),
      bool(r.predicted_hot),
      r.correct ? '✓' : '✗',
      cell(r.score),
      cell(r.source),
    ];
    return `| ${columns.join(' | ')} |`;
  });
}
|
|
206
|
+
|
|
207
|
+
/**
 * Render the comparison report as a Markdown document.
 *
 * Sections: run metadata, the per-detector summary table, the fixture-level
 * table, and the fixed manual third-party protocol text.
 *
 * @param {object} report - Report object (generatedAt, fixtureCount,
 *   manualInput, detectors, summaries, rows).
 * @returns {string} Markdown text ending with a newline.
 */
function renderMarkdown(report) {
  const lines = [
    '# Detector Comparison Harness',
    '',
    'This report is generated offline from the checked-in suspect-zone fixtures. It is a comparison harness, not a vendor ranking claim.',
    '',
    '## Current run',
    '',
    `- Generated at: ${report.generatedAt}`,
    '- Fixture source: `tests/fixtures/suspect-zones/**`',
    `- Fixture count: ${report.fixtureCount}`,
    `- Manual third-party input: ${report.manualInput ? `\`${report.manualInput}\`` : 'none'}`,
    '- Reproduce built-in comparison: `npm run benchmark:compare`',
    '- Merge manual scores: `node scripts/detector-comparison.mjs --input tests/quality/detectors.manual.example.json`',
    '',
    '## Summary',
    '',
    '| detector | name | kind | covered | coverage | accuracy | precision | recall | TP | FP | FN | TN |',
    '|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|',
    detectorRows(report.detectors, report.summaries).join('\n'),
    '',
    '## Fixture-level rows',
    '',
    '| fixture | lang | class | detector | expected | predicted | ok | score | source |',
    '|---|---|---|---|---|---|---:|---:|---|',
    fixtureRows(report.rows).join('\n'),
    '',
    '## Manual third-party protocol',
    '',
    '1. Use only redistributable fixture text from `tests/fixtures/suspect-zones/**`.',
    "2. Paste text into a third-party detector manually, respecting that service's terms.",
    '3. Record only fixture id, detector id, date/version, score, and hot/cold label. Do not check private text into the repo.',
    '4. Run this script with `--input <json>`. The script does not scrape sites or call external APIs.',
    '5. Treat results as time-stamped evidence, not a universal claim about authorship detection.',
    // Trailing empty entry yields the document's final newline.
    '',
  ];
  return lines.join('\n');
}
|
|
242
|
+
|
|
243
|
+
/**
 * Script entry point: optionally re-run the benchmark, load its results and
 * any manual detector input, score everything, then write the JSON and
 * Markdown reports and log the two written paths relative to REPO_ROOT.
 */
function main() {
  if (args.runBenchmark) {
    runBenchmark();
  }

  const results = readBenchmarkResults();
  const manual = readManualInput(args.input);
  const merged = mergeRows(results, manual);
  const { summaries, rows: scoredRows } = computeSummaries(results, merged.detectors, merged.rows);

  // Key order is kept stable because it determines the layout of the JSON file.
  const report = {
    reportVersion: 1,
    generatedAt: new Date().toISOString(),
    fixtureCount: results.fixtureCount,
    benchmarkGeneratedAt: results.generatedAt,
    note: 'Offline comparison harness. Built-in Patina row uses deterministic suspect-zone analyzer; third-party rows are manual opt-in only.',
    manualInput: manual ? relative(REPO_ROOT, manual.path) : null,
    detectors: merged.detectors,
    summaries,
    rows: scoredRows,
  };

  mkdirSync(REPORT_DIR, { recursive: true });
  const json = JSON.stringify(report, null, 2);
  writeFileSync(JSON_PATH, `${json}\n`);
  writeFileSync(MARKDOWN_PATH, renderMarkdown(report));

  for (const written of [MARKDOWN_PATH, JSON_PATH]) {
    console.log(`Wrote ${relative(REPO_ROOT, written)}`);
  }
}
|
|
266
|
+
|
|
267
|
+
// Run the comparison as soon as the module is loaded.
main();
|