patina-cli 3.11.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +29 -29
- package/CHANGELOG.md +53 -0
- package/NOTICE +21 -0
- package/README.md +117 -224
- package/README_JA.md +134 -77
- package/README_KR.md +132 -74
- package/README_ZH.md +137 -80
- package/SKILL.md +11 -20
- package/artifacts/rebaseline-2025/README.md +147 -0
- package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
- package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
- package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
- package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
- package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
- package/assets/brand/patina-badge.svg +18 -0
- package/assets/brand/patina-mark.svg +8 -0
- package/assets/demo/README.md +79 -0
- package/core/scoring.md +12 -12
- package/core/standalone-prompt.md +3 -1
- package/core/stylometry.md +93 -22
- package/docs/API.md +1554 -0
- package/docs/AUTHENTICATION.md +50 -26
- package/docs/AUTHENTICATION_KR.md +54 -29
- package/docs/BRANDING.md +9 -8
- package/docs/CLI.md +55 -14
- package/docs/COOKBOOK.md +8 -21
- package/docs/DEMO.md +32 -5
- package/docs/EXIT-CODES.md +2 -3
- package/docs/FALSE-POSITIVES.md +63 -0
- package/docs/FAQ.md +9 -1
- package/docs/FAQ_KR.md +3 -1
- package/docs/FLAG-PARITY.md +33 -47
- package/docs/ISSUE-WAVES.md +57 -0
- package/docs/PATTERNS-EN.md +67 -3
- package/docs/PATTERNS-JA.md +68 -2
- package/docs/PATTERNS-KO.md +70 -7
- package/docs/PATTERNS-ZH.md +67 -3
- package/docs/PATTERNS.md +5 -5
- package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
- package/docs/ROADMAP.md +46 -66
- package/docs/TRANSLATIONESE-KO.md +51 -0
- package/docs/audits/2026-05-deep-research.md +3 -1
- package/docs/benchmarks/README.md +51 -0
- package/docs/benchmarks/detector-comparison.json +69 -9
- package/docs/benchmarks/detector-comparison.md +10 -5
- package/docs/benchmarks/katfish-ko-latest.json +657 -0
- package/docs/benchmarks/katfish-ko-latest.md +77 -0
- package/docs/benchmarks/latest.json +1183 -108
- package/docs/benchmarks/latest.md +84 -60
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
- package/docs/benchmarks/rebaseline-latest.json +381 -0
- package/docs/benchmarks/rebaseline-latest.md +121 -0
- package/docs/benchmarks/register-stratified-latest.json +164 -0
- package/docs/benchmarks/register-stratified-latest.md +99 -0
- package/docs/benchmarks/register-stratified.md +43 -0
- package/docs/integrations/github-action.md +44 -11
- package/docs/integrations/playground.md +58 -0
- package/docs/integrations/pre-commit.md +5 -5
- package/docs/integrations/release.md +5 -3
- package/docs/integrations/static-sites.md +83 -0
- package/docs/research/2025-rebaseline-plan.md +71 -2
- package/docs/research/2026-rebaseline.md +102 -0
- package/docs/research/adversarial-mps.md +41 -0
- package/docs/research/ai-human-metrics.md +35 -23
- package/docs/research/human-eval-panel.md +42 -0
- package/docs/research/judge-agreement.md +24 -0
- package/docs/research/ko-2025-corpus-sources.md +135 -0
- package/docs/research/lexicon-freshness-audit.md +64 -0
- package/docs/research/zh-ja-lexicon-calibration.md +60 -0
- package/docs/social/patina-launch-copy.md +173 -100
- package/docs/social/patina-launch-execution.md +94 -0
- package/docs/social/patina-launch-korean-first.md +83 -0
- package/docs/social/signs-of-ai-writing.md +26 -0
- package/docs/social/signs-of-ai-writing_KR.md +26 -0
- package/lexicon/ai-en.md +21 -24
- package/lexicon/ai-ja.md +158 -0
- package/lexicon/ai-ko.md +9 -9
- package/lexicon/ai-zh.md +158 -0
- package/lexicon/provenance/ai-en.json +970 -0
- package/lexicon/provenance/ai-ja.json +542 -0
- package/lexicon/provenance/ai-ko.json +866 -0
- package/lexicon/provenance/ai-zh.json +542 -0
- package/package.json +49 -8
- package/patterns/en-communication.md +5 -0
- package/patterns/en-content.md +5 -0
- package/patterns/en-filler.md +5 -0
- package/patterns/en-language.md +29 -1
- package/patterns/en-structure.md +5 -0
- package/patterns/en-style.md +5 -0
- package/patterns/en-viral-hook.md +42 -2
- package/patterns/ja-communication.md +5 -0
- package/patterns/ja-content.md +5 -0
- package/patterns/ja-filler.md +5 -0
- package/patterns/ja-language.md +33 -1
- package/patterns/ja-structure.md +12 -0
- package/patterns/ja-style.md +5 -0
- package/patterns/ja-viral-hook.md +41 -2
- package/patterns/ko-communication.md +5 -0
- package/patterns/ko-content.md +5 -0
- package/patterns/ko-filler.md +5 -0
- package/patterns/ko-language.md +33 -1
- package/patterns/ko-structure.md +25 -6
- package/patterns/ko-style.md +5 -0
- package/patterns/ko-viral-hook.md +38 -2
- package/patterns/zh-communication.md +5 -0
- package/patterns/zh-content.md +5 -0
- package/patterns/zh-filler.md +5 -0
- package/patterns/zh-language.md +37 -1
- package/patterns/zh-structure.md +12 -0
- package/patterns/zh-style.md +5 -0
- package/patterns/zh-viral-hook.md +38 -2
- package/playground/README.md +55 -0
- package/playground/analytics.js +4 -0
- package/playground/analyzer.js +883 -0
- package/playground/app.js +157 -0
- package/playground/data/lexicons.js +343 -0
- package/playground/index.html +138 -0
- package/playground/styles.css +267 -0
- package/profiles/namuwiki.md +111 -0
- package/scripts/adversarial-mps-report.mjs +201 -0
- package/scripts/badge-json.mjs +79 -0
- package/scripts/benchmark-report.mjs +56 -9
- package/scripts/check-release-metadata.mjs +0 -2
- package/scripts/detector-comparison.mjs +7 -7
- package/scripts/generate-playground-data.mjs +77 -0
- package/scripts/katfish-calibration.mjs +464 -0
- package/scripts/lexicon-freshness.mjs +485 -0
- package/scripts/lint.mjs +1 -1
- package/scripts/precommit-score.mjs +4 -3
- package/scripts/prose-score.mjs +81 -5
- package/scripts/rebaseline-intake.mjs +242 -0
- package/scripts/rebaseline-score.mjs +268 -0
- package/scripts/rebaseline-summary.mjs +773 -0
- package/scripts/rebaseline-web-collect.mjs +410 -0
- package/scripts/update-benchmark-ranges.mjs +1 -0
- package/src/api.js +69 -105
- package/src/auth.js +50 -2
- package/src/backends/claude-cli.js +19 -4
- package/src/backends/codex-cli.js +19 -3
- package/src/backends/contract.js +230 -1
- package/src/backends/gemini-cli.js +18 -5
- package/src/backends/index.js +87 -12
- package/src/backends/kimi-cli.js +161 -0
- package/src/cli.js +577 -567
- package/src/commands/doctor.js +2 -2
- package/src/config.js +29 -0
- package/src/errors.js +53 -1
- package/src/features/discourse-tells.js +68 -0
- package/src/features/index.js +82 -8
- package/src/features/lexicon.js +40 -6
- package/src/features/markup-leakage.js +69 -0
- package/src/features/segment.js +41 -0
- package/src/features/signal-strength.js +81 -0
- package/src/features/stylometry.js +231 -1
- package/src/features/translationese.js +127 -0
- package/src/loader.js +76 -0
- package/src/logger.js +22 -23
- package/src/model-defaults.js +55 -0
- package/src/ouroboros.js +31 -0
- package/src/output.js +102 -90
- package/src/prompt-builder.js +103 -68
- package/src/providers.js +51 -4
- package/src/scoring.js +210 -2
- package/src/security.js +75 -0
- package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
- package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
- package/tests/quality/README.md +188 -11
- package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
- package/tests/quality/benchmark.mjs +39 -1
- package/tests/quality/dogfood.mjs +5 -3
- package/tests/quality/live-fixtures.jsonl +2 -0
- package/tests/quality/live-quality.mjs +596 -0
- package/tests/quality/ranking-metrics.mjs +136 -0
- package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
- package/vercel.json +53 -0
- package/SKILL-MAX.md +0 -455
- package/docs/internal/HARNESS.md +0 -14
- package/docs/internal/README.md +0 -14
- package/docs/internal/WARP.md +0 -23
- package/patina-max/SKILL.md +0 -523
- package/patina-max/composite.py +0 -457
- package/src/cache.js +0 -106
- package/src/commands/init.js +0 -208
- package/src/manifest.js +0 -162
- package/src/max-mode.js +0 -207
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Private KatFish calibration runner for the Korean diagnostic layer.
|
|
3
|
+
//
|
|
4
|
+
// The KatFish repository currently has no detected license metadata, so this
|
|
5
|
+
// script reads raw KatFish JSONL only from a local/private directory and writes
|
|
6
|
+
// aggregate metrics only. Do not commit the downloaded dataset rows.
|
|
7
|
+
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
9
|
+
import { dirname, relative, resolve } from 'node:path';
|
|
10
|
+
import { fileURLToPath } from 'node:url';
|
|
11
|
+
|
|
12
|
+
import { analyzeText } from '../src/features/index.js';
|
|
13
|
+
|
|
14
|
+
// Directory containing this script, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Repository root: one level above the directory containing this script.
const REPO_ROOT = resolve(__dirname, '..');

/** Default directory that holds the raw KatFish JSONL files. */
export const DEFAULT_KATFISH_DIR = 'artifacts/rebaseline-2025/private/katfish';

/** Default JSONL file with the human-control rows (including their text). */
export const DEFAULT_HUMAN_CONTROLS = 'artifacts/rebaseline-2025/private/web-human-controls.generated.private.jsonl';

/** Default directory the Markdown/JSON reports are written to. */
export const DEFAULT_REPORT_DIR = 'docs/benchmarks';

/** Default file name (without extension) shared by the two report files. */
export const DEFAULT_REPORT_BASENAME = 'katfish-ko-latest';

/** File names looked up inside the KatFish directory; the stem doubles as the genre. */
export const KATFISH_FILES = ['essay.jsonl', 'abstract.jsonl', 'poetry.jsonl'];
|
|
23
|
+
|
|
24
|
+
/**
 * Parse the command-line flags of the calibration runner.
 *
 * @param {string[]} [argv] - Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns {{katfishDir: string, humanControls: string, reportDir: string,
 *   basename: string, write: boolean, json: boolean, help: boolean}}
 *   Parsed options, pre-populated with the module defaults.
 * @throws {Error} On an unknown argument, or when a value-taking flag is not
 *   followed by a value.
 */
export function parseArgs(argv = process.argv.slice(2)) {
  const args = {
    katfishDir: DEFAULT_KATFISH_DIR,
    humanControls: DEFAULT_HUMAN_CONTROLS,
    reportDir: DEFAULT_REPORT_DIR,
    basename: DEFAULT_REPORT_BASENAME,
    write: false,
    json: false,
    help: false,
  };

  // Flags that consume the next argument, mapped to the option they set.
  const valueFlags = new Map([
    ['--katfish-dir', 'katfishDir'],
    ['--human-controls', 'humanControls'],
    ['--report-dir', 'reportDir'],
    ['--basename', 'basename'],
  ]);
  // Boolean switches, mapped to the option they turn on.
  const switches = new Map([
    ['--write', 'write'],
    ['--json', 'json'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (valueFlags.has(arg)) {
      const value = argv[i + 1];
      // A trailing flag, or one directly followed by another flag, has no value.
      // Fail loudly instead of storing `undefined` or swallowing the next flag.
      if (value === undefined || valueFlags.has(value) || switches.has(value)) {
        throw new Error(`Missing value for ${arg}`);
      }
      args[valueFlags.get(arg)] = value;
      i++;
    } else if (switches.has(arg)) {
      args[switches.get(arg)] = true;
    } else {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }

  return args;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Load and normalize the KatFish JSONL rows found in `katfishDir`.
 *
 * Every file named in KATFISH_FILES is looked up inside the directory; the
 * file stem is used as the genre of its rows. Problems (missing files,
 * unparsable or invalid lines) are collected rather than thrown.
 *
 * @param {string} [katfishDir] - Directory, resolved against the repository root.
 * @returns {{rows: object[], errors: string[], path: string}} Normalized rows,
 *   collected error messages, and the repo-relative directory path.
 */
export function loadKatfishRows(katfishDir = DEFAULT_KATFISH_DIR) {
  const base = resolveRepoPath(katfishDir);
  const rows = [];
  const errors = [];

  for (const fileName of KATFISH_FILES) {
    const filePath = resolve(base, fileName);
    const shownPath = toRepoRelative(filePath);
    if (!existsSync(filePath)) {
      errors.push(`missing KatFish file: ${shownPath}`);
      continue;
    }

    const genre = fileName.replace(/\.jsonl$/u, '');
    readFileSync(filePath, 'utf8')
      .split(/\r?\n/u)
      .forEach((rawLine, index) => {
        const line = rawLine.trim();
        if (line === '') return; // blank lines are skipped
        const lineNumber = index + 1;
        try {
          const parsed = JSON.parse(line);
          rows.push(normalizeKatfishRow(parsed, { genre, lineNumber, path: filePath }));
        } catch (error) {
          // Record where the bad row lives and keep processing the rest.
          errors.push(`${shownPath}:${lineNumber}: ${error.message}`);
        }
      });
  }

  return { rows, errors, path: toRepoRelative(base) };
}
|
|
79
|
+
|
|
80
|
+
/**
 * Load the human-control JSONL rows from `input`.
 *
 * Each non-blank line must be JSON with a non-empty string `text`. Every
 * loaded row is marked `expectedHot: false`. Problems are collected in
 * `errors` instead of being thrown.
 *
 * @param {string} [input] - JSONL file path, resolved against the repository root.
 * @returns {{rows: object[], errors: string[], path: string}} Loaded rows,
 *   collected error messages, and the repo-relative input path.
 */
export function loadHumanControlRows(input = DEFAULT_HUMAN_CONTROLS) {
  const path = resolveRepoPath(input);
  const shownPath = toRepoRelative(path);
  const rows = [];

  if (!existsSync(path)) {
    return {
      rows,
      errors: [`human-control private input not found: ${shownPath}`],
      path: shownPath,
    };
  }

  const errors = [];
  readFileSync(path, 'utf8')
    .split(/\r?\n/u)
    .forEach((rawLine, index) => {
      const line = rawLine.trim();
      if (line === '') return; // blank lines are skipped
      const lineNumber = index + 1;
      try {
        const raw = JSON.parse(line);
        const hasText = typeof raw.text === 'string' && raw.text.trim() !== '';
        if (!hasText) {
          throw new Error('human-control row requires private text');
        }
        rows.push({
          id: raw.sample_id || `human-control-${lineNumber}`,
          register: raw.register || 'unknown',
          text: raw.text,
          expectedHot: false,
        });
      } catch (error) {
        // Record where the bad row lives and keep processing the rest.
        errors.push(`${shownPath}:${lineNumber}: ${error.message}`);
      }
    });

  return { rows, errors, path: shownPath };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Validate one parsed KatFish JSONL row and convert it to the runner's row shape.
 *
 * @param {object} raw - Parsed JSON row; needs a non-empty string `text` and a
 *   `label` of 0 (human) or 1 (generated). `index` and `written_by` are optional.
 * @param {object} [origin] - Where the row came from.
 * @param {string} [origin.genre] - Genre of the source file.
 * @param {number} [origin.lineNumber] - 1-based line number, used in the id when
 *   the row has no `index`.
 * @param {string} [origin.path] - Source file path; stored repo-relative.
 * @returns {{id: string, genre: string, model: string, sourcePath: (string|null),
 *   text: string, expectedHot: boolean}} The normalized row.
 * @throws {Error} When the row is not an object, has no text, or has a missing
 *   or invalid label.
 */
export function normalizeKatfishRow(raw, { genre, lineNumber, path } = {}) {
  if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
    throw new Error('KatFish row must be a JSON object');
  }
  if (typeof raw.text !== 'string' || raw.text.trim() === '') {
    throw new Error('KatFish row requires text');
  }

  // Number() turns null, '', whitespace and [] into 0, which would silently
  // count a row with a missing label as human. Only coerce real primitives.
  const labelType = typeof raw.label;
  const isUsableLabel =
    labelType === 'number' ||
    labelType === 'boolean' ||
    (labelType === 'string' && raw.label.trim() !== '');
  const label = isUsableLabel ? Number(raw.label) : Number.NaN;
  if (label !== 0 && label !== 1) {
    throw new Error('KatFish label must be 0 (human) or 1 (generated)');
  }

  return {
    id: `${genre || 'katfish'}:${raw.index ?? lineNumber ?? '?'}`,
    genre: genre || 'unknown',
    model: String(raw.written_by || (label === 0 ? 'human' : 'unknown-model')),
    sourcePath: path ? toRepoRelative(path) : null,
    text: raw.text,
    expectedHot: label === 1,
  };
}
|
|
137
|
+
|
|
138
|
+
/**
 * Score both row sets under every comparison mode and assemble the summary.
 *
 * @param {object} [input]
 * @param {object[]} [input.katfishRows] - Normalized KatFish rows.
 * @param {object[]} [input.humanControlRows] - Human-control rows.
 * @param {string} [input.repoRoot] - Passed through to the analyzer; defaults to REPO_ROOT.
 * @returns {object} Summary with schema version, generation timestamp, input
 *   sizes, the mode list, per-set counts and metrics, and current-vs-baseline deltas.
 */
export function evaluateCalibration({ katfishRows = [], humanControlRows = [], repoRoot = REPO_ROOT } = {}) {
  const modes = [
    { id: 'burstiness_mattr_only', label: 'Burstiness+MATTR only' },
    { id: 'patina_without_ko_diagnostics', label: 'Patina without KO diagnostics' },
    { id: 'patina_current', label: 'Patina current' },
  ];

  // One confusion summary per mode id for an already-scored row set.
  const metricsPerMode = (scoredRows) =>
    Object.fromEntries(modes.map(({ id }) => [id, summarizeConfusion(scoredRows, id)]));

  const scoredKatfish = scoreRows(katfishRows, { repoRoot });
  const scoredControls = scoreRows(humanControlRows, { repoRoot });
  const katfishMetrics = metricsPerMode(scoredKatfish);
  const humanControlMetrics = metricsPerMode(scoredControls);

  const {
    patina_current: katfishCurrent,
    patina_without_ko_diagnostics: katfishNoKo,
    burstiness_mattr_only: katfishBaseline,
  } = katfishMetrics;

  return {
    schemaVersion: 1,
    generatedAt: new Date().toISOString(),
    inputs: {
      katfishRows: katfishRows.length,
      humanControlRows: humanControlRows.length,
    },
    modes,
    katfish: {
      counts: countsBy(katfishRows, ['genre', 'model', 'expectedHot']),
      metrics: katfishMetrics,
      byGenre: summarizeGroups(scoredKatfish, 'genre', modes),
      byModel: summarizeGroups(scoredKatfish, 'model', modes),
    },
    humanControls: {
      counts: countsBy(humanControlRows, ['register']),
      metrics: humanControlMetrics,
      byRegister: summarizeGroups(scoredControls, 'register', modes),
    },
    deltas: {
      currentVsBurstinessMattr: delta(katfishCurrent, katfishBaseline),
      currentVsNoKoDiagnostics: delta(katfishCurrent, katfishNoKo),
      humanFpCurrentVsNoKoDiagnostics: deltaFp(
        humanControlMetrics.patina_current,
        humanControlMetrics.patina_without_ko_diagnostics
      ),
    },
  };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Render the calibration summary as a Markdown document.
 *
 * @param {object} summary - Summary object as produced by `evaluateCalibration`.
 * @param {object} [context]
 * @param {string} [context.katfishPath] - KatFish input path to display;
 *   falls back to DEFAULT_KATFISH_DIR.
 * @param {string} [context.humanControlsPath] - Human-control input path to
 *   display; falls back to DEFAULT_HUMAN_CONTROLS.
 * @returns {string} The Markdown report, terminated by a newline.
 */
export function renderMarkdownReport(summary, context = {}) {
  const katfishInput = context.katfishPath || DEFAULT_KATFISH_DIR;
  const humanControlsInput = context.humanControlsPath || DEFAULT_HUMAN_CONTROLS;

  const overview = [
    '# KatFish Korean Calibration',
    '',
    '| field | value |',
    '|---|---:|',
    `| Generated at | ${summary.generatedAt} |`,
    `| KatFish input | \`${katfishInput}\` |`,
    `| Human-control input | \`${humanControlsInput}\` |`,
    `| KatFish rows | ${summary.inputs.katfishRows} |`,
    `| Public-web human-control rows | ${summary.inputs.humanControlRows} |`,
    '| Raw text committed | 0 |',
    '',
    'This report is aggregate-only. KatFish rows and public-web extracts stay in ignored private files because the external dataset and source pages have not been relicensed into this repository.',
    '',
  ];

  const katfishMetrics = summary.katfish.metrics;
  const fpDelta = summary.deltas.humanFpCurrentVsNoKoDiagnostics;
  const headline = [
    '## Headline',
    '',
    '| metric | value |',
    '|---|---:|',
    `| KatFish catch rate, Patina without KO diagnostics | ${pct(katfishMetrics.patina_without_ko_diagnostics.recall)} |`,
    `| KatFish catch rate, Patina current | ${pct(katfishMetrics.patina_current.recall)} |`,
    `| Delta | ${pp(summary.deltas.currentVsNoKoDiagnostics.recall)} |`,
    `| Public-web human-control FP delta | ${pp(fpDelta.fpr)} (${fpDelta.fp} rows) |`,
    '',
  ];

  // Heading, blank line, table, blank line.
  const section = (title, table) => [title, '', table, ''];
  const tables = [
    ...section('## KatFish metrics', metricsTable(katfishMetrics)),
    ...section('## Public-web Korean human controls', metricsTable(summary.humanControls.metrics)),
    ...section('## KatFish by genre', groupTable(summary.katfish.byGenre, 'genre')),
    ...section('## Public-web controls by register', groupTable(summary.humanControls.byRegister, 'register')),
  ];

  const interpretation = [
    '## Interpretation',
    '',
    '- The KO diagnostics layer is evaluated against `patina_without_ko_diagnostics`, so the delta isolates the spacing/comma/suffix proxy path from existing lexicon behavior.',
    '- The human-control non-regression gate uses the 250-row hash-only public-web Korean control set from #157.',
    '- KatFish human rows are reported in the KatFish table as an OOD caveat; do not turn this binary catch-rate report into an authorship or public AUROC claim.',
  ];

  const document = [...overview, ...headline, ...tables, ...interpretation];
  return `${document.join('\n')}\n`;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Write the Markdown and JSON reports for `summary` to disk.
 *
 * @param {object} summary - Summary object to render and serialize.
 * @param {object} [options]
 * @param {string} [options.reportDir] - Output directory, resolved against the
 *   repository root and created if missing; defaults to DEFAULT_REPORT_DIR.
 * @param {string} [options.basename] - File name without extension; defaults
 *   to DEFAULT_REPORT_BASENAME.
 * @returns {{markdown: string, json: string}} Repo-relative paths of the written files.
 */
export function writeReport(summary, options = {}) {
  const { reportDir: requestedDir, basename: requestedName } = options;
  const reportDir = resolveRepoPath(requestedDir || DEFAULT_REPORT_DIR);
  const basename = requestedName || DEFAULT_REPORT_BASENAME;
  const targets = {
    markdown: resolve(reportDir, `${basename}.md`),
    json: resolve(reportDir, `${basename}.json`),
  };

  mkdirSync(reportDir, { recursive: true });
  // `options` is forwarded so the rendered report shows the input paths.
  writeFileSync(targets.markdown, renderMarkdownReport(summary, options), 'utf8');
  writeFileSync(targets.json, `${JSON.stringify(summary, null, 2)}\n`, 'utf8');

  return {
    markdown: toRepoRelative(targets.markdown),
    json: toRepoRelative(targets.json),
  };
}
|
|
247
|
+
|
|
248
|
+
/**
 * Run the analyzer on every row and record one boolean prediction per mode.
 *
 * Each row's text is analyzed twice as Korean: once with default options and
 * once with `koDiagnosticsEnabled: false`. The burstiness/MATTR-only prediction
 * is true when any paragraph of the default analysis has a `low` burstiness or
 * MATTR band.
 *
 * @param {object[]} rows - Rows carrying `text` plus id/genre/register/model/expectedHot.
 * @param {{repoRoot: string}} options - `repoRoot` is passed through to the analyzer.
 * @returns {object[]} One scored record per input row.
 */
function scoreRows(rows, { repoRoot }) {
  const hasLowBand = (paragraph) =>
    paragraph.burstiness?.band === 'low' || paragraph.mattr?.band === 'low';

  return rows.map((row) => {
    const withDiagnostics = analyzeText(row.text, { lang: 'ko', repoRoot });
    const withoutDiagnostics = analyzeText(row.text, {
      lang: 'ko',
      repoRoot,
      koDiagnosticsEnabled: false,
    });

    const predictions = {
      burstiness_mattr_only: withDiagnostics.paragraphs.some(hasLowBand),
      patina_without_ko_diagnostics: Boolean(withoutDiagnostics.hot),
      patina_current: Boolean(withDiagnostics.hot),
    };

    return {
      id: row.id,
      genre: row.genre,
      register: row.register,
      model: row.model,
      expectedHot: Boolean(row.expectedHot),
      predictions,
    };
  });
}
|
|
276
|
+
|
|
277
|
+
/**
 * Compute per-mode confusion summaries for each distinct value of `row[key]`.
 *
 * Rows are bucketed in a single pass, so each group's rows are collected once
 * rather than re-filtered for every (group, mode) pair.
 *
 * @param {object[]} rows - Scored rows.
 * @param {string} key - Row property to group by; falsy values group as 'unknown'.
 * @param {{id: string}[]} modes - Modes to summarize; `id` selects the prediction.
 * @returns {Object<string, Object<string, object>>} Group name (sorted) mapped
 *   to mode id mapped to that group's confusion summary.
 */
function summarizeGroups(rows, key, modes) {
  const buckets = new Map();
  for (const row of rows) {
    const group = row[key] || 'unknown';
    const members = buckets.get(group);
    if (members) members.push(row);
    else buckets.set(group, [row]);
  }

  return Object.fromEntries(
    [...buckets.keys()].sort().map((group) => {
      const members = buckets.get(group);
      return [
        group,
        Object.fromEntries(modes.map((mode) => [mode.id, summarizeConfusion(members, mode.id)])),
      ];
    })
  );
}
|
|
291
|
+
|
|
292
|
+
/**
 * Build confusion counts and derived rates for one prediction mode.
 *
 * @param {object[]} rows - Scored rows with `expectedHot` and `predictions`.
 * @param {string} mode - Key into `row.predictions`; a missing prediction counts as false.
 * @returns {{tp: number, fp: number, fn: number, tn: number, total: number,
 *   accuracy: number, precision: number, recall: number, f1: number, fpr: number}}
 *   Counts plus rounded rates; a rate with a zero denominator is reported as 0.
 */
function summarizeConfusion(rows, mode) {
  const tally = { tp: 0, fp: 0, fn: 0, tn: 0 };
  for (const row of rows) {
    const flagged = Boolean(row.predictions?.[mode]);
    if (row.expectedHot) {
      tally[flagged ? 'tp' : 'fn'] += 1;
    } else {
      tally[flagged ? 'fp' : 'tn'] += 1;
    }
  }

  const { tp, fp, fn, tn } = tally;
  const total = rows.length;
  // Division that yields 0 instead of NaN when the denominator is 0.
  const ratio = (numerator, denominator) => (denominator ? numerator / denominator : 0);

  const precision = ratio(tp, tp + fp);
  const recall = ratio(tp, tp + fn);
  const fpr = ratio(fp, fp + tn);
  const accuracy = ratio(tp + tn, total);
  const f1 = ratio(2 * precision * recall, precision + recall);

  return {
    tp,
    fp,
    fn,
    tn,
    total,
    accuracy: round(accuracy),
    precision: round(precision),
    recall: round(recall),
    f1: round(f1),
    fpr: round(fpr),
  };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Subtract a baseline confusion summary from the current one.
 *
 * @param {object} current - Confusion summary of the mode under evaluation.
 * @param {object} baseline - Confusion summary to compare against.
 * @returns {object} Rounded rate differences (accuracy, precision, recall, f1,
 *   fpr) followed by raw count differences (tp, fp, fn, tn).
 */
function delta(current, baseline) {
  const rateKeys = ['accuracy', 'precision', 'recall', 'f1', 'fpr'];
  const countKeys = ['tp', 'fp', 'fn', 'tn'];
  const difference = (key) => current[key] - baseline[key];

  return Object.fromEntries([
    ...rateKeys.map((key) => [key, round(difference(key))]),
    ...countKeys.map((key) => [key, difference(key)]),
  ]);
}
|
|
329
|
+
|
|
330
|
+
/**
 * False-positive-only difference between two confusion summaries.
 *
 * @param {{fpr: number, fp: number}} current - Summary of the mode under evaluation.
 * @param {{fpr: number, fp: number}} baseline - Summary to compare against.
 * @returns {{fpr: number, fp: number}} Rounded FP-rate difference and raw FP count difference.
 */
function deltaFp(current, baseline) {
  const rateChange = current.fpr - baseline.fpr;
  const countChange = current.fp - baseline.fp;
  return { fpr: round(rateChange), fp: countChange };
}
|
|
336
|
+
|
|
337
|
+
/**
 * Count how often each value occurs for each of the given row fields.
 *
 * Counts are accumulated in a Map and converted with `Object.fromEntries`, so
 * values that collide with `Object.prototype` members (for example a model
 * named "constructor" or "__proto__") are tallied as ordinary keys.
 *
 * @param {object[]} rows - Rows to tally.
 * @param {string[]} fields - Row properties to count by.
 * @returns {Object<string, Object<string, number>>} Field name mapped to value
 *   mapped to count. Booleans are keyed as 'true'/'false'; other falsy values
 *   are keyed as 'unknown'.
 */
function countsBy(rows, fields) {
  const out = {};
  for (const field of fields) {
    const tally = new Map();
    for (const row of rows) {
      const raw = row[field];
      const value = typeof raw === 'boolean' ? String(raw) : raw || 'unknown';
      // Object keys are strings, so key the Map the same way.
      const key = String(value);
      tally.set(key, (tally.get(key) ?? 0) + 1);
    }
    out[field] = Object.fromEntries(tally);
  }
  return out;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Render per-mode confusion summaries as a Markdown table.
 *
 * @param {Object<string, object>} metrics - Mode id mapped to its confusion summary.
 * @returns {string} Header, alignment row, and one row per mode, joined by newlines.
 */
function metricsTable(metrics) {
  const header = [
    '| mode | n | accuracy | precision | recall / catch | F1 | FP rate | TP/FP/FN/TN |',
    '|---|---:|---:|---:|---:|---:|---:|---:|',
  ];
  const body = Object.entries(metrics).map(([mode, m]) => {
    const confusion = `${m.tp}/${m.fp}/${m.fn}/${m.tn}`;
    return `| ${cell(mode)} | ${m.total} | ${pct(m.accuracy)} | ${pct(m.precision)} | ${pct(m.recall)} | ${m.f1.toFixed(3)} | ${pct(m.fpr)} | ${confusion} |`;
  });
  return [...header, ...body].join('\n');
}
|
|
362
|
+
|
|
363
|
+
/**
 * Render grouped confusion summaries as a Markdown table with one row per
 * (group, mode) pair.
 *
 * @param {Object<string, Object<string, object>>} groups - Group name mapped to
 *   mode id mapped to confusion summary.
 * @param {string} label - Heading of the first column.
 * @returns {string} Header, alignment row, and data rows, joined by newlines.
 */
function groupTable(groups, label) {
  const header = [
    `| ${label} | mode | n | recall / catch | FP rate | TP/FP/FN/TN |`,
    '|---|---|---:|---:|---:|---:|',
  ];
  const body = Object.entries(groups).flatMap(([group, metrics]) =>
    Object.entries(metrics).map(([mode, m]) => {
      const confusion = `${m.tp}/${m.fp}/${m.fn}/${m.tn}`;
      return `| ${cell(group)} | ${cell(mode)} | ${m.total} | ${pct(m.recall)} | ${pct(m.fpr)} | ${confusion} |`;
    })
  );
  return [...header, ...body].join('\n');
}
|
|
377
|
+
|
|
378
|
+
/**
 * Format a fraction as a percentage with one decimal place.
 *
 * @param {number} value - Fraction (e.g. 0.25); falsy values are treated as 0.
 * @returns {string} For example '25.0%'.
 */
function pct(value) {
  const percentage = (value || 0) * 100;
  return `${percentage.toFixed(1)}%`;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Format a fraction difference as signed percentage points with one decimal.
 *
 * @param {number} value - Difference as a fraction; falsy values are treated as 0.
 * @returns {string} For example '+5.0 pp' or '-1.2 pp'; non-negative values get a '+'.
 */
function pp(value) {
  const points = (value || 0) * 100;
  let sign = '';
  if (points >= 0) {
    sign = '+';
  }
  return `${sign}${points.toFixed(1)} pp`;
}
|
|
386
|
+
|
|
387
|
+
/**
 * Make a value safe to place inside a Markdown table cell.
 *
 * Pipes are backslash-escaped, whitespace runs collapse to one space, and the
 * result is trimmed. Nullish or empty results become an em dash.
 *
 * @param {*} value - Value to render.
 * @returns {string} The sanitized cell text.
 */
function cell(value) {
  const text = String(value ?? '—');
  const escaped = text.replace(/\|/gu, '\\|');
  const collapsed = escaped.replace(/\s+/gu, ' ').trim();
  return collapsed || '—';
}
|
|
390
|
+
|
|
391
|
+
/**
 * Round a number to a fixed number of decimal places.
 *
 * @param {number} value - Number to round; falsy values are treated as 0.
 * @param {number} [digits=3] - Decimal places to keep.
 * @returns {number} The rounded number.
 */
function round(value, digits = 3) {
  const scale = 10 ** digits;
  const scaled = (value || 0) * scale;
  return Math.round(scaled) / scale;
}
|
|
394
|
+
|
|
395
|
+
/**
 * Resolve a path against the repository root.
 *
 * @param {string} [path] - Path to resolve; a falsy value resolves to the root itself.
 * @returns {string} The absolute path.
 */
function resolveRepoPath(path) {
  const target = path ? path : '.';
  return resolve(REPO_ROOT, target);
}
|
|
398
|
+
|
|
399
|
+
/**
 * Express a path relative to the repository root, using forward slashes.
 *
 * @param {string} path - Path to convert.
 * @returns {string} The relative path, or '.' when it is the repository root itself.
 */
function toRepoRelative(path) {
  const fromRoot = relative(REPO_ROOT, path);
  const withForwardSlashes = fromRoot.replace(/\\/gu, '/');
  return withForwardSlashes === '' ? '.' : withForwardSlashes;
}
|
|
402
|
+
|
|
403
|
+
/**
 * Print the command-line usage text to stdout.
 *
 * Options are laid out in two aligned columns, and every value-taking flag
 * shows its default so the help matches what `parseArgs` falls back to.
 */
function printHelp() {
  const options = [
    [
      '--katfish-dir <path>',
      `Private directory containing essay/abstract/poetry JSONL (default: ${DEFAULT_KATFISH_DIR})`,
    ],
    [
      '--human-controls <path>',
      `Private public-web human-control JSONL with text (default: ${DEFAULT_HUMAN_CONTROLS})`,
    ],
    ['--write', 'Write <report-dir>/<basename>.{md,json}'],
    ['--basename <name>', `Report basename (default: ${DEFAULT_REPORT_BASENAME})`],
    ['--report-dir <path>', `Report directory (default: ${DEFAULT_REPORT_DIR})`],
    ['--json', 'Print JSON summary instead of Markdown'],
    ['-h, --help', 'Show this help'],
  ];

  // Pad every flag to the widest one so the descriptions line up.
  const flagWidth = Math.max(...options.map(([flag]) => flag.length));
  const optionLines = options.map(
    ([flag, description]) => `  ${flag.padEnd(flagWidth)}  ${description}`
  );

  console.log(
    ['Usage: node scripts/katfish-calibration.mjs [options]', '', 'Options:', ...optionLines, ''].join('\n')
  );
}
|
|
416
|
+
|
|
417
|
+
/**
 * Command-line entry point: parse flags, load both inputs, evaluate, and print
 * (and optionally write) the report.
 *
 * When either input reports errors they are listed on stderr, the process
 * exit code is set to 1, and nothing is evaluated.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs();
  if (args.help) {
    printHelp();
    return;
  }

  const katfish = loadKatfishRows(args.katfishDir);
  const humanControls = loadHumanControlRows(args.humanControls);

  const problems = katfish.errors.concat(humanControls.errors);
  if (problems.length > 0) {
    console.error(problems.map((problem) => `- ${problem}`).join('\n'));
    process.exitCode = 1;
    return;
  }

  const summary = evaluateCalibration({
    katfishRows: katfish.rows,
    humanControlRows: humanControls.rows,
  });

  // Input paths shown in the rendered report header.
  const inputPaths = {
    katfishPath: katfish.path,
    humanControlsPath: humanControls.path,
  };

  const written = args.write
    ? writeReport(summary, {
        reportDir: args.reportDir,
        basename: args.basename,
        ...inputPaths,
      })
    : null;

  if (args.json) {
    console.log(JSON.stringify({ ...summary, written }, null, 2));
    return;
  }

  console.log(renderMarkdownReport(summary, inputPaths));
  if (written) {
    console.log(`Wrote ${written.markdown}`);
    console.log(`Wrote ${written.json}`);
  }
}
|
|
461
|
+
|
|
462
|
+
// Run main() only when this file is executed directly, not when it is imported.
// Comparing decoded file-system paths (rather than building a `file://` string
// by hand) keeps the check working for paths with spaces or non-ASCII
// characters, which are percent-encoded in import.meta.url, and on Windows.
if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
  // Report failures (e.g. an unknown flag) as a message plus a non-zero exit
  // code instead of leaving a floating promise to reject unhandled.
  Promise.resolve()
    .then(() => main())
    .catch((error) => {
      console.error(error?.message ?? error);
      process.exitCode = 1;
    });
}
|