patina-cli 3.11.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/.patina.default.yaml +29 -29
  2. package/CHANGELOG.md +53 -0
  3. package/NOTICE +21 -0
  4. package/README.md +117 -224
  5. package/README_JA.md +134 -77
  6. package/README_KR.md +132 -74
  7. package/README_ZH.md +137 -80
  8. package/SKILL.md +11 -20
  9. package/artifacts/rebaseline-2025/README.md +147 -0
  10. package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
  11. package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
  12. package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
  13. package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
  14. package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
  15. package/assets/brand/patina-badge.svg +18 -0
  16. package/assets/brand/patina-mark.svg +8 -0
  17. package/assets/demo/README.md +79 -0
  18. package/core/scoring.md +12 -12
  19. package/core/standalone-prompt.md +3 -1
  20. package/core/stylometry.md +93 -22
  21. package/docs/API.md +1554 -0
  22. package/docs/AUTHENTICATION.md +50 -26
  23. package/docs/AUTHENTICATION_KR.md +54 -29
  24. package/docs/BRANDING.md +9 -8
  25. package/docs/CLI.md +55 -14
  26. package/docs/COOKBOOK.md +8 -21
  27. package/docs/DEMO.md +32 -5
  28. package/docs/EXIT-CODES.md +2 -3
  29. package/docs/FALSE-POSITIVES.md +63 -0
  30. package/docs/FAQ.md +9 -1
  31. package/docs/FAQ_KR.md +3 -1
  32. package/docs/FLAG-PARITY.md +33 -47
  33. package/docs/ISSUE-WAVES.md +57 -0
  34. package/docs/PATTERNS-EN.md +67 -3
  35. package/docs/PATTERNS-JA.md +68 -2
  36. package/docs/PATTERNS-KO.md +70 -7
  37. package/docs/PATTERNS-ZH.md +67 -3
  38. package/docs/PATTERNS.md +5 -5
  39. package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
  40. package/docs/ROADMAP.md +46 -66
  41. package/docs/TRANSLATIONESE-KO.md +51 -0
  42. package/docs/audits/2026-05-deep-research.md +3 -1
  43. package/docs/benchmarks/README.md +51 -0
  44. package/docs/benchmarks/detector-comparison.json +69 -9
  45. package/docs/benchmarks/detector-comparison.md +10 -5
  46. package/docs/benchmarks/katfish-ko-latest.json +657 -0
  47. package/docs/benchmarks/katfish-ko-latest.md +77 -0
  48. package/docs/benchmarks/latest.json +1183 -108
  49. package/docs/benchmarks/latest.md +84 -60
  50. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
  51. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
  52. package/docs/benchmarks/rebaseline-latest.json +381 -0
  53. package/docs/benchmarks/rebaseline-latest.md +121 -0
  54. package/docs/benchmarks/register-stratified-latest.json +164 -0
  55. package/docs/benchmarks/register-stratified-latest.md +99 -0
  56. package/docs/benchmarks/register-stratified.md +43 -0
  57. package/docs/integrations/github-action.md +44 -11
  58. package/docs/integrations/playground.md +58 -0
  59. package/docs/integrations/pre-commit.md +5 -5
  60. package/docs/integrations/release.md +5 -3
  61. package/docs/integrations/static-sites.md +83 -0
  62. package/docs/research/2025-rebaseline-plan.md +71 -2
  63. package/docs/research/2026-rebaseline.md +102 -0
  64. package/docs/research/adversarial-mps.md +41 -0
  65. package/docs/research/ai-human-metrics.md +35 -23
  66. package/docs/research/human-eval-panel.md +42 -0
  67. package/docs/research/judge-agreement.md +24 -0
  68. package/docs/research/ko-2025-corpus-sources.md +135 -0
  69. package/docs/research/lexicon-freshness-audit.md +64 -0
  70. package/docs/research/zh-ja-lexicon-calibration.md +60 -0
  71. package/docs/social/patina-launch-copy.md +173 -100
  72. package/docs/social/patina-launch-execution.md +94 -0
  73. package/docs/social/patina-launch-korean-first.md +83 -0
  74. package/docs/social/signs-of-ai-writing.md +26 -0
  75. package/docs/social/signs-of-ai-writing_KR.md +26 -0
  76. package/lexicon/ai-en.md +21 -24
  77. package/lexicon/ai-ja.md +158 -0
  78. package/lexicon/ai-ko.md +9 -9
  79. package/lexicon/ai-zh.md +158 -0
  80. package/lexicon/provenance/ai-en.json +970 -0
  81. package/lexicon/provenance/ai-ja.json +542 -0
  82. package/lexicon/provenance/ai-ko.json +866 -0
  83. package/lexicon/provenance/ai-zh.json +542 -0
  84. package/package.json +49 -8
  85. package/patterns/en-communication.md +5 -0
  86. package/patterns/en-content.md +5 -0
  87. package/patterns/en-filler.md +5 -0
  88. package/patterns/en-language.md +29 -1
  89. package/patterns/en-structure.md +5 -0
  90. package/patterns/en-style.md +5 -0
  91. package/patterns/en-viral-hook.md +42 -2
  92. package/patterns/ja-communication.md +5 -0
  93. package/patterns/ja-content.md +5 -0
  94. package/patterns/ja-filler.md +5 -0
  95. package/patterns/ja-language.md +33 -1
  96. package/patterns/ja-structure.md +12 -0
  97. package/patterns/ja-style.md +5 -0
  98. package/patterns/ja-viral-hook.md +41 -2
  99. package/patterns/ko-communication.md +5 -0
  100. package/patterns/ko-content.md +5 -0
  101. package/patterns/ko-filler.md +5 -0
  102. package/patterns/ko-language.md +33 -1
  103. package/patterns/ko-structure.md +25 -6
  104. package/patterns/ko-style.md +5 -0
  105. package/patterns/ko-viral-hook.md +38 -2
  106. package/patterns/zh-communication.md +5 -0
  107. package/patterns/zh-content.md +5 -0
  108. package/patterns/zh-filler.md +5 -0
  109. package/patterns/zh-language.md +37 -1
  110. package/patterns/zh-structure.md +12 -0
  111. package/patterns/zh-style.md +5 -0
  112. package/patterns/zh-viral-hook.md +38 -2
  113. package/playground/README.md +55 -0
  114. package/playground/analytics.js +4 -0
  115. package/playground/analyzer.js +883 -0
  116. package/playground/app.js +157 -0
  117. package/playground/data/lexicons.js +343 -0
  118. package/playground/index.html +138 -0
  119. package/playground/styles.css +267 -0
  120. package/profiles/namuwiki.md +111 -0
  121. package/scripts/adversarial-mps-report.mjs +201 -0
  122. package/scripts/badge-json.mjs +79 -0
  123. package/scripts/benchmark-report.mjs +56 -9
  124. package/scripts/check-release-metadata.mjs +0 -2
  125. package/scripts/detector-comparison.mjs +7 -7
  126. package/scripts/generate-playground-data.mjs +77 -0
  127. package/scripts/katfish-calibration.mjs +464 -0
  128. package/scripts/lexicon-freshness.mjs +485 -0
  129. package/scripts/lint.mjs +1 -1
  130. package/scripts/precommit-score.mjs +4 -3
  131. package/scripts/prose-score.mjs +81 -5
  132. package/scripts/rebaseline-intake.mjs +242 -0
  133. package/scripts/rebaseline-score.mjs +268 -0
  134. package/scripts/rebaseline-summary.mjs +773 -0
  135. package/scripts/rebaseline-web-collect.mjs +410 -0
  136. package/scripts/update-benchmark-ranges.mjs +1 -0
  137. package/src/api.js +69 -105
  138. package/src/auth.js +50 -2
  139. package/src/backends/claude-cli.js +19 -4
  140. package/src/backends/codex-cli.js +19 -3
  141. package/src/backends/contract.js +230 -1
  142. package/src/backends/gemini-cli.js +18 -5
  143. package/src/backends/index.js +87 -12
  144. package/src/backends/kimi-cli.js +161 -0
  145. package/src/cli.js +577 -567
  146. package/src/commands/doctor.js +2 -2
  147. package/src/config.js +29 -0
  148. package/src/errors.js +53 -1
  149. package/src/features/discourse-tells.js +68 -0
  150. package/src/features/index.js +82 -8
  151. package/src/features/lexicon.js +40 -6
  152. package/src/features/markup-leakage.js +69 -0
  153. package/src/features/segment.js +41 -0
  154. package/src/features/signal-strength.js +81 -0
  155. package/src/features/stylometry.js +231 -1
  156. package/src/features/translationese.js +127 -0
  157. package/src/loader.js +76 -0
  158. package/src/logger.js +22 -23
  159. package/src/model-defaults.js +55 -0
  160. package/src/ouroboros.js +31 -0
  161. package/src/output.js +102 -90
  162. package/src/prompt-builder.js +103 -68
  163. package/src/providers.js +51 -4
  164. package/src/scoring.js +210 -2
  165. package/src/security.js +75 -0
  166. package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
  167. package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
  168. package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
  169. package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
  170. package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
  171. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
  172. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
  173. package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
  174. package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
  175. package/tests/quality/README.md +188 -11
  176. package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
  177. package/tests/quality/benchmark.mjs +39 -1
  178. package/tests/quality/dogfood.mjs +5 -3
  179. package/tests/quality/live-fixtures.jsonl +2 -0
  180. package/tests/quality/live-quality.mjs +596 -0
  181. package/tests/quality/ranking-metrics.mjs +136 -0
  182. package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
  183. package/vercel.json +53 -0
  184. package/SKILL-MAX.md +0 -455
  185. package/docs/internal/HARNESS.md +0 -14
  186. package/docs/internal/README.md +0 -14
  187. package/docs/internal/WARP.md +0 -23
  188. package/patina-max/SKILL.md +0 -523
  189. package/patina-max/composite.py +0 -457
  190. package/src/cache.js +0 -106
  191. package/src/commands/init.js +0 -208
  192. package/src/manifest.js +0 -162
  193. package/src/max-mode.js +0 -207
@@ -0,0 +1,485 @@
1
+ #!/usr/bin/env node
2
+ // Validate lexicon per-entry provenance and produce public-safe lift reports
3
+ // from local/private JSONL corpora. Raw text is never written to reports.
4
+
5
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
6
+ import { dirname, relative, resolve, basename } from 'node:path';
7
+ import { fileURLToPath } from 'node:url';
8
+
9
+ import yaml from 'js-yaml';
10
+
11
+ import { computeDensity } from '../src/features/lexicon.js';
12
+ import { tokenize } from '../src/features/segment.js';
13
+
14
// Directory containing this script, and the repository root one level above it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = resolve(__dirname, '..');

// Lexicon directory, relative to the repository root.
const DEFAULT_LEXICON_DIR = 'lexicon';

// Every provenance row must carry each of these keys.
const REQUIRED_PROVENANCE_FIELDS = [
  'entry',
  'kind',
  'added',
  'source',
  'last_validated',
  'lift',
  'status',
];

// Corpus `class` labels counted as "hot" documents when mining lift.
const HOT_CLASSES = new Set([
  'ai',
  'ai-like',
  'synthetic-ai',
  'generated',
  'lightly-edited-ai',
  'heavily-edited-ai',
]);

// Corpus `class` labels counted as "cold" documents when mining lift.
const COLD_CLASSES = new Set([
  'human',
  'natural',
  'natural-human',
  'human-reference',
]);
20
+
21
/**
 * Parse command-line flags for the lexicon freshness script.
 *
 * Boolean flags: `--check`, `--json`, `--help`/`-h`. Every other recognised
 * flag consumes the following argument as its value; `--source-url` may be
 * repeated and accumulates into `sourceUrls`.
 *
 * When neither `--check` nor `--input` is given, `check` defaults to `true`.
 *
 * @param {string[]} [argv] Arguments to parse (defaults to the process arguments).
 * @returns {object} Parsed options.
 * @throws {Error} On an unknown flag, or when a value-taking flag is the last
 *   argument and therefore has no value.
 */
export function parseArgs(argv = process.argv.slice(2)) {
  const args = {
    check: false,
    input: null,
    lang: 'en',
    sourceId: null,
    sourceNote: null,
    // Today's date in UTC, formatted YYYY-MM-DD.
    validatedAt: new Date().toISOString().slice(0, 10),
    outputJson: null,
    outputMd: null,
    sourceUrls: [],
    json: false,
    help: false,
  };

  // Flags that take exactly one value, mapped to the option key they populate.
  const valueFlags = new Map([
    ['--input', 'input'],
    ['--lang', 'lang'],
    ['--source-id', 'sourceId'],
    ['--source-note', 'sourceNote'],
    ['--validated-at', 'validatedAt'],
    ['--output-json', 'outputJson'],
    ['--output-md', 'outputMd'],
  ]);

  // Fail loudly instead of silently storing `undefined` for a trailing flag.
  const valueAt = (index, flag) => {
    const value = argv[index];
    if (value === undefined) throw new Error(`Missing value for ${flag}`);
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--check') args.check = true;
    else if (arg === '--json') args.json = true;
    else if (arg === '--help' || arg === '-h') args.help = true;
    else if (arg === '--source-url') args.sourceUrls.push(valueAt(++i, arg));
    else if (valueFlags.has(arg)) args[valueFlags.get(arg)] = valueAt(++i, arg);
    else throw new Error(`Unknown argument: ${arg}`);
  }

  if (!args.check && !args.input) args.check = true;
  return args;
}
55
+
56
/**
 * Read a file and split a leading `---` YAML frontmatter block from its body.
 *
 * Both LF and CRLF line endings are accepted around the frontmatter fences, so
 * files checked out with Windows line endings keep their metadata.
 *
 * @param {string} path File to read (UTF-8).
 * @returns {{ meta: object, body: string, raw: string }} Parsed frontmatter
 *   (`{}` when absent or empty), the text after the frontmatter, and the
 *   unmodified file contents.
 */
export function parseFrontmatterFile(path) {
  const raw = readFileSync(path, 'utf8');
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?/u);
  if (!match) return { meta: {}, body: raw, raw };
  return {
    meta: yaml.load(match[1]) || {},
    body: raw.slice(match[0].length),
    raw,
  };
}
66
+
67
/**
 * Collect the bullet entries listed under the lexicon's `## ` sections.
 *
 * A heading containing "strict matches" opens the strict list, one containing
 * "multi-word phrases" opens the phrase list, and any other `## ` heading
 * closes the current list. Within an open list, every `- ` bullet becomes one
 * NFC-normalised entry.
 *
 * @param {string} body Lexicon markdown without frontmatter.
 * @returns {{ strict: string[], phrases: string[], all: Array<{ kind: string, entry: string }> }}
 *   The two lists, plus a combined list (strict first) tagged with its kind.
 */
export function parseLexiconEntries(body) {
  const buckets = { strict: [], phrase: [] };
  let activeKind = null;

  body.split('\n').forEach((sourceLine) => {
    const text = sourceLine.trim();

    if (text.startsWith('## ')) {
      const lowered = text.toLowerCase();
      if (lowered.includes('strict matches')) activeKind = 'strict';
      else if (lowered.includes('multi-word phrases')) activeKind = 'phrase';
      else activeKind = null;
      return;
    }

    if (activeKind === null || !text.startsWith('- ')) return;
    const term = text.slice(2).trim().normalize('NFC');
    if (term) buckets[activeKind].push(term);
  });

  const tagWith = (kind) => (entry) => ({ kind, entry });
  return {
    strict: buckets.strict,
    phrases: buckets.phrase,
    all: [
      ...buckets.strict.map(tagWith('strict')),
      ...buckets.phrase.map(tagWith('phrase')),
    ],
  };
}
87
+
88
/**
 * Validate that every lexicon entry has exactly one well-formed provenance row.
 *
 * For each language the lexicon `ai-<lang>.md` is parsed, its
 * `entry-provenance` frontmatter is resolved against the repository root, and
 * the referenced JSON array is checked for required fields, field types,
 * duplicates, orphan rows, and entries without provenance.
 *
 * @param {object} [options]
 * @param {string} [options.repoRoot] Repository root (defaults to REPO_ROOT).
 * @param {string} [options.lexiconDir] Lexicon directory relative to the root.
 * @param {string[]} [options.langs] Languages to check (default en, ko, zh, ja).
 * @returns {{ ok: boolean, files: object[], errors: string[], warnings: string[] }}
 *   `ok` is true when no errors were collected. `warnings` is returned for
 *   shape compatibility; nothing in this function appends to it.
 */
export function checkLexiconProvenance(options = {}) {
  const repoRoot = options.repoRoot || REPO_ROOT;
  const lexiconDir = resolve(repoRoot, options.lexiconDir || DEFAULT_LEXICON_DIR);
  const langs = options.langs || ['en', 'ko', 'zh', 'ja'];
  const errors = [];
  const warnings = [];
  const files = [];

  const isFilledString = (value) => typeof value === 'string' && value.length > 0;
  const isStringOrNull = (value) => value === null || typeof value === 'string';

  // Checks one language; returns early wherever the original loop `continue`d.
  const inspectLanguage = (lang) => {
    const lexiconPath = resolve(lexiconDir, `ai-${lang}.md`);
    if (!existsSync(lexiconPath)) {
      errors.push(`missing lexicon: ${toRepoRelative(lexiconPath, repoRoot)}`);
      return;
    }

    const { meta, body } = parseFrontmatterFile(lexiconPath);
    const entries = parseLexiconEntries(body).all;
    const fileLabel = toRepoRelative(lexiconPath, repoRoot);
    const provenanceRef = meta['entry-provenance'];
    if (!isFilledString(provenanceRef)) {
      errors.push(`${fileLabel}: entry-provenance frontmatter is required`);
      return;
    }

    // A declared entry count is optional, but must match when it is numeric.
    if (typeof meta.entries === 'number' && meta.entries !== entries.length) {
      errors.push(`${fileLabel}: frontmatter entries=${meta.entries} but parsed ${entries.length}`);
    }

    const provenancePath = resolve(repoRoot, provenanceRef);
    if (!existsSync(provenancePath)) {
      errors.push(`${fileLabel}: provenance file not found: ${provenanceRef}`);
      return;
    }

    let provenance;
    try {
      provenance = JSON.parse(readFileSync(provenancePath, 'utf8'));
    } catch (error) {
      errors.push(`${provenanceRef}: invalid JSON (${error.message})`);
      return;
    }
    if (!Array.isArray(provenance)) {
      errors.push(`${provenanceRef}: provenance must be a JSON array`);
      return;
    }

    const report = (message) => {
      errors.push(`${provenanceRef}: ${message}`);
    };
    const knownKeys = new Set(entries.map((entry) => entryKey(entry)));
    const seen = new Set();

    for (const item of provenance) {
      if (!item || typeof item !== 'object' || Array.isArray(item)) {
        report('each provenance row must be an object');
        continue;
      }

      const label = entryLabel(item);
      REQUIRED_PROVENANCE_FIELDS
        .filter((field) => !Object.prototype.hasOwnProperty.call(item, field))
        .forEach((field) => report(`${label} missing field ${field}`));

      if (item.kind !== 'strict' && item.kind !== 'phrase') {
        report(`${label} kind must be strict or phrase`);
      }
      if (!isFilledString(item.entry)) report('entry must be a non-empty string');
      if (!isFilledString(item.source)) report(`${label} source must be a non-empty string`);
      if (!isFilledString(item.status)) report(`${label} status must be a non-empty string`);
      if (!isStringOrNull(item.last_validated)) report(`${label} last_validated must be string or null`);
      if (!isStringOrNull(item.added)) report(`${label} added must be string or null`);

      const key = entryKey(item);
      if (seen.has(key)) report(`duplicate provenance for ${key}`);
      seen.add(key);
      if (!knownKeys.has(key)) report(`orphan provenance for ${key}`);
    }

    for (const entry of entries) {
      const key = entryKey(entry);
      if (!seen.has(key)) report(`missing provenance for ${key}`);
    }

    files.push({
      file: fileLabel,
      provenance: provenanceRef,
      entries: entries.length,
      provenanceRows: provenance.length,
    });
  };

  for (const lang of langs) inspectLanguage(lang);

  return { ok: errors.length === 0, files, errors, warnings };
}
181
+
182
/**
 * Read a JSONL file into `{ lineNumber, value }` rows.
 *
 * Blank lines are skipped. A line that is not valid JSON is recorded in
 * `errors` with its 1-based line number and does not stop parsing.
 *
 * @param {string} inputPath Path to the JSONL file (absolute or repo-relative).
 * @param {object} [options]
 * @param {string} [options.repoRoot] Repository root (defaults to REPO_ROOT).
 * @returns {{ input: string, rows: Array<{ lineNumber: number, value: any }>, errors: string[] }}
 *   `input` is the repo-relative path; a missing file yields a single error.
 */
export function loadJsonlRows(inputPath, options = {}) {
  const repoRoot = options.repoRoot || REPO_ROOT;
  const absolutePath = resolvePath(inputPath, repoRoot);
  const input = toRepoRelative(absolutePath, repoRoot);
  const rows = [];
  const errors = [];

  if (!existsSync(absolutePath)) {
    return { input, rows, errors: [`input not found: ${input}`] };
  }

  readFileSync(absolutePath, 'utf8')
    .split(/\r?\n/u)
    .forEach((text, index) => {
      const trimmed = text.trim();
      if (!trimmed) return;
      const lineNumber = index + 1;
      try {
        rows.push({ lineNumber, value: JSON.parse(trimmed) });
      } catch (error) {
        errors.push(`line ${lineNumber}: invalid JSON (${error.message})`);
      }
    });

  return { input, rows, errors };
}
203
+
204
/**
 * Compute per-entry hot/cold document-frequency lift from corpus rows.
 *
 * Rows may be `{ lineNumber, value }` wrappers (as produced by
 * `loadJsonlRows`) or bare records. A record is used only when it is an
 * object, its `language` (defaulting to the target language) matches, it has
 * non-empty `text`, and its `class` is in HOT_CLASSES or COLD_CLASSES.
 *
 * An entry is kept when it appears in at least one hot document, its lift
 * (hot rate / cold rate) is at least 4, and its cold rate is at most 5%. The
 * gate is ready when each class has at least 25 documents and 2 registers.
 *
 * @param {Array<object>} rows Corpus rows.
 * @param {Array<{ kind: string, entry: string }>} entries Lexicon entries.
 * @param {object} [options] `lang` (default `en`) plus report metadata:
 *   `sourceId`, `sourceNote`, `sourceUrls`, `validatedAt`, `input`.
 * @returns {object} Aggregate report: counts, gate, per-entry decisions,
 *   errors, and warnings. Document text is not copied into the result.
 */
export function mineLexiconLift(rows, entries, options = {}) {
  const lang = options.lang || 'en';
  const hotDocs = [];
  const coldDocs = [];
  const errors = [];
  const warnings = [];

  const MIN_LIFT = 4;
  const MAX_COLD_RATE = 0.05;

  for (const row of rows) {
    // A wrapper is unwrapped even when its value is falsy (a JSONL line such
    // as `null`, `0`, or `""`), so that value reaches the object check below
    // instead of the wrapper being mistaken for the record itself.
    const isWrapper = row !== null && typeof row === 'object' && Boolean(row.lineNumber)
      && Object.prototype.hasOwnProperty.call(row, 'value');
    const raw = isWrapper ? row.value : (row?.value || row);
    const label = row?.lineNumber ? `line ${row.lineNumber}` : (raw?.sample_id || 'row');

    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
      errors.push(`${label}: record must be an object`);
      continue;
    }
    // Rows for other languages are skipped silently.
    if ((raw.language || lang) !== lang) continue;
    if (typeof raw.text !== 'string' || raw.text.length === 0) {
      warnings.push(`${label}: skipped row without local text`);
      continue;
    }
    if (HOT_CLASSES.has(raw.class)) hotDocs.push(raw);
    else if (COLD_CLASSES.has(raw.class)) coldDocs.push(raw);
    else warnings.push(`${label}: skipped unknown class ${raw.class}`);
  }

  const hotCounts = countEntryDocumentFrequency(hotDocs, entries, lang);
  const coldCounts = countEntryDocumentFrequency(coldDocs, entries, lang);
  const decisions = entries.map((entry) => {
    const key = entryKey(entry);
    const hot = hotCounts.counts.get(key) || 0;
    const cold = coldCounts.counts.get(key) || 0;
    const hotRate = hotDocs.length ? hot / hotDocs.length : 0;
    const coldRate = coldDocs.length ? cold / coldDocs.length : 0;
    // Infinity is stored as a string because JSON cannot represent it.
    const lift = cold === 0 ? (hot === 0 ? 0 : 'Infinity') : round(hotRate / coldRate, 3);
    const numericLift = lift === 'Infinity' ? Infinity : lift;
    const keep = hot > 0 && numericLift >= MIN_LIFT && coldRate <= MAX_COLD_RATE;
    return {
      kind: entry.kind,
      entry: entry.entry,
      hot_docs: hot,
      cold_docs: cold,
      hot_rate: round(hotRate, 5),
      cold_rate: round(coldRate, 5),
      lift,
      decision: keep ? 'keep' : 'drop',
    };
  });

  decisions.sort((a, b) => a.decision.localeCompare(b.decision)
    || a.kind.localeCompare(b.kind)
    || a.entry.localeCompare(b.entry));

  const gate = {
    hot_docs: hotDocs.length,
    cold_docs: coldDocs.length,
    hot_registers: hotCounts.registers,
    cold_registers: coldCounts.registers,
    min_docs_per_class: 25,
    min_registers_per_class: 2,
  };
  gate.ready = gate.hot_docs >= gate.min_docs_per_class && gate.cold_docs >= gate.min_docs_per_class
    && Object.keys(gate.hot_registers).length >= gate.min_registers_per_class
    && Object.keys(gate.cold_registers).length >= gate.min_registers_per_class;

  return {
    language: lang,
    source_id: options.sourceId || null,
    source_note: options.sourceNote || null,
    source_urls: options.sourceUrls || [],
    validated_at: options.validatedAt || null,
    input: options.input || null,
    entries: entries.length,
    kept: decisions.filter((row) => row.decision === 'keep').length,
    dropped: decisions.filter((row) => row.decision === 'drop').length,
    gate,
    decisions,
    errors,
    warnings,
  };
}
281
+
282
/**
 * Write the lift report to the requested JSON and/or Markdown paths.
 *
 * Parent directories are created as needed. Paths are resolved against
 * `options.repoRoot` (or REPO_ROOT).
 *
 * @param {object} result Lift report to serialise.
 * @param {object} [options]
 * @param {string} [options.outputJson] Destination for the JSON report.
 * @param {string} [options.outputMd] Destination for the Markdown report.
 * @param {string} [options.repoRoot] Repository root override.
 * @returns {{ json?: string, markdown?: string }} Repo-relative paths written.
 */
export function writeLiftReport(result, options = {}) {
  const root = options.repoRoot || REPO_ROOT;
  const written = {};

  // `render` is a thunk so the content is produced only after the target
  // directory exists, matching the original statement order.
  const persist = (target, render) => {
    const absolute = resolvePath(target, root);
    mkdirSync(dirname(absolute), { recursive: true });
    writeFileSync(absolute, render());
    return toRepoRelative(absolute, root);
  };

  if (options.outputJson) {
    written.json = persist(options.outputJson, () => `${JSON.stringify(result, null, 2)}\n`);
  }
  if (options.outputMd) {
    written.markdown = persist(options.outputMd, () => renderLiftMarkdown(result));
  }
  return written;
}
298
+
299
/**
 * Render the provenance check result as a Markdown document.
 *
 * @param {{ ok: boolean, files: object[], errors: string[], warnings: string[] }} result
 *   Result produced by `checkLexiconProvenance`.
 * @returns {string} Markdown ending with a newline: a PASS/FAIL summary, a
 *   table of checked files when there are any, then error and warning lists
 *   when non-empty.
 */
export function renderCheckMarkdown(result) {
  const bulletSection = (title, items) => [
    '',
    title,
    ...items.map((item) => `- ${escapeMarkdown(item)}`),
  ];

  const verdict = result.ok ? 'PASS' : 'FAIL';
  const out = [
    '# Lexicon Freshness Check',
    '',
    `- Validation: **${verdict}**`,
    `- Files checked: ${result.files.length}`,
  ];

  if (result.files.length) {
    out.push('', '| lexicon | provenance | entries | provenance rows |', '|---|---|---:|---:|');
    out.push(
      ...result.files.map(
        (file) => `| \`${file.file}\` | \`${file.provenance}\` | ${file.entries} | ${file.provenanceRows} |`,
      ),
    );
  }
  if (result.errors.length) out.push(...bulletSection('## Errors', result.errors));
  if (result.warnings.length) out.push(...bulletSection('## Warnings', result.warnings));

  return `${out.join('\n')}\n`;
}
316
+
317
/**
 * Render a lift report as a Markdown document.
 *
 * Sections, in order: summary bullets, optional source note, optional source
 * provenance (URLs plus the public-report policy line), register coverage,
 * the per-entry decision table, then errors and warnings when present.
 *
 * @param {object} result Report produced by `mineLexiconLift` (or a stub with
 *   the same fields).
 * @returns {string} Markdown ending with a newline.
 */
export function renderLiftMarkdown(result) {
  const orNotRecorded = (value) => value || 'not recorded';
  const bulletSection = (title, items) => [
    '',
    title,
    ...items.map((item) => `- ${escapeMarkdown(item)}`),
  ];

  const gateLabel = result.gate.ready ? 'PASS' : 'BLOCKED';
  const out = [];
  out.push('# Lexicon Freshness Lift Report');
  out.push('');
  out.push(`- Language: ${result.language}`);
  out.push(`- Source: ${orNotRecorded(result.source_id)}`);
  out.push(`- Validated at: ${orNotRecorded(result.validated_at)}`);
  out.push(`- Input: ${orNotRecorded(result.input)}`);
  out.push(`- Entries evaluated: ${result.entries}`);
  out.push(`- Decision summary: ${result.kept} keep / ${result.dropped} drop`);
  out.push(`- Gate: **${gateLabel}** (${result.gate.hot_docs} hot docs, ${result.gate.cold_docs} cold docs)`);

  if (result.source_note) {
    out.push(`- Source note: ${escapeMarkdown(result.source_note)}`);
  }

  if (result.source_urls?.length) {
    out.push('', '## Source provenance', '');
    result.source_urls.forEach((url) => {
      out.push(`- <${url}>`);
    });
    out.push('- Public report policy: aggregate counts only; raw corpus rows stay local/private.');
  }

  out.push('', '## Register coverage', '');
  out.push('| class | registers |', '|---|---|');
  out.push(`| hot | ${formatRegisterCounts(result.gate.hot_registers)} |`);
  out.push(`| cold | ${formatRegisterCounts(result.gate.cold_registers)} |`);
  out.push('', '## Entry decisions', '');
  out.push('| decision | kind | entry | hot docs | cold docs | lift | cold rate |');
  out.push('|---|---|---|---:|---:|---:|---:|');

  result.decisions.forEach((row) => {
    const coldPercent = (row.cold_rate * 100).toFixed(2);
    out.push(`| ${row.decision} | ${row.kind} | ${escapeMarkdown(row.entry)} | ${row.hot_docs} | ${row.cold_docs} | ${row.lift} | ${coldPercent}% |`);
  });

  if (result.errors.length) out.push(...bulletSection('## Errors', result.errors));
  if (result.warnings.length) out.push(...bulletSection('## Warnings', result.warnings));

  return `${out.join('\n')}\n`;
}
357
+
358
/**
 * Count, for every lexicon entry, how many documents match it at least once,
 * and tally the documents per register.
 *
 * Each entry is tested on its own by passing `computeDensity` a lexicon that
 * contains only that entry; a document counts when `matches` is above zero.
 *
 * @param {Array<{ text: string, register?: string }>} docs Documents to scan.
 * @param {Array<{ kind: string, entry: string }>} entries Lexicon entries.
 * @param {string} lang Language passed to the tokenizer and the lexicon.
 * @returns {{ counts: Map<string, number>, registers: object }} Document
 *   frequency keyed by `kind:entry`, and register tallies sorted by name
 *   (documents without a register fall under `unspecified`).
 */
function countEntryDocumentFrequency(docs, entries, lang) {
  const counts = new Map();
  for (const entry of entries) counts.set(entryKey(entry), 0);

  const registers = {};
  docs.forEach((doc) => {
    const register = doc.register || 'unspecified';
    registers[register] = (registers[register] || 0) + 1;

    const tokens = tokenize(doc.text, { lang });
    entries.forEach((entry) => {
      const singleEntryLexicon = {
        lang,
        strict: entry.kind === 'strict' ? [entry.entry] : [],
        phrases: entry.kind === 'phrase' ? [entry.entry] : [],
      };
      const { matches } = computeDensity(doc.text, tokens, singleEntryLexicon);
      if (matches > 0) {
        const key = entryKey(entry);
        counts.set(key, (counts.get(key) || 0) + 1);
      }
    });
  });

  return { counts, registers: sortObject(registers) };
}
378
+
379
/**
 * Load every entry (strict and phrase) from the lexicon for one language.
 *
 * The lexicon is read from `<repoRoot>/<DEFAULT_LEXICON_DIR>/ai-<lang>.md`,
 * the same default location `checkLexiconProvenance` validates.
 *
 * @param {string} lang Language code used in the lexicon file name.
 * @param {string} [repoRoot] Repository root (defaults to REPO_ROOT).
 * @returns {Array<{ kind: string, entry: string }>} Tagged lexicon entries.
 * @throws {Error} When no lexicon file exists for the language.
 */
function loadEntriesForLang(lang, repoRoot = REPO_ROOT) {
  const file = resolve(repoRoot, DEFAULT_LEXICON_DIR, `ai-${lang}.md`);
  // Report an unsupported --lang in the script's own wording rather than
  // letting readFileSync surface a bare ENOENT.
  if (!existsSync(file)) {
    throw new Error(`missing lexicon: ${toRepoRelative(file, repoRoot)}`);
  }
  const parsed = parseFrontmatterFile(file);
  return parseLexiconEntries(parsed.body).all;
}
384
+
385
/**
 * Format register tallies as `name=count` pairs joined by commas.
 *
 * @param {object} [registers] Map of register name to document count.
 * @returns {string} The joined pairs, or an em dash when there are none.
 */
function formatRegisterCounts(registers = {}) {
  const pairs = Object.entries(registers);
  if (pairs.length === 0) return '—';

  const parts = [];
  for (const [name, count] of pairs) {
    parts.push(`${escapeMarkdown(name)}=${count}`);
  }
  return parts.join(', ');
}
390
+
391
/**
 * Return a copy of an object with its own enumerable keys in
 * `localeCompare` order.
 *
 * @param {object} value Object to copy.
 * @returns {object} New object with sorted keys.
 */
function sortObject(value) {
  const pairs = Object.entries(value);
  pairs.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(pairs);
}
394
+
395
/**
 * Build the `kind:entry` key that identifies a lexicon entry or provenance row.
 *
 * @param {{ kind: string, entry: string }} row Entry or provenance row.
 * @returns {string} Key of the form `kind:entry`.
 */
function entryKey(row) {
  const { kind, entry } = row;
  return `${kind}:${entry}`;
}
398
+
399
/**
 * Human-readable label for a provenance row in error messages.
 *
 * @param {object|null|undefined} row Provenance row, possibly malformed.
 * @returns {string} `kind:entry` (with `?` for a missing kind), or `row` when
 *   the row has no usable entry.
 */
function entryLabel(row) {
  if (!row?.entry) return 'row';
  const kind = row.kind || '?';
  return `${kind}:${row.entry}`;
}
402
+
403
/**
 * Resolve a path against the repository root.
 *
 * @param {string} path Path that may be absolute or repo-relative.
 * @param {string} [repoRoot] Repository root (defaults to REPO_ROOT).
 * @returns {string} The path unchanged when it starts with `/`, otherwise the
 *   path resolved against `repoRoot`.
 */
function resolvePath(path, repoRoot = REPO_ROOT) {
  const isRooted = path.startsWith('/');
  return isRooted ? path : resolve(repoRoot, path);
}
407
+
408
/**
 * Express a path relative to the repository root for use in reports.
 *
 * @param {string} path Path to convert.
 * @param {string} [repoRoot] Repository root (defaults to REPO_ROOT).
 * @returns {string} The relative path, or the path's base name when the
 *   relative path is empty (the path is the root itself).
 */
function toRepoRelative(path, repoRoot = REPO_ROOT) {
  const fromRoot = relative(repoRoot, path);
  return fromRoot === '' ? basename(path) : fromRoot;
}
411
+
412
/**
 * Round a number to a fixed number of decimal places.
 *
 * @param {number} value Number to round.
 * @param {number} [digits=3] Decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(value, digits = 3) {
  const scale = 10 ** digits;
  return Math.round(value * scale) / scale;
}
415
+
416
/**
 * Make a value safe to place inside a Markdown table cell or list item.
 *
 * @param {*} value Value to render; `null` and `undefined` become an em dash.
 * @returns {string} The stringified value with `|` backslash-escaped and
 *   newlines replaced by spaces.
 */
function escapeMarkdown(value) {
  const text = String(value ?? '—');
  const pipesEscaped = text.split('|').join('\\|');
  return pipesEscaped.split('\n').join(' ');
}
419
+
420
/**
 * Print command-line usage to stdout.
 *
 * The text reflects how the flags are actually parsed: the lift report is
 * computed for the `--lang` language (not only English), `--input` on its own
 * runs the lift report without the provenance check, and `-h`/`--help` is
 * listed.
 */
function printHelp() {
  console.log(`Usage: node scripts/lexicon-freshness.mjs [--check] [--input <private.jsonl>] [options]

Default mode validates lexicon/ai-*.md entry-provenance sidecars.
When --input is provided, it computes a hot/cold document-frequency lift
report for the --lang lexicon from local JSONL rows; add --check to also
validate provenance in the same run. Reports never include raw text.

Options:
  --check                 Validate lexicon provenance sidecars
  --input <path>          Local/private JSONL corpus with text fields
  --lang <lang>           Language to mine (default: en)
  --source-id <id>        Stable source id for the report
  --source-note <text>    Human-readable source note
  --source-url <url>      Source URL for the report (repeatable)
  --validated-at <date>   Validation date (default: today)
  --output-json <path>    Write public-safe aggregate JSON
  --output-md <path>      Write public-safe Markdown report
  --json                  Print JSON instead of Markdown
  -h, --help              Show this help`);
}
439
+
440
/**
 * CLI entry point: run the provenance check and/or the lift report according
 * to the parsed flags, print Markdown (or one JSON document with `--json`),
 * and mark the process as failed when either step reports errors.
 */
function main() {
  const args = parseArgs();
  if (args.help) {
    printHelp();
    return;
  }

  const outputs = {};
  let failed = false;

  if (args.check) {
    const check = checkLexiconProvenance();
    outputs.check = check;
    if (!args.json) console.log(renderCheckMarkdown(check));
    if (!check.ok) failed = true;
  }

  if (args.input) {
    const loaded = loadJsonlRows(args.input);
    // The lexicon is loaded only when the input parsed cleanly, so input
    // errors are reported even if the lexicon for --lang cannot be read.
    const report = loaded.errors.length
      ? {
        language: args.lang,
        errors: loaded.errors,
        warnings: [],
        decisions: [],
        gate: { ready: false, hot_docs: 0, cold_docs: 0, hot_registers: {}, cold_registers: {} },
        entries: 0,
        kept: 0,
        dropped: 0,
      }
      : mineLexiconLift(loaded.rows, loadEntriesForLang(args.lang), {
        lang: args.lang,
        input: loaded.input,
        sourceId: args.sourceId,
        sourceNote: args.sourceNote,
        sourceUrls: args.sourceUrls,
        validatedAt: args.validatedAt,
      });

    // Report files are written only for error-free reports.
    const written = report.errors.length === 0
      ? writeLiftReport(report, { outputJson: args.outputJson, outputMd: args.outputMd })
      : {};
    outputs.report = report;
    outputs.written = written;
    if (!args.json) {
      console.log(renderLiftMarkdown(report));
      for (const path of Object.values(written)) console.log(`Wrote ${path}`);
    }
    if (report.errors.length) failed = true;
  }

  if (args.json) console.log(JSON.stringify(outputs, null, 2));
  // Set the exit code rather than calling process.exit(), which can cut off
  // pending stdout writes when the output is piped.
  if (failed) process.exitCode = 1;
}
482
+
483
// Run the CLI only when this file is the script Node was started with, so
// importing the module for its exports has no side effects.
const invokedScript = process.argv[1];
const isDirectRun = Boolean(invokedScript)
  && resolve(invokedScript) === fileURLToPath(import.meta.url);
if (isDirectRun) {
  main();
}
package/scripts/lint.mjs CHANGED
@@ -8,7 +8,7 @@ import { fileURLToPath } from 'node:url';
8
8
 
9
9
  const __dirname = dirname(fileURLToPath(import.meta.url));
10
10
  const REPO_ROOT = resolve(__dirname, '..');
11
- const ROOTS = ['bin', 'scripts', 'src', 'tests'];
11
+ const ROOTS = ['bin', 'scripts', 'src', 'tests', 'playground'];
12
12
  const EXT_RE = /\.(?:js|mjs)$/;
13
13
 
14
14
  function walk(dir, out = []) {
@@ -5,13 +5,14 @@ function parseArgs(argv) {
5
5
  const out = { files: [], gate: 30, lang: 'auto', maxFiles: 200 };
6
6
  for (let i = 0; i < argv.length; i++) {
7
7
  const arg = argv[i];
8
- if (arg === '--gate' || arg === '--score-threshold') out.gate = Number(argv[++i]);
8
+ if (arg === '--score-threshold') out.gate = Number(argv[++i]);
9
9
  else if (arg === '--lang') out.lang = argv[++i] || 'auto';
10
10
  else if (arg === '--max-files') out.maxFiles = Number(argv[++i]);
11
- else if (!arg.startsWith('-')) out.files.push(arg);
11
+ else if (arg.startsWith('-')) throw new Error(`unknown option ${arg}`);
12
+ else out.files.push(arg);
12
13
  }
13
14
  if (!Number.isFinite(out.gate) || out.gate < 0 || out.gate > 100) {
14
- throw new Error(`--gate expects a number from 0 to 100, got ${out.gate}`);
15
+ throw new Error(`--score-threshold expects a number from 0 to 100, got ${out.gate}`);
15
16
  }
16
17
  return out;
17
18
  }
@@ -4,12 +4,20 @@ import { fileURLToPath } from 'node:url';
4
4
 
5
5
  import { analyzeText } from '../src/features/index.js';
6
6
  import { loadLexicon } from '../src/features/lexicon.js';
7
+ import {
8
+ paragraphSignalStrength,
9
+ summarizeSignalStrength,
10
+ } from '../src/features/signal-strength.js';
11
+ import { loadPatterns } from '../src/loader.js';
12
+
13
+ export { paragraphSignalStrength, summarizeSignalStrength };
7
14
 
8
15
  const __dirname = dirname(fileURLToPath(import.meta.url));
9
16
  export const DEFAULT_REPO_ROOT = resolve(__dirname, '..');
10
17
  export const DEFAULT_PROSE_EXTENSIONS = ['.md', '.mdx', '.txt', '.rst', '.adoc'];
11
18
 
12
19
  const lexiconCache = new Map();
20
+ const patternTermCache = new Map();
13
21
 
14
22
  export function parseBoolean(value, defaultValue = false) {
15
23
  if (value === undefined || value === null || value === '') return defaultValue;
@@ -33,13 +41,16 @@ export function stripNonProse(markdown) {
33
41
  .replace(/^---\n[\s\S]*?\n---\s*/, '\n')
34
42
  .replace(/```[\s\S]*?```/g, '\n')
35
43
  .replace(/~~~[\s\S]*?~~~/g, '\n')
44
+ // Remove Markdown tables before stripping inline HTML. Cells such as
45
+ // `p<0.01` are prose-visible math, not HTML tags; if HTML stripping runs
46
+ // first it can consume across rows and leave table fragments behind.
47
+ .replace(/^\s*\|.*\|\s*$/gm, '\n')
36
48
  .replace(/`[^`]*`/g, ' ')
37
49
  .replace(/!\[[^\]]*\]\([^)]*\)/g, ' ')
38
50
  .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')
39
51
  .replace(/<svg[\s\S]*?<\/svg>/gi, '\n')
40
52
  .replace(/<[^>]+>/g, ' ')
41
- .replace(/^\s*\|.*\|\s*$/gm, '\n')
42
- .replace(/^\s{0,3}#{1,6}\s+/gm, '')
53
+ .replace(/^\s{0,3}#{1,6}\s+.*$/gm, '\n')
43
54
  .replace(/^\s{0,3}>\s?/gm, '')
44
55
  .replace(/^\s*[-*+]\s+\[[ xX]\]\s+/gm, '')
45
56
  .replace(/^\s*[-*+]\s+/gm, '')
@@ -76,6 +87,14 @@ function getLexicon(lang, repoRoot) {
76
87
  return lexiconCache.get(key);
77
88
  }
78
89
 
90
/**
 * Return the watch terms for a language/repo pair, computing them at most once.
 *
 * Results are memoised in `patternTermCache` under a key built from the repo
 * root and the language joined by a NUL character.
 *
 * @param {string} lang - Language code passed through to `loadPatterns`.
 * @param {string} repoRoot - Repository root passed through to `loadPatterns`.
 * @returns {string[]} Whatever `extractPatternWatchTerms` produced for the loaded patterns.
 */
function getPatternWatchTerms(lang, repoRoot) {
  const cacheKey = `${repoRoot}\0${lang}`;
  if (patternTermCache.has(cacheKey)) {
    return patternTermCache.get(cacheKey);
  }

  // Cache miss: load the patterns, derive the terms, and remember them.
  const watchTerms = extractPatternWatchTerms(loadPatterns(repoRoot, lang));
  patternTermCache.set(cacheKey, watchTerms);
  return patternTermCache.get(cacheKey);
}
97
+
79
98
  export function scoreText(text, { file = '', lang = 'auto', gate = 30, repoRoot = DEFAULT_REPO_ROOT } = {}) {
80
99
  const prose = stripNonProse(text);
81
100
  const resolvedLang = detectLanguage(file, prose, lang);
@@ -84,21 +103,76 @@ export function scoreText(text, { file = '', lang = 'auto', gate = 30, repoRoot
84
103
  repoRoot,
85
104
  lexicon: getLexicon(resolvedLang, repoRoot),
86
105
  });
106
+ const patternHits = countPatternWatchHits(prose, getPatternWatchTerms(resolvedLang, repoRoot), resolvedLang);
87
107
  const paragraphCount = result.paragraphs.length;
88
108
  const hotCount = result.paragraphs.filter((p) => p.hot).length;
89
109
  const score = paragraphCount ? (hotCount / paragraphCount) * 100 : 0;
110
+ const signalScore = summarizeSignalStrength(result.paragraphs);
90
111
  return {
91
112
  file,
92
113
  lang: resolvedLang,
93
114
  paragraphCount,
94
115
  hotCount,
95
116
  score,
117
+ signalScore,
118
+ patternHits,
96
119
  gate,
97
120
  overGate: score > gate,
98
121
  skipped: paragraphCount === 0,
99
122
  };
100
123
  }
101
124
 
125
/**
 * Collect the distinct watch terms declared in pattern bodies.
 *
 * A line contributes terms when it has the shape `**Label** value` and the
 * label is accepted by `isWatchLabel`. The value is split on list separators,
 * each piece is passed through `cleanPatternTerm`, and pieces shorter than two
 * characters are discarded.
 *
 * @param {Array<{body?: string}>} [patterns=[]] - Pattern records; only `body` is read.
 * @returns {string[]} Unique terms in first-seen order.
 */
export function extractPatternWatchTerms(patterns = []) {
  // List separators: ASCII comma and semicolon, their full-width forms
  // (U+FF0C, U+FF1B) and the ideographic comma (U+3001). Written as escapes so
  // the CJK members cannot be folded into their ASCII look-alikes by tooling.
  const separators = /[,\uFF0C\u3001;\uFF1B]/;
  const terms = new Set();

  for (const pattern of patterns) {
    // Split on LF or CRLF: a stray "\r" at the end of a line would stop `(.+)$`
    // from matching, because `.` never matches a line terminator.
    for (const line of String(pattern.body || '').split(/\r?\n/)) {
      const match = line.match(/^\*\*([^*]+)\*\*\s*(.+)$/);
      if (!match || !isWatchLabel(match[1])) continue;

      // A spaced em dash between terms is treated as one more separator.
      const value = match[2].replace(/\s+—\s+/g, ', ');
      for (const raw of value.split(separators)) {
        const term = cleanPatternTerm(raw);
        if (term.length >= 2) terms.add(term);
      }
    }
  }

  return [...terms];
}
140
+
141
/**
 * Count how many of the given terms occur at least once in the text.
 *
 * Matching is plain substring containment. Each term adds at most one to the
 * total no matter how often it appears. When `lang` is `'en'` both the text
 * and the terms are lower-cased first; for any other language they are
 * compared as given.
 *
 * @param {string} text - Text to search; a falsy value yields 0.
 * @param {string[]} [terms=[]] - Terms to look for; a non-array or empty array yields 0.
 * @param {string} [lang='en'] - Language code controlling case folding.
 * @returns {number} Number of terms found in the text.
 */
export function countPatternWatchHits(text, terms = [], lang = 'en') {
  if (!text) return 0;
  if (!Array.isArray(terms) || terms.length === 0) return 0;

  const foldCase = lang === 'en';
  const normalize = (value) => (foldCase ? value.toLowerCase() : value);
  const body = normalize(String(text));

  let hits = 0;
  for (const candidate of terms) {
    const probe = normalize(candidate);
    if (!probe) continue; // empty terms never count
    if (body.includes(probe)) hits += 1;
  }
  return hits;
}
151
+
152
// Lower-case label fragments that mark a "watch words" line in the English,
// Korean, Chinese and Japanese pattern files. Built once rather than per call.
const WATCH_LABEL_NEEDLES = Object.freeze([
  'watch words',
  '주의 어휘',
  '고빈도 ai 어휘',
  '고빈도 어휘',
  '고빈도 표현',
  '高频词汇',
  '注意词汇',
  '注意词',
  '高頻度語彙',
  '注意語彙',
  '注意語',
]);

/**
 * Decide whether a bold label introduces a list of watch terms.
 *
 * The label is stripped of ASCII and full-width colons, trimmed and
 * lower-cased, then checked for containment of any known label fragment.
 *
 * @param {string} label - Text found between the `**` markers.
 * @returns {boolean} True when the label contains a known watch-label fragment.
 */
function isWatchLabel(label) {
  // U+FF1A is the full-width colon used in CJK text; it is written as an
  // escape so it cannot be folded into the ASCII colon by tooling.
  const normalized = String(label ?? '')
    .replace(/[:\uFF1A]/g, '')
    .trim()
    .toLowerCase();
  return WATCH_LABEL_NEEDLES.some((needle) => normalized.includes(needle));
}
168
+
169
// Characters trimmed from both ends of a raw term: whitespace, Markdown
// code/emphasis marks, ASCII quotes, curly quotes (U+201C/201D/2018/2019),
// CJK corner brackets (U+300C-300F), ASCII parentheses and full-width
// parentheses (U+FF08/FF09). The trailing set also drops "." and the
// ideographic full stop (U+3002). Non-ASCII members are written as escapes so
// they cannot be folded into ASCII look-alikes by tooling.
const PATTERN_TERM_LEADING =
  /^[\s`*_"'\u201C\u201D\u2018\u2019\u300C\u300D\u300E\u300F()\uFF08\uFF09]+/;
const PATTERN_TERM_TRAILING =
  /[\s`*_"'\u201C\u201D\u2018\u2019\u300C\u300D\u300E\u300F()\uFF08\uFF09.\u3002]+$/;

/**
 * Normalise one raw list item into a bare term.
 *
 * Strips wrapping punctuation from both ends, collapses internal runs of
 * whitespace to a single space, and trims the result.
 *
 * @param {string} term - Raw piece of a watch-word list; falsy values become ''.
 * @returns {string} The cleaned term, possibly empty.
 */
function cleanPatternTerm(term) {
  return String(term || '')
    .replace(PATTERN_TERM_LEADING, '')
    .replace(PATTERN_TERM_TRAILING, '')
    .replace(/\s+/g, ' ')
    .trim();
}
175
+
102
176
  function isInside(base, candidate) {
103
177
  const rel = relative(base, candidate);
104
178
  return rel === '' || (!rel.startsWith('..') && !rel.includes(`..${sep}`));
@@ -160,11 +234,13 @@ export function formatMarkdownReport(rows, { gate = 30, title = 'Patina prose ho
160
234
  return lines.join('\n');
161
235
  }
162
236
 
163
- lines.push('| status | file | lang | paragraphs | hot | score |');
164
- lines.push('|---|---|---:|---:|---:|---:|');
237
+ lines.push('| status | file | lang | paragraphs | hot | score | signal | pattern hits |');
238
+ lines.push('|---|---|---:|---:|---:|---:|---:|---:|');
165
239
  for (const row of rows) {
240
+ const signalScore = Number.isFinite(Number(row.signalScore)) ? Number(row.signalScore) : 0;
241
+ const patternHits = Number.isFinite(Number(row.patternHits)) ? Number(row.patternHits) : 0;
166
242
  lines.push(
167
- `| ${statusIcon(row)} | ${escapeCell(row.file)} | ${row.lang} | ${row.paragraphCount} | ${row.hotCount} | ${row.score.toFixed(1)}% |`
243
+ `| ${statusIcon(row)} | ${escapeCell(row.file)} | ${row.lang} | ${row.paragraphCount} | ${row.hotCount} | ${row.score.toFixed(1)}% | ${signalScore.toFixed(1)} | ${patternHits} |`
168
244
  );
169
245
  }
170
246
  return lines.join('\n');