patina-cli 3.11.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/.patina.default.yaml +29 -29
  2. package/CHANGELOG.md +53 -0
  3. package/NOTICE +21 -0
  4. package/README.md +117 -224
  5. package/README_JA.md +134 -77
  6. package/README_KR.md +132 -74
  7. package/README_ZH.md +137 -80
  8. package/SKILL.md +11 -20
  9. package/artifacts/rebaseline-2025/README.md +147 -0
  10. package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
  11. package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
  12. package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
  13. package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
  14. package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
  15. package/assets/brand/patina-badge.svg +18 -0
  16. package/assets/brand/patina-mark.svg +8 -0
  17. package/assets/demo/README.md +79 -0
  18. package/core/scoring.md +12 -12
  19. package/core/standalone-prompt.md +3 -1
  20. package/core/stylometry.md +93 -22
  21. package/docs/API.md +1554 -0
  22. package/docs/AUTHENTICATION.md +50 -26
  23. package/docs/AUTHENTICATION_KR.md +54 -29
  24. package/docs/BRANDING.md +9 -8
  25. package/docs/CLI.md +55 -14
  26. package/docs/COOKBOOK.md +8 -21
  27. package/docs/DEMO.md +32 -5
  28. package/docs/EXIT-CODES.md +2 -3
  29. package/docs/FALSE-POSITIVES.md +63 -0
  30. package/docs/FAQ.md +9 -1
  31. package/docs/FAQ_KR.md +3 -1
  32. package/docs/FLAG-PARITY.md +33 -47
  33. package/docs/ISSUE-WAVES.md +57 -0
  34. package/docs/PATTERNS-EN.md +67 -3
  35. package/docs/PATTERNS-JA.md +68 -2
  36. package/docs/PATTERNS-KO.md +70 -7
  37. package/docs/PATTERNS-ZH.md +67 -3
  38. package/docs/PATTERNS.md +5 -5
  39. package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
  40. package/docs/ROADMAP.md +46 -66
  41. package/docs/TRANSLATIONESE-KO.md +51 -0
  42. package/docs/audits/2026-05-deep-research.md +3 -1
  43. package/docs/benchmarks/README.md +51 -0
  44. package/docs/benchmarks/detector-comparison.json +69 -9
  45. package/docs/benchmarks/detector-comparison.md +10 -5
  46. package/docs/benchmarks/katfish-ko-latest.json +657 -0
  47. package/docs/benchmarks/katfish-ko-latest.md +77 -0
  48. package/docs/benchmarks/latest.json +1183 -108
  49. package/docs/benchmarks/latest.md +84 -60
  50. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
  51. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
  52. package/docs/benchmarks/rebaseline-latest.json +381 -0
  53. package/docs/benchmarks/rebaseline-latest.md +121 -0
  54. package/docs/benchmarks/register-stratified-latest.json +164 -0
  55. package/docs/benchmarks/register-stratified-latest.md +99 -0
  56. package/docs/benchmarks/register-stratified.md +43 -0
  57. package/docs/integrations/github-action.md +44 -11
  58. package/docs/integrations/playground.md +58 -0
  59. package/docs/integrations/pre-commit.md +5 -5
  60. package/docs/integrations/release.md +5 -3
  61. package/docs/integrations/static-sites.md +83 -0
  62. package/docs/research/2025-rebaseline-plan.md +71 -2
  63. package/docs/research/2026-rebaseline.md +102 -0
  64. package/docs/research/adversarial-mps.md +41 -0
  65. package/docs/research/ai-human-metrics.md +35 -23
  66. package/docs/research/human-eval-panel.md +42 -0
  67. package/docs/research/judge-agreement.md +24 -0
  68. package/docs/research/ko-2025-corpus-sources.md +135 -0
  69. package/docs/research/lexicon-freshness-audit.md +64 -0
  70. package/docs/research/zh-ja-lexicon-calibration.md +60 -0
  71. package/docs/social/patina-launch-copy.md +173 -100
  72. package/docs/social/patina-launch-execution.md +94 -0
  73. package/docs/social/patina-launch-korean-first.md +83 -0
  74. package/docs/social/signs-of-ai-writing.md +26 -0
  75. package/docs/social/signs-of-ai-writing_KR.md +26 -0
  76. package/lexicon/ai-en.md +21 -24
  77. package/lexicon/ai-ja.md +158 -0
  78. package/lexicon/ai-ko.md +9 -9
  79. package/lexicon/ai-zh.md +158 -0
  80. package/lexicon/provenance/ai-en.json +970 -0
  81. package/lexicon/provenance/ai-ja.json +542 -0
  82. package/lexicon/provenance/ai-ko.json +866 -0
  83. package/lexicon/provenance/ai-zh.json +542 -0
  84. package/package.json +49 -8
  85. package/patterns/en-communication.md +5 -0
  86. package/patterns/en-content.md +5 -0
  87. package/patterns/en-filler.md +5 -0
  88. package/patterns/en-language.md +29 -1
  89. package/patterns/en-structure.md +5 -0
  90. package/patterns/en-style.md +5 -0
  91. package/patterns/en-viral-hook.md +42 -2
  92. package/patterns/ja-communication.md +5 -0
  93. package/patterns/ja-content.md +5 -0
  94. package/patterns/ja-filler.md +5 -0
  95. package/patterns/ja-language.md +33 -1
  96. package/patterns/ja-structure.md +12 -0
  97. package/patterns/ja-style.md +5 -0
  98. package/patterns/ja-viral-hook.md +41 -2
  99. package/patterns/ko-communication.md +5 -0
  100. package/patterns/ko-content.md +5 -0
  101. package/patterns/ko-filler.md +5 -0
  102. package/patterns/ko-language.md +33 -1
  103. package/patterns/ko-structure.md +25 -6
  104. package/patterns/ko-style.md +5 -0
  105. package/patterns/ko-viral-hook.md +38 -2
  106. package/patterns/zh-communication.md +5 -0
  107. package/patterns/zh-content.md +5 -0
  108. package/patterns/zh-filler.md +5 -0
  109. package/patterns/zh-language.md +37 -1
  110. package/patterns/zh-structure.md +12 -0
  111. package/patterns/zh-style.md +5 -0
  112. package/patterns/zh-viral-hook.md +38 -2
  113. package/playground/README.md +55 -0
  114. package/playground/analytics.js +4 -0
  115. package/playground/analyzer.js +883 -0
  116. package/playground/app.js +157 -0
  117. package/playground/data/lexicons.js +343 -0
  118. package/playground/index.html +138 -0
  119. package/playground/styles.css +267 -0
  120. package/profiles/namuwiki.md +111 -0
  121. package/scripts/adversarial-mps-report.mjs +201 -0
  122. package/scripts/badge-json.mjs +79 -0
  123. package/scripts/benchmark-report.mjs +56 -9
  124. package/scripts/check-release-metadata.mjs +0 -2
  125. package/scripts/detector-comparison.mjs +7 -7
  126. package/scripts/generate-playground-data.mjs +77 -0
  127. package/scripts/katfish-calibration.mjs +464 -0
  128. package/scripts/lexicon-freshness.mjs +485 -0
  129. package/scripts/lint.mjs +1 -1
  130. package/scripts/precommit-score.mjs +4 -3
  131. package/scripts/prose-score.mjs +81 -5
  132. package/scripts/rebaseline-intake.mjs +242 -0
  133. package/scripts/rebaseline-score.mjs +268 -0
  134. package/scripts/rebaseline-summary.mjs +773 -0
  135. package/scripts/rebaseline-web-collect.mjs +410 -0
  136. package/scripts/update-benchmark-ranges.mjs +1 -0
  137. package/src/api.js +69 -105
  138. package/src/auth.js +50 -2
  139. package/src/backends/claude-cli.js +19 -4
  140. package/src/backends/codex-cli.js +19 -3
  141. package/src/backends/contract.js +230 -1
  142. package/src/backends/gemini-cli.js +18 -5
  143. package/src/backends/index.js +87 -12
  144. package/src/backends/kimi-cli.js +161 -0
  145. package/src/cli.js +577 -567
  146. package/src/commands/doctor.js +2 -2
  147. package/src/config.js +29 -0
  148. package/src/errors.js +53 -1
  149. package/src/features/discourse-tells.js +68 -0
  150. package/src/features/index.js +82 -8
  151. package/src/features/lexicon.js +40 -6
  152. package/src/features/markup-leakage.js +69 -0
  153. package/src/features/segment.js +41 -0
  154. package/src/features/signal-strength.js +81 -0
  155. package/src/features/stylometry.js +231 -1
  156. package/src/features/translationese.js +127 -0
  157. package/src/loader.js +76 -0
  158. package/src/logger.js +22 -23
  159. package/src/model-defaults.js +55 -0
  160. package/src/ouroboros.js +31 -0
  161. package/src/output.js +102 -90
  162. package/src/prompt-builder.js +103 -68
  163. package/src/providers.js +51 -4
  164. package/src/scoring.js +210 -2
  165. package/src/security.js +75 -0
  166. package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
  167. package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
  168. package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
  169. package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
  170. package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
  171. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
  172. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
  173. package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
  174. package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
  175. package/tests/quality/README.md +188 -11
  176. package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
  177. package/tests/quality/benchmark.mjs +39 -1
  178. package/tests/quality/dogfood.mjs +5 -3
  179. package/tests/quality/live-fixtures.jsonl +2 -0
  180. package/tests/quality/live-quality.mjs +596 -0
  181. package/tests/quality/ranking-metrics.mjs +136 -0
  182. package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
  183. package/vercel.json +53 -0
  184. package/SKILL-MAX.md +0 -455
  185. package/docs/internal/HARNESS.md +0 -14
  186. package/docs/internal/README.md +0 -14
  187. package/docs/internal/WARP.md +0 -23
  188. package/patina-max/SKILL.md +0 -523
  189. package/patina-max/composite.py +0 -457
  190. package/src/cache.js +0 -106
  191. package/src/commands/init.js +0 -208
  192. package/src/manifest.js +0 -162
  193. package/src/max-mode.js +0 -207
@@ -0,0 +1,883 @@
1
+ // Browser-safe deterministic analyzer for the patina static playground.
2
+ // Mirrors src/features/* without Node-only lexicon loading.
3
+
4
+ import { PLAYGROUND_LEXICONS } from './data/lexicons.js';
5
+
6
// Language codes the analyzer accepts; normalizeLang() maps anything else to
// DEFAULT_LANG.
export const SUPPORTED_LANGS = ['ko', 'en', 'zh', 'ja'];
export const DEFAULT_LANG = 'ko';
// Default lexicon-density gate. computeDensity() reports density as hits per
// 1,000 tokens, so this value is presumably on the same scale.
// NOTE(review): confirm against classifyLexiconHot, which consumes it.
export const DEFAULT_LEXICON_DENSITY_THRESHOLD = 2.0;
// Minimum lexicon match counts keyed by language code, with a `default` entry.
// NOTE(review): presumably the per-language floor before a paragraph can be
// lexicon-hot; confirm against classifyLexiconHot.
export const DEFAULT_LEXICON_MIN_HOT_MATCHES = {
  default: 1,
  ko: 2,
  zh: 2,
  ja: 2,
};
// classifyBurstiness(): CV below `low` is 'low', above `high` is 'high',
// anything in between is 'mid'.
export const DEFAULT_BURSTINESS_BANDS = { low: 0.30, high: 0.50 };
// analyzePlaygroundText() only assigns a burstiness band to paragraphs with at
// least this many sentences.
export const DEFAULT_MIN_BURSTINESS_SENTENCES = 3;
// classifyMattr(): value below `low` is 'low', above `high` is 'high',
// anything in between is 'mid'.
export const DEFAULT_MATTR_BANDS = { low: 0.55, high: 0.70 };
// Default sliding-window size, in tokens, used by mattr().
export const DEFAULT_MATTR_WINDOW = 50;
// Formatting tells mirror catalog patterns #13/#14/#17.
// Em dash is doc-level (3+ across the document). Bold fires at 5+ across the
// document or 3+ within one paragraph. Emoji currently mirrors the catalog's
// "any occurrence" contract for editorial/professional text.
export const DEFAULT_FORMATTING_THRESHOLDS = {
  emDashDoc: 3, // U+2014 occurrences across the document
  boldDoc: 5, // **bold** spans across the document
  boldParagraph: 3, // **bold** spans inside one paragraph
  emojiDoc: 1, // any emoji occurrence in the document
};
// Model-output leakage (#332) is near-proof-grade, so any hit short-circuits the
// document score into the 'heavily AI' band, mirroring src/scoring.js.
export const LEAKAGE_SCORE_FLOOR = 90;
// Default thresholds for classifyKoreanDiagnostics(). The composite is only
// evaluated once a paragraph reaches minSentences and minEojeols; each
// component then compares its metric against the matching max* limit.
export const DEFAULT_KO_DIAGNOSTIC_BANDS = {
  minSentences: 4,
  minEojeols: 20,
  spacing: {
    maxEojeolLengthCV: 0.38,
  },
  comma: {
    maxPerSentence: 1,
  },
  posProxy: {
    minMatchedCount: 10,
    maxClassDiversity: 0.26,
  },
};

// One two-paragraph sample per supported language code.
// NOTE(review): presumably used to pre-fill the playground editor; confirm in app.js.
export const SAMPLE_TEXT = {
  ko: '이 솔루션은 혁신적인 접근을 통해 업무 생산성을 극대화하고, 다양한 이해관계자에게 지속 가능한 가치를 제공합니다. 더 나아가 조직의 디지털 전환을 가속화하는 핵심 기반으로 자리매김하고 있습니다.\n\n하지만 현장에서 필요한 것은 거창한 선언보다 오늘 바로 줄어드는 반복 작업입니다.',
  en: 'This transformative solution empowers teams to unlock the full potential of a seamless workflow. In today\'s fast-paced landscape, it serves as a catalyst for meaningful collaboration and sustainable growth.\n\nThe real question is simpler: which repetitive step disappears first?',
  zh: '总而言之,这一方案能够全面提升用户体验,并为未来发展提供新的可能。从长远来看,它将在数字时代发挥着重要作用。\n\n先看一个具体场景:团队每天少复制三次表格。',
  ja: 'まとめると、この仕組みはユーザー体験を向上させ、より良い未来につながります。重要なのは、さまざまな場面で効果的に活用できる点です。\n\nまずは、毎朝の確認作業が一つ減るかどうかを見ます。',
};
53
+
54
// Sentence boundaries: a run of ASCII terminators followed by whitespace, a
// zero-width break right after a CJK terminator (U+3002 ideographic full stop,
// U+FF01/U+FF1F fullwidth ! and ?, U+2026 ellipsis), or one or more newlines.
// The CJK class is written with escapes so the fullwidth code points survive
// copy/paste and Unicode normalization; ASCII ! and ? are deliberately NOT in
// the lookbehind, otherwise "x?y=1" inside a URL would be cut in two.
const SENTENCE_SPLIT_RE = /[.!?]+\s+|(?<=[\u3002\uFF01\uFF1F\u2026])|\n+/u;
// Paragraphs are separated by a blank (whitespace-only) line.
const PARAGRAPH_SPLIT_RE = /\n\s*\n/;
// Markdown-style list item: "-", "*", "+" bullets or "1." / "1)" numbering.
const LIST_LINE_RE = /^\s*(?:[-*+]\s+|\d+[.)]\s+)/u;
// Leading/trailing characters that are neither letters nor digits.
const EDGE_PUNCT_RE = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
// One token per Han/Hiragana/Katakana character (plus U+30FC), or one per
// ASCII alphanumeric run.
const CJK_TOKEN_RE = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\u30FC]|[A-Za-z0-9]+/gu;
// Hangul compatibility jamo and precomposed syllables.
const HANGUL_RE = /[\u3131-\u318e\uac00-\ud7a3]/u;
// ASCII comma, fullwidth comma (U+FF0C) and ideographic comma (U+3001).
const COMMA_RE = /[,\uFF0C\u3001]/gu;
// Suffix patterns used as a coarse part-of-speech proxy for Korean eojeols.
// Every pattern is anchored to the END of the token; callers take the first
// class that matches, so declaration order is significant.
const KO_SUFFIX_CLASSES = {
  formal_ending: /(습니다|습니까|합니다|됩니다|입니다|입니다만|했습니다)$/u,
  plain_ending: /(다|었다|았다|겠다)$/u,
  topic: /(은|는)$/u,
  subject: /(이|가)$/u,
  // Grouped so that "$" applies to both alternatives; the ungrouped form
  // /을|를$/ matched 을 anywhere inside the token.
  object: /(을|를)$/u,
  location: /(에서|에게|으로|로)$/u,
  connective: /(고|며|지만|면서|도록)$/u,
};
70
+
71
// Return `lang` when it is one of SUPPORTED_LANGS; otherwise fall back to
// DEFAULT_LANG.
export function normalizeLang(lang) {
  if (SUPPORTED_LANGS.includes(lang)) {
    return lang;
  }
  return DEFAULT_LANG;
}
74
+
75
// Split text into trimmed, non-empty paragraphs. The text is NFC-normalized
// first and paragraphs are delimited by PARAGRAPH_SPLIT_RE. Falsy input yields [].
export function splitParagraphs(text) {
  if (!text) return [];
  const paragraphs = [];
  for (const chunk of text.normalize('NFC').split(PARAGRAPH_SPLIT_RE)) {
    const trimmed = chunk.trim();
    if (trimmed.length > 0) {
      paragraphs.push(trimmed);
    }
  }
  return paragraphs;
}
83
+
84
// Remove list content from a paragraph and return the remaining prose lines
// joined by "\n". Dropped lines are:
//   - any line matching LIST_LINE_RE (bulleted / numbered items), and
//   - after a line ending in ":", the plain lines that
//     countFollowingPlainListLines() reports as an unmarked list.
// Blank lines are kept and cancel any pending colon-list skipping.
function stripListBlocks(paragraph) {
  const sourceLines = String(paragraph ?? '').split(/\r?\n/);
  const kept = [];
  let pendingColonItems = 0;

  sourceLines.forEach((line, index) => {
    const content = line.trim();

    if (content === '') {
      pendingColonItems = 0;
      kept.push(line);
      return;
    }
    if (LIST_LINE_RE.test(line)) return;
    if (pendingColonItems > 0) {
      pendingColonItems -= 1;
      return;
    }
    if (content.endsWith(':')) {
      // The introducing line itself is prose and is kept below.
      pendingColonItems = countFollowingPlainListLines(sourceLines, index + 1);
    }
    kept.push(line);
  });

  return kept.join('\n');
}
108
+
109
// Starting at `start`, count the lines up to the next blank line that do NOT
// match LIST_LINE_RE. Returns that count when it is at least 2, otherwise 0.
function countFollowingPlainListLines(lines, start) {
  let plainLines = 0;
  let cursor = start;
  while (cursor < lines.length && lines[cursor].trim() !== '') {
    if (!LIST_LINE_RE.test(lines[cursor])) {
      plainLines += 1;
    }
    cursor += 1;
  }
  return plainLines < 2 ? 0 : plainLines;
}
119
+
120
// Split a paragraph into sentences using SENTENCE_SPLIT_RE.
// Each piece is trimmed and stripped of trailing terminators; empty pieces are
// dropped. Falsy input yields [].
export function splitSentences(paragraph) {
  if (!paragraph) return [];
  // Trailing terminators: ASCII . ! ? plus U+3002 (ideographic full stop),
  // U+FF01 / U+FF1F (fullwidth ! and ?) and U+2026 (ellipsis). Escapes are used
  // so the fullwidth forms cannot be silently folded into their ASCII twins.
  const trailingTerminators = /[.!?\u3002\uFF01\uFF1F\u2026]+$/u;
  return paragraph
    .split(SENTENCE_SPLIT_RE)
    .map((s) => s.trim().replace(trailingTerminators, ''))
    .filter((s) => s.length > 0);
}
127
+
128
// Sentence-split a paragraph after its list blocks have been stripped.
export function splitProseSentences(paragraph) {
  const proseOnly = stripListBlocks(paragraph);
  return splitSentences(proseOnly);
}
131
+
132
// Tokenize text with CJK_TOKEN_RE, strip edge punctuation from each match and
// drop tokens that end up empty.
function tokenizeCjk(text) {
  return Array.from(
    text.matchAll(CJK_TOKEN_RE),
    ([raw]) => raw.replace(EDGE_PUNCT_RE, ''),
  ).filter(Boolean);
}
140
+
141
// Tokenize text. For opts.lang 'zh' or 'ja' this delegates to tokenizeCjk();
// otherwise the text is split on whitespace, edge punctuation is stripped from
// each chunk and empty results are discarded. Falsy input yields [].
export function tokenize(text, opts = {}) {
  if (!text) return [];

  const usesCjkTokenizer = opts.lang === 'zh' || opts.lang === 'ja';
  if (usesCjkTokenizer) return tokenizeCjk(text);

  const words = [];
  for (const chunk of text.split(/\s+/)) {
    const word = chunk.replace(EDGE_PUNCT_RE, '');
    if (word.length > 0) {
      words.push(word);
    }
  }
  return words;
}
149
+
150
// Coefficient of variation (population standard deviation / mean) of the
// per-sentence token counts. Returns null for non-arrays, fewer than two
// entries, or a zero mean.
export function burstinessCV(sentenceTokenCounts) {
  const usable = Array.isArray(sentenceTokenCounts) && sentenceTokenCounts.length >= 2;
  if (!usable) return null;

  const n = sentenceTokenCounts.length;

  let total = 0;
  sentenceTokenCounts.forEach((value) => {
    total += value;
  });
  const average = total / n;
  if (average === 0) return null;

  let squaredDeviation = 0;
  sentenceTokenCounts.forEach((value) => {
    squaredDeviation += (value - average) ** 2;
  });

  return Math.sqrt(squaredDeviation / n) / average;
}
158
+
159
// Moving-average type-token ratio over lower-cased tokens.
// When there are fewer tokens than `window`, the plain type-token ratio of the
// whole list is returned. Otherwise the ratio is computed for every window of
// `window` consecutive tokens and averaged. Returns null for non-arrays or
// empty input.
export function mattr(tokens, window = DEFAULT_MATTR_WINDOW) {
  if (!Array.isArray(tokens) || tokens.length === 0) return null;

  const normalized = tokens.map((t) => t.toLowerCase());
  const total = normalized.length;
  if (total < window) {
    return new Set(normalized).size / total;
  }

  let ratioSum = 0;
  let windowsSeen = 0;
  let start = 0;
  while (start + window <= total) {
    const distinct = new Set(normalized.slice(start, start + window)).size;
    ratioSum += distinct / window;
    windowsSeen += 1;
    start += 1;
  }
  return ratioSum / windowsSeen;
}
172
+
173
// Map a burstiness CV onto 'low' / 'mid' / 'high' using the given bands.
// Values exactly on a band edge are 'mid'; null/undefined yields null.
export function classifyBurstiness(cv, bands = DEFAULT_BURSTINESS_BANDS) {
  if (cv == null) return null;
  if (cv < bands.low) return 'low';
  return cv > bands.high ? 'high' : 'mid';
}
179
+
180
// Map a MATTR value onto 'low' / 'mid' / 'high' using the given bands.
// Values exactly on a band edge are 'mid'; null/undefined yields null.
export function classifyMattr(value, bands = DEFAULT_MATTR_BANDS) {
  if (value == null) return null;
  if (value < bands.low) return 'low';
  return value > bands.high ? 'high' : 'mid';
}
186
+
187
// Summarize eojeol lengths for a paragraph: count, mean, coefficient of
// variation, and the share of eojeols with length exactly 1 and length >= 7.
// Only positive lengths are considered; both ratios are null when none remain.
// NOTE(review): koreanEojeols / koreanLength are defined elsewhere; presumably
// they yield space-delimited units and their Hangul length - confirm.
export function koreanSpacingFeatures(paragraph) {
  const lengths = koreanEojeols(paragraph)
    .map(koreanLength)
    .filter((length) => length > 0);
  const eojeolCount = lengths.length;

  const shareOf = (predicate) => (
    eojeolCount > 0 ? lengths.filter(predicate).length / eojeolCount : null
  );

  return {
    eojeolCount,
    meanEojeolLength: mean(lengths),
    eojeolLengthCV: coefficientOfVariation(lengths),
    shortEojeolRatio: shareOf((length) => length === 1),
    longEojeolRatio: shareOf((length) => length >= 7),
  };
}
201
+
202
// Count commas (as defined by COMMA_RE) in a paragraph and relate them to the
// sentence count and to the number of non-whitespace characters.
// perSentence is null unless sentenceCount > 0; per100Chars is null for text
// with no non-whitespace characters.
export function commaDensity(paragraph, sentenceCount = null) {
  const commaMatches = paragraph.match(COMMA_RE);
  const commaCount = (commaMatches ?? []).length;
  // Spread iterates by code point, so astral characters count once.
  const charCount = [...paragraph.replace(/\s+/gu, '')].length;

  let perSentence = null;
  if (sentenceCount > 0) {
    perSentence = commaCount / sentenceCount;
  }

  let per100Chars = null;
  if (charCount > 0) {
    per100Chars = (commaCount / charCount) * 100;
  }

  return { count: commaCount, perSentence, per100Chars };
}
211
+
212
// Suffix-based stand-in for part-of-speech diversity. Each eojeol is assigned
// the FIRST KO_SUFFIX_CLASSES entry whose pattern matches it; the result
// reports how many eojeols matched, how many distinct classes were seen, and
// the ratio of distinct classes to matched eojeols.
export function koreanPosDiversityProxy(paragraph) {
  const eojeols = koreanEojeols(paragraph);
  const suffixClasses = Object.entries(KO_SUFFIX_CLASSES);
  const seenClasses = new Set();
  let matchedCount = 0;

  for (const eojeol of eojeols) {
    const firstMatch = suffixClasses.find(([, pattern]) => pattern.test(eojeol));
    if (firstMatch === undefined) continue;
    seenClasses.add(firstMatch[0]);
    matchedCount += 1;
  }

  const total = eojeols.length;
  return {
    proxy: 'suffix',
    eojeolCount: total,
    matchedCount,
    coverage: total > 0 ? matchedCount / total : null,
    classCount: seenClasses.size,
    classDiversity: matchedCount > 0 ? seenClasses.size / matchedCount : null,
    classes: [...seenClasses].sort(),
  };
}
235
+
236
// Combine the three Korean rhythm signals (eojeol-length regularity, comma
// density, suffix-class diversity) into one verdict.
// The paragraph is only "hot" when it has enough text AND all three component
// strengths are positive; the reported strength is then the weakest component.
// Otherwise the result is cold with strength 0 and no reasons.
// NOTE(review): lowThresholdStrength / mergeKoreanDiagnosticBands live
// elsewhere in this file; presumably the former is positive when the value
// falls below the limit - confirm.
export function classifyKoreanDiagnostics({
  sentenceCount = 0,
  spacing,
  comma,
  posDiversity,
} = {}, bands = DEFAULT_KO_DIAGNOSTIC_BANDS) {
  const thresholds = mergeKoreanDiagnosticBands(bands);
  const cold = () => ({ hot: false, strength: 0, reasons: [], thresholds });

  // Written as negated >= so that NaN inputs are treated as "not enough text".
  const tooFewSentences = !(sentenceCount >= thresholds.minSentences);
  if (tooFewSentences) return cold();
  const tooFewEojeols = !((spacing?.eojeolCount ?? 0) >= thresholds.minEojeols);
  if (tooFewEojeols) return cold();

  const spacingStrength = lowThresholdStrength(
    spacing?.eojeolLengthCV,
    thresholds.spacing.maxEojeolLengthCV
  );
  const commaStrength = lowThresholdStrength(
    comma?.perSentence,
    thresholds.comma.maxPerSentence
  );

  // The suffix proxy only counts once enough eojeols were classified.
  let posStrength = 0;
  const matched = posDiversity?.matchedCount ?? 0;
  if (matched >= thresholds.posProxy.minMatchedCount) {
    posStrength = lowThresholdStrength(
      posDiversity?.classDiversity,
      thresholds.posProxy.maxClassDiversity
    );
  }

  const components = [
    ['regular-eojeol-length', spacingStrength],
    ['low-comma-density', commaStrength],
    ['low-suffix-class-diversity', posStrength],
  ];
  const allFired = components.every(([, strength]) => strength > 0);
  if (!allFired) return cold();

  return {
    hot: true,
    strength: Math.min(...components.map(([, strength]) => strength)),
    reasons: components.map(([reason]) => reason),
    thresholds,
  };
}
284
+
285
// Compile a lexicon phrase into a Unicode regex. The phrase is lower-cased,
// regex metacharacters are escaped, and every "~" becomes a gap of up to 40
// characters.
function phraseToRegex(phrase) {
  const literal = phrase.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = literal.split('~').join('.{0,40}');
  return new RegExp(pattern, 'u');
}
289
+
290
// Count lexicon hits in a paragraph and express them per 1,000 tokens.
// A strict entry hits when it equals a (lower-cased) token, or - for ko/zh/ja
// lexicons or entries containing a non-letter/non-digit - when it occurs as a
// substring of the lower-cased paragraph. A phrase hits when its
// phraseToRegex() pattern matches the lower-cased paragraph.
// Returns { matches, density, hits }; density is 0 when there are no tokens.
export function computeDensity(paragraphText, tokens, lexicon) {
  const haystack = paragraphText.toLowerCase();
  const vocabulary = new Set(tokens.map((token) => token.toLowerCase()));
  const substringAllowed = ['ko', 'zh', 'ja'].includes(lexicon.lang);
  const hits = [];

  const strictEntryMatches = (needle) => {
    if (vocabulary.has(needle)) return true;
    const hasInternalPunct = /[^\p{L}\p{N}]/u.test(needle);
    return (substringAllowed || hasInternalPunct) && haystack.includes(needle);
  };

  for (const entry of lexicon.strict) {
    if (strictEntryMatches(entry.toLowerCase())) {
      hits.push(entry);
    }
  }
  for (const phrase of lexicon.phrases) {
    if (phraseToRegex(phrase).test(haystack)) {
      hits.push(phrase);
    }
  }

  const matches = hits.length;
  const density = tokens.length > 0 ? (matches / tokens.length) * 1000 : 0;
  return { matches, density, hits };
}
313
+
314
// Format a number with a fixed number of decimals; null/undefined render as 'n/a'.
function fmt(value, digits = 2) {
  if (value == null) {
    return 'n/a';
  }
  return Number(value).toFixed(digits);
}
317
+
318
// Model-output leakage artifacts (issue #332): tokens LLM tooling injects that
// never appear in human prose. A single hit is near-proof-grade, so it forces
// the document hot. Mirrors src/features/markup-leakage.js.
// Each rule builds a fresh global regex per call, so no lastIndex state is
// shared between invocations.
const MARKUP_LEAKAGE_RULES = [
  {
    id: 'oai-citation-markup',
    label: 'OpenAI citation markup',
    build: () => /:contentReference|oaicite|oai_citation/gi,
  },
  {
    id: 'model-tool-token',
    label: 'Model tool token',
    build: () => /\bturn\d+(?:search|view|news|image|forecast|finance|fetch)\d*\b|\bnavlist\b|\bgrok_card\b/gi,
  },
  {
    id: 'object-replacement-char',
    label: 'Object-replacement character (U+FFFC)',
    // Written as an escape: the raw U+FFFC character is invisible and easily
    // lost in transit, which would leave an empty pattern (`//g`, i.e. a
    // line comment) behind.
    build: () => /\uFFFC/g,
  },
  {
    id: 'ai-tracking-param',
    label: 'AI-tool tracking parameter in URL',
    build: () => /utm_source=(?:chatgpt\.com|openai\.com|perplexity\.ai|claude\.ai|gemini\.google\.com)|[?&](?:ref|utm_source)=chatgpt/gi,
  },
  {
    id: 'explicit-self-identification',
    label: 'Explicit AI self-identification',
    build: () => /\bas an? (?:AI|artificial intelligence) language model\b|\bas a large language model\b|\bas a language model\b|\bas an AI assistant\b|\bI am an AI\b|\bI'?m an AI\b/gi,
  },
];

// Scan text for leakage artifacts.
// Returns { leaked, hits } where each hit carries the rule id and label, the
// number of matches, and up to three distinct trimmed sample matches.
// Non-string or empty input yields { leaked: false, hits: [] }.
export function detectMarkupLeakage(text) {
  const str = typeof text === 'string' ? text : '';
  const hits = [];
  if (!str) return { leaked: false, hits };

  for (const rule of MARKUP_LEAKAGE_RULES) {
    const found = str.match(rule.build());
    if (found === null || found.length === 0) continue;
    const distinctSamples = new Set(found.map((sample) => sample.trim()).filter(Boolean));
    hits.push({
      id: rule.id,
      label: rule.label,
      count: found.length,
      samples: [...distinctSamples].slice(0, 3),
    });
  }

  return { leaked: hits.length > 0, hits };
}
341
+
342
// Density-gated discourse tells (issue #334): fake-candor / manufactured-intimacy
// openers and decorative thematic breaks. Mirrors src/features/discourse-tells.js.
const FAKE_CANDOR_RULES = [
  /\bhere'?s the thing\b/gi,
  /\bhere'?s the kicker\b/gi,
  /\blet'?s be honest\b/gi,
  /\blet'?s be real\b/gi,
  /\bthe truth is\b/gi,
  /\bi'?ll be honest(?: with you)?\b/gi,
  /\breal talk\b/gi,
];
// Number of openers at which detectFakeCandor() reports hot.
export const DEFAULT_FAKE_CANDOR_MIN = 2;
// Number of dividers at which detectThematicBreaks() reports hot.
export const DEFAULT_THEMATIC_BREAK_MIN = 3;
// A line made only of three or more "-", "*" or "_" (optionally space-separated).
const THEMATIC_BREAK_LINE = /^[ \t]*(?:-[ \t]*){3,}$|^[ \t]*(?:\*[ \t]*){3,}$|^[ \t]*(?:_[ \t]*){3,}$/;
// An ATX markdown heading: 1-6 "#" followed by whitespace and text.
const HEADING_LINE = /^[ \t]*#{1,6}[ \t]+\S/;

// Count fake-candor openers in text.
// Returns the total match count, up to five distinct lower-cased matches (in
// rule order, then order of appearance), whether the count reaches
// DEFAULT_FAKE_CANDOR_MIN, and that threshold. Non-string input counts as ''.
export function detectFakeCandor(text) {
  const str = typeof text === 'string' ? text : '';
  const distinct = new Set();
  let count = 0;

  for (const rule of FAKE_CANDOR_RULES) {
    const found = str.match(rule) ?? [];
    count += found.length;
    for (const phrase of found) {
      distinct.add(phrase.trim().toLowerCase());
    }
  }

  return {
    count,
    hits: [...distinct].slice(0, 5),
    hot: count >= DEFAULT_FAKE_CANDOR_MIN,
    threshold: DEFAULT_FAKE_CANDOR_MIN,
  };
}
371
+
372
// Convenience wrapper: only the match count from detectFakeCandor().
export function countFakeCandor(text) {
  const { count } = detectFakeCandor(text);
  return count;
}
375
+
376
// Count markdown divider lines in text, and how many of them are followed
// (ignoring blank lines) by a heading line. `hot` is true once the divider
// count reaches DEFAULT_THEMATIC_BREAK_MIN. Non-string input counts as ''.
export function detectThematicBreaks(text) {
  const lines = (typeof text === 'string' ? text : '').split(/\r?\n/);

  // True when the first non-blank line at or after `from` is a heading.
  const headingFollows = (from) => {
    const next = lines.slice(from).find((line) => line.trim() !== '');
    return next !== undefined && HEADING_LINE.test(next);
  };

  let count = 0;
  let adjacentToHeading = 0;
  lines.forEach((line, index) => {
    if (!THEMATIC_BREAK_LINE.test(line)) return;
    count += 1;
    if (headingFollows(index + 1)) {
      adjacentToHeading += 1;
    }
  });

  return {
    count,
    adjacentToHeading,
    hot: count >= DEFAULT_THEMATIC_BREAK_MIN,
    threshold: DEFAULT_THEMATIC_BREAK_MIN,
  };
}
396
+
397
// One pictographic code point with an optional variation selector and an
// optional skin-tone modifier.
const EMOJI_BASE_RE = '\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\p{Emoji_Modifier})?';
// A full emoji cluster: a regional-indicator pair (flag), a keycap sequence,
// or one or more base emoji joined by ZWJ (U+200D).
const EMOJI_CLUSTER_PATTERN = `(?:\\p{Regional_Indicator}{2}|[#*0-9]\\uFE0F?\\u20E3|${EMOJI_BASE_RE}(?:\\u200D${EMOJI_BASE_RE})*)`;
// Non-global variant for stateless .test() on a single grapheme.
const EMOJI_CLUSTER_RE = new RegExp(EMOJI_CLUSTER_PATTERN, 'u');
// Global variant for counting matches over a whole string.
const EMOJI_CLUSTER_RE_GLOBAL = new RegExp(EMOJI_CLUSTER_PATTERN, 'gu');

// Lazily created, shared grapheme segmenter; `undefined` means "not probed
// yet", `null` means Intl.Segmenter is unavailable in this runtime.
let cachedGraphemeSegmenter;

// Return a grapheme-granularity Intl.Segmenter, or null when the runtime has
// none. The instance is created once and reused: countEmojiClusters() asks for
// it on every call through its default parameter.
function getGraphemeSegmenter() {
  if (cachedGraphemeSegmenter === undefined) {
    cachedGraphemeSegmenter =
      typeof Intl !== 'undefined' && typeof Intl.Segmenter === 'function'
        ? new Intl.Segmenter(undefined, { granularity: 'grapheme' })
        : null;
  }
  return cachedGraphemeSegmenter;
}

// Count emoji clusters in text.
// With a segmenter, each grapheme that contains an emoji cluster counts once;
// with `segmenter` explicitly null (or no Intl.Segmenter), the global regex
// match count is used instead. null/undefined/empty text yields 0.
function countEmojiClusters(text, segmenter = getGraphemeSegmenter()) {
  const str = String(text ?? '');
  if (!str) return 0;
  if (segmenter) {
    let count = 0;
    for (const { segment } of segmenter.segment(str)) {
      if (EMOJI_CLUSTER_RE.test(segment)) count++;
    }
    return count;
  }
  return (str.match(EMOJI_CLUSTER_RE_GLOBAL) || []).length;
}
420
+
421
// Count formatting tells in a chunk of raw text: em dashes (U+2014), markdown
// **bold** spans, and emoji clusters. `opts.segmenter` is forwarded to
// countEmojiClusters(). null/undefined text is treated as ''.
export function countFormatting(text, opts = {}) {
  const source = String(text ?? '');
  const occurrences = (pattern) => (source.match(pattern) || []).length;

  return {
    emDash: occurrences(/—/gu),
    bold: occurrences(/\*\*(?=\S)(?:[^*]|\*(?!\*))+?\*\*/gu),
    emoji: countEmojiClusters(source, opts.segmenter),
  };
}
430
+
431
// Turn one paragraph's signal flags into an ordered list of
// { code, label, detail } reasons for display. Order is fixed: fake candor,
// thematic breaks, leakage, em dash, bold, emoji, burstiness, MATTR, lexicon,
// Korean composite. A sub-threshold lexicon match still yields an
// informational 'lexicon-hit' entry.
function buildReasons({ cvBand, mattrBand, lexiconHot, lex, koDiagnostics, formatting, formattingThresholds, leakage, candor, thematicBreaks }) {
  const reasons = [];
  const add = (code, label, detail) => {
    reasons.push({ code, label, detail });
  };

  if (candor?.hot) {
    add(
      'fake-candor',
      'Fake-candor opener',
      `Manufactured-intimacy opener ("here's the thing", "the truth is", …); ${candor.docCount} in the document (threshold ${DEFAULT_FAKE_CANDOR_MIN}).`,
    );
  }

  if (thematicBreaks?.hot) {
    add(
      'thematic-break',
      'Decorative thematic break',
      `${thematicBreaks.docCount} markdown dividers in the document (threshold ${DEFAULT_THEMATIC_BREAK_MIN}); this paragraph carries ${thematicBreaks.count}.`,
    );
  }

  if (leakage?.leaked) {
    const labels = leakage.hits.map((h) => h.label).join(', ');
    add(
      'model-output-leakage',
      'Model-output leakage',
      `Pasted-LLM artifact present (${labels}). A single hit is near-proof-grade.`,
    );
  }

  if (formatting?.emDashHot) {
    add(
      'em-dash-overuse',
      'Em dash overuse',
      `${formatting.docEmDash} em dashes in the document (threshold ${formattingThresholds.emDashDoc}); this paragraph carries ${formatting.emDash}.`,
    );
  }

  if (formatting?.boldHot) {
    // Distinguish "this paragraph alone tripped the gate" from the doc-level gate.
    const paragraphOnly = formatting.bold >= formattingThresholds.boldParagraph && formatting.docBold < formattingThresholds.boldDoc;
    let detail;
    if (paragraphOnly) {
      detail = `${formatting.bold} bold spans in this paragraph (threshold ${formattingThresholds.boldParagraph}).`;
    } else {
      detail = `${formatting.docBold} bold spans in the document (threshold ${formattingThresholds.boldDoc}); this paragraph carries ${formatting.bold}.`;
    }
    add('bold-overuse', 'Boldface overuse', detail);
  }

  if (formatting?.emojiHot) {
    add(
      'emoji-overuse',
      'Emoji overuse',
      `${formatting.docEmoji} emoji in the document (catalog threshold: any occurrence); this paragraph carries ${formatting.emoji}.`,
    );
  }

  if (cvBand === 'low') {
    add(
      'low-burstiness',
      'Low burstiness',
      'Sentence lengths are unusually even, a common polished-LLM tell.',
    );
  }

  if (mattrBand === 'low') {
    add(
      'low-mattr',
      'Low lexical variety',
      'The moving type-token ratio is below the editing threshold.',
    );
  }

  const hitPhrase = () => `${lex.matches} lexicon hit${lex.matches === 1 ? '' : 's'}`;
  if (lexiconHot) {
    add(
      'lexicon-density',
      'AI-favored phrasing density',
      `${hitPhrase()} / ${fmt(lex.density, 1)} per 1k tokens.`,
    );
  } else if (lex.matches > 0) {
    add(
      'lexicon-hit',
      'AI-favored phrase present',
      `${hitPhrase()}, below the hot-zone threshold.`,
    );
  }

  if (koDiagnostics?.hot) {
    add(
      'ko-diagnostics',
      'Korean rhythm composite',
      `Regular spacing, low comma rhythm, and low suffix diversity matched together (strength ${fmt(koDiagnostics.strength, 1)}).`,
    );
  }

  return reasons;
}
515
+
516
/**
 * Run the deterministic playground audit over `text`, paragraph by paragraph.
 *
 * Formatting tells (em dash, bold, emoji), fake-candor openers and thematic
 * breaks are tallied for the whole document first. A paragraph is then marked
 * hot for one of those signals only when the document-level gate is met AND the
 * paragraph itself carries at least one occurrence (bold additionally has a
 * per-paragraph threshold). Burstiness, MATTR, lexicon density, Korean
 * diagnostics (only when `lang` is 'ko') and markup leakage are evaluated per
 * paragraph.
 *
 * @param {string} text - Input text to audit.
 * @param {object} [opts]
 * @param {string} [opts.lang] - Language code; DEFAULT_LANG is used when nullish.
 * @param {number} [opts.lexiconDensityThreshold] - Overrides DEFAULT_LEXICON_DENSITY_THRESHOLD.
 * @param {number|object} [opts.lexiconMinHotMatches] - Overrides DEFAULT_LEXICON_MIN_HOT_MATCHES.
 * @param {object} [opts.formattingThresholds] - Overrides DEFAULT_FORMATTING_THRESHOLDS.
 * @returns {object} Summary with `overall` (0-100 share of hot paragraphs, floored
 *   at LEAKAGE_SCORE_FLOOR when the whole text shows markup leakage), `band`,
 *   counts, document-level tells, the per-paragraph results and `auditItems`.
 */
export function analyzePlaygroundText(text, opts = {}) {
  const {
    lang: requestedLang,
    lexiconDensityThreshold,
    lexiconMinHotMatches,
    formattingThresholds: formattingOverride,
  } = opts;
  const lang = normalizeLang(requestedLang ?? DEFAULT_LANG);
  const lexicon = PLAYGROUND_LEXICONS[lang];
  const paragraphs = splitParagraphs(text);
  const densityThreshold = lexiconDensityThreshold ?? DEFAULT_LEXICON_DENSITY_THRESHOLD;
  const minHotMatches = lexiconMinHotMatches ?? DEFAULT_LEXICON_MIN_HOT_MATCHES;
  const formattingThresholds = formattingOverride ?? DEFAULT_FORMATTING_THRESHOLDS;

  // Document-level formatting pass: total the tells over every paragraph first,
  // then attribute hot status to the paragraphs that carry the token
  // (catalog #13/#14 are doc-level).
  const paraFormatting = paragraphs.map(countFormatting);
  const docTotal = (key) => paraFormatting.reduce((sum, counts) => sum + counts[key], 0);
  const docEmDash = docTotal('emDash');
  const docBold = docTotal('bold');
  const docEmoji = docTotal('emoji');

  // Fake-candor openers (#334): doc-level density gate, then attribute to the
  // paragraphs that carry an opener (same shape as the em-dash pass).
  const paraCandor = paragraphs.map(countFakeCandor);
  const docFakeCandor = detectFakeCandor(text);
  const docCandor = docFakeCandor.count;
  const candorGateOpen = docCandor >= DEFAULT_FAKE_CANDOR_MIN;

  const paraThematicBreaks = paragraphs.map(detectThematicBreaks);
  const docThematicBreaks = detectThematicBreaks(text);

  const analyzeParagraph = (paragraph, idx) => {
    const sentences = splitProseSentences(paragraph);
    const sentenceTokens = sentences.map((sentence) => tokenize(sentence, { lang }));
    const sentenceTokenCounts = sentenceTokens.map((list) => list.length);
    const tokens = sentenceTokens.flat();

    const cv = burstinessCV(sentenceTokenCounts);
    // Burstiness is only banded once the paragraph has enough sentences.
    const cvBand =
      sentences.length >= DEFAULT_MIN_BURSTINESS_SENTENCES ? classifyBurstiness(cv) : null;
    const mattrValue = mattr(tokens);
    const mattrBand = classifyMattr(mattrValue);
    const lex = computeDensity(paragraph, tokens, lexicon);
    const koSignals = lang === 'ko' ? buildKoreanSignals(paragraph, sentences.length) : {};
    const lexiconHot = classifyLexiconHot(lex, {
      lang,
      densityThreshold,
      minHotMatches,
    });

    const counts = paraFormatting[idx];
    const emDashHot = docEmDash >= formattingThresholds.emDashDoc && counts.emDash >= 1;
    const boldHot =
      (docBold >= formattingThresholds.boldDoc && counts.bold >= 1) ||
      counts.bold >= formattingThresholds.boldParagraph;
    const emojiHot = docEmoji >= formattingThresholds.emojiDoc && counts.emoji >= 1;
    const formatting = { ...counts, docEmDash, docBold, docEmoji, emDashHot, boldHot, emojiHot };

    // Model-output leakage (#332): per-paragraph hit, fires on a single occurrence.
    const leakage = detectMarkupLeakage(paragraph);
    // Fake-candor (#334): this paragraph carries an opener AND the doc total meets the gate.
    const candorHot = candorGateOpen && paraCandor[idx] >= 1;
    const ownBreaks = paraThematicBreaks[idx];
    const thematicBreakHot = docThematicBreaks.hot && ownBreaks.count >= 1;

    const hot =
      cvBand === 'low' ||
      mattrBand === 'low' ||
      lexiconHot ||
      Boolean(koSignals.koDiagnostics?.hot) ||
      emDashHot ||
      boldHot ||
      emojiHot ||
      leakage.leaked ||
      candorHot ||
      thematicBreakHot;

    const thematicBreaks = {
      ...ownBreaks,
      docCount: docThematicBreaks.count,
      docAdjacentToHeading: docThematicBreaks.adjacentToHeading,
      hot: thematicBreakHot,
    };
    const reasons = buildReasons({
      cvBand,
      mattrBand,
      lexiconHot,
      lex,
      koDiagnostics: koSignals.koDiagnostics,
      formatting,
      formattingThresholds,
      leakage,
      candor: { hot: candorHot, docCount: docCandor },
      thematicBreaks,
    });

    return {
      id: `P${idx + 1}`,
      text: paragraph,
      sentenceCount: sentences.length,
      tokenCount: tokens.length,
      sentenceTokenCounts,
      burstiness: { cv, band: cvBand },
      mattr: { value: mattrValue, band: mattrBand },
      lexicon: { ...lex, hot: lexiconHot },
      formatting,
      leakage,
      ...koSignals,
      thematicBreaks,
      hot,
      reasons,
    };
  };

  const analyzed = paragraphs.map(analyzeParagraph);
  const paragraphCount = paragraphs.length;
  const hotCount = analyzed.filter((entry) => entry.hot).length;
  const hotRatio = paragraphCount === 0 ? 0 : Math.round((hotCount / paragraphCount) * 100);
  // Leakage anywhere in the full text lifts the score to at least the floor.
  const markupLeakage = detectMarkupLeakage(text);
  const overall = markupLeakage.leaked ? Math.max(hotRatio, LEAKAGE_SCORE_FLOOR) : hotRatio;

  return {
    lang,
    overall,
    band: scoreBand(overall),
    paragraphCount,
    hotCount,
    totalTokens: analyzed.reduce((sum, entry) => sum + entry.tokenCount, 0),
    markupLeakage,
    discourseTells: {
      fakeCandor: docFakeCandor,
      thematicBreaks: docThematicBreaks,
      hot: candorGateOpen || docThematicBreaks.hot,
    },
    paragraphs: analyzed,
    auditItems: analyzed.filter((entry) => entry.hot || entry.lexicon.matches > 0),
    note: 'Audit-only deterministic score. It marks editing hotspots, not authorship or intent.',
  };
}
640
+
641
/**
 * Decide whether a paragraph's lexicon statistics make it a hotspot.
 *
 * Hot means: at least the resolved minimum number of matches AND a density
 * strictly above the threshold. Missing stats count as zero matches / zero density.
 *
 * @param {{matches?: number, density?: number}|null|undefined} lexiconStats
 * @param {object} [options]
 * @param {string} [options.lang] - Language used to resolve a per-language minimum.
 * @param {number} [options.densityThreshold]
 * @param {number|object} [options.minHotMatches]
 * @returns {boolean}
 */
function classifyLexiconHot(lexiconStats, options = {}) {
  const {
    lang,
    densityThreshold = DEFAULT_LEXICON_DENSITY_THRESHOLD,
    minHotMatches = DEFAULT_LEXICON_MIN_HOT_MATCHES,
  } = options;
  const hitCount = lexiconStats?.matches ?? 0;
  const hitDensity = lexiconStats?.density ?? 0;
  const required = resolveMinHotMatches(lang, minHotMatches);
  // Written as a negated >= so a NaN count is rejected.
  if (!(hitCount >= required)) return false;
  return hitDensity > densityThreshold;
}
654
+
655
/**
 * Resolve the minimum lexicon match count for a language.
 *
 * `minHotMatches` may be a finite number (used for every language) or an object
 * keyed by lower-cased language code with an optional `default` entry. The
 * result is never below 1; anything unusable resolves to 1.
 *
 * @param {string} [lang] - Language code; non-strings look up the `default` key.
 * @param {number|object|null|undefined} minHotMatches
 * @returns {number}
 */
function resolveMinHotMatches(lang, minHotMatches) {
  const isFiniteNumber = (candidate) =>
    typeof candidate === 'number' && Number.isFinite(candidate);

  if (isFiniteNumber(minHotMatches)) return Math.max(1, minHotMatches);

  const key = typeof lang === 'string' ? lang.toLowerCase() : 'default';
  const configured = minHotMatches?.[key] ?? minHotMatches?.default;
  if (!isFiniteNumber(configured)) return 1;
  return Math.max(1, configured);
}
663
+
664
/**
 * Map an overall score to its display band.
 *
 * Scores up to 20 are 'low', up to 50 are 'mixed', and everything else
 * (including values that compare false against both limits) is 'high'.
 *
 * @param {number} score
 * @returns {{key: string, label: string, tone: string}} A fresh band descriptor.
 */
export function scoreBand(score) {
  const ceilings = [
    [20, { key: 'low', label: 'Low AI-likeness', tone: 'good' }],
    [50, { key: 'mixed', label: 'Mixed signals', tone: 'warn' }],
  ];
  for (const [ceiling, band] of ceilings) {
    if (score <= ceiling) return band;
  }
  return { key: 'high', label: 'Review suggested', tone: 'hot' };
}
669
+
670
/**
 * Escape the five HTML-significant characters (& < > " ') in a value.
 *
 * The value is stringified first, so numbers, null and undefined are accepted.
 *
 * @param {*} value
 * @returns {string} Text safe to place in HTML element content or quoted attributes.
 */
export function escapeHtml(value) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // One pass over the input, so an inserted '&' is never escaped a second time.
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
678
+
679
/**
 * Collect the Korean-specific signals for one paragraph.
 *
 * Computes spacing features, comma density and the POS-diversity proxy, then
 * feeds all three (plus the sentence count) to classifyKoreanDiagnostics.
 *
 * @param {string} paragraph
 * @param {number} sentenceCount - Number of sentences found in the paragraph.
 * @returns {{spacing: *, comma: *, posDiversity: *, koDiagnostics: *}}
 */
function buildKoreanSignals(paragraph, sentenceCount) {
  const signals = {
    spacing: koreanSpacingFeatures(paragraph),
    comma: commaDensity(paragraph, sentenceCount),
    posDiversity: koreanPosDiversityProxy(paragraph),
  };
  // The classifier sees the three measurements but not its own (yet unset) result.
  signals.koDiagnostics = classifyKoreanDiagnostics({ sentenceCount, ...signals });
  return signals;
}
697
+
698
/**
 * Split a paragraph into its Hangul-bearing whitespace-delimited chunks.
 *
 * Each chunk has leading and trailing non-Hangul characters stripped; chunks
 * that contain no Hangul afterwards are dropped.
 *
 * @param {string} paragraph
 * @returns {string[]} Trimmed chunks, or [] when the paragraph is empty or has no Hangul.
 */
function koreanEojeols(paragraph) {
  if (!paragraph || !HANGUL_RE.test(paragraph)) return [];
  const eojeols = [];
  for (const piece of paragraph.split(/\s+/u)) {
    const core = piece.replace(/^[^\u3131-\u318e\uac00-\ud7a3]+|[^\u3131-\u318e\uac00-\ud7a3]+$/gu, '');
    if (HANGUL_RE.test(core)) eojeols.push(core);
  }
  return eojeols;
}
705
+
706
/**
 * Count the Hangul characters (compatibility jamo and precomposed syllables) in a string.
 *
 * @param {string} value
 * @returns {number} Number of matching characters; 0 when there are none.
 */
function koreanLength(value) {
  const hangulChars = value.match(/[\u3131-\u318e\uac00-\ud7a3]/gu);
  return hangulChars?.length ?? 0;
}
709
+
710
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} values
 * @returns {number|null} The mean, or null for a non-array or an empty array.
 */
function mean(values) {
  const hasValues = Array.isArray(values) && values.length > 0;
  if (!hasValues) return null;
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / values.length;
}
714
+
715
/**
 * Coefficient of variation: population standard deviation divided by the mean.
 *
 * @param {number[]} values
 * @returns {number|null} The ratio, or null when there are fewer than two
 *   values, the input is not an array, or the mean is zero/unavailable.
 */
function coefficientOfVariation(values) {
  if (!Array.isArray(values) || values.length < 2) return null;
  const center = mean(values);
  // A zero (or missing) mean would make the ratio undefined.
  if (!center) return null;
  let squaredDeviations = 0;
  values.forEach((value) => {
    squaredDeviations += (value - center) ** 2;
  });
  const variance = squaredDeviations / values.length;
  return Math.sqrt(variance) / center;
}
722
+
723
/**
 * Overlay caller-supplied Korean diagnostic bands on DEFAULT_KO_DIAGNOSTIC_BANDS.
 *
 * Every leaf is taken from `bands` only when it is a finite number; otherwise
 * the corresponding default is used. Unknown keys in `bands` are ignored.
 *
 * @param {object} [bands] - Partial overrides.
 * @returns {object} A complete bands object with minSentences, minEojeols,
 *   spacing, comma and posProxy settings.
 */
function mergeKoreanDiagnosticBands(bands = {}) {
  const defaults = DEFAULT_KO_DIAGNOSTIC_BANDS;
  const { minSentences, minEojeols, spacing, comma, posProxy } = bands;

  const spacingBands = {
    maxEojeolLengthCV: resolveNumber(spacing?.maxEojeolLengthCV, defaults.spacing.maxEojeolLengthCV),
  };
  const commaBands = {
    maxPerSentence: resolveNumber(comma?.maxPerSentence, defaults.comma.maxPerSentence),
  };
  const posProxyBands = {
    minMatchedCount: resolveNumber(posProxy?.minMatchedCount, defaults.posProxy.minMatchedCount),
    maxClassDiversity: resolveNumber(posProxy?.maxClassDiversity, defaults.posProxy.maxClassDiversity),
  };

  return {
    minSentences: resolveNumber(minSentences, defaults.minSentences),
    minEojeols: resolveNumber(minEojeols, defaults.minEojeols),
    spacing: spacingBands,
    comma: commaBands,
    posProxy: posProxyBands,
  };
}
751
+
752
/**
 * Return `value` when it is a finite number, otherwise `fallback`.
 *
 * @param {*} value
 * @param {*} fallback
 * @returns {*}
 */
function resolveNumber(value, fallback) {
  if (typeof value !== 'number') return fallback;
  return Number.isFinite(value) ? value : fallback;
}
755
+
756
/**
 * Express how far `value` sits below `threshold` as a 0-100 strength.
 *
 * A value at the threshold scores 0, a value of zero scores 100, and values
 * above the threshold score 0. A threshold of exactly 0 is a special case:
 * only values <= 0 score (100). Non-finite values and missing or negative
 * thresholds score 0.
 *
 * @param {number} value
 * @param {number} threshold
 * @returns {number} Strength clamped to [0, 100].
 */
function lowThresholdStrength(value, threshold) {
  const valueIsFinite = typeof value === 'number' && Number.isFinite(value);
  if (!valueIsFinite) return 0;

  if (threshold === 0) {
    return value <= 0 ? 100 : 0;
  }

  const unusable = !threshold || threshold < 0 || value > threshold;
  if (unusable) return 0;

  const raw = (1 - value / threshold) * 100;
  const capped = Math.min(100, raw);
  return Math.max(0, capped);
}
762
+
763
/**
 * Locate every case-insensitive occurrence of the lexicon hits inside `text`.
 *
 * Hits containing '~' are skipped, as are empty hits. The returned ranges are
 * UTF-16 offsets into `text`, sorted by start, and never overlap: when two
 * candidate ranges overlap, the one that starts first (the longer one on a tie)
 * is kept and the other is dropped.
 *
 * @param {string} text - Text to search.
 * @param {Iterable<string>} hits - Lexicon entries to look for.
 * @returns {Array<{start: number, end: number}>} Non-overlapping ranges into `text`.
 */
function collectHitRanges(text, hits) {
  // Lower-case for matching while keeping every offset aligned with the input.
  // String#toLowerCase can lengthen a string (U+0130 becomes two code units),
  // which would shift all later offsets; in that case fold per code point and
  // leave any character whose lower-case form has a different length untouched.
  const foldCase = (input) => {
    const lowered = input.toLowerCase();
    if (lowered.length === input.length) return lowered;
    return Array.from(input, (ch) => {
      const low = ch.toLowerCase();
      return low.length === ch.length ? low : ch;
    }).join('');
  };

  const haystack = foldCase(text);
  const ranges = [];
  for (const hit of hits) {
    if (hit.includes('~')) continue;
    const needle = foldCase(hit);
    if (!needle) continue;
    let from = 0;
    while (from < haystack.length) {
      const idx = haystack.indexOf(needle, from);
      if (idx === -1) break;
      // needle has the same length as hit, so the end offset is valid in `text`.
      ranges.push({ start: idx, end: idx + needle.length });
      from = idx + needle.length;
    }
  }

  // Earliest start first; on equal starts the longer range wins.
  ranges.sort((a, b) => a.start - b.start || b.end - a.end);
  const kept = [];
  for (const range of ranges) {
    const prev = kept[kept.length - 1];
    if (!prev || range.start >= prev.end) kept.push({ ...range });
  }
  return kept;
}
785
+
786
/**
 * Render `text` as HTML with every lexicon hit wrapped in <mark>.
 *
 * All text, inside and outside the marks, is passed through escapeHtml.
 *
 * @param {string} text
 * @param {Iterable<string>} hits - Lexicon entries to highlight.
 * @returns {string} Escaped HTML.
 */
export function highlightLexiconHits(text, hits) {
  const ranges = collectHitRanges(text, hits);
  if (ranges.length === 0) return escapeHtml(text);

  const pieces = [];
  let cursor = 0;
  for (const { start, end } of ranges) {
    pieces.push(escapeHtml(text.slice(cursor, start)));
    pieces.push(`<mark>${escapeHtml(text.slice(start, end))}</mark>`);
    cursor = end;
  }
  // Whatever follows the last hit.
  pieces.push(escapeHtml(text.slice(cursor)));
  return pieces.join('');
}
799
+
800
/**
 * Render an analysis as HTML: one card per paragraph, joined by newlines.
 *
 * Each card shows the paragraph id, a review/ok pill, the paragraph text with
 * lexicon hits highlighted, the list of hits (when any) and the reasons it was
 * flagged (or a note that nothing fired). An analysis with no paragraphs
 * renders a single empty-state message.
 *
 * @param {object} analysis - Object with `paragraphCount` and `paragraphs`.
 * @returns {string} HTML markup.
 */
export function renderAuditDiff(analysis) {
  if (analysis.paragraphCount === 0) {
    return '<p class="empty-state">Paste text to see suspect zones. v1 does not rewrite.</p>';
  }

  const renderReasons = (reasons) => {
    if (!(reasons.length > 0)) {
      return '<p class="quiet">No deterministic hotspot in this paragraph.</p>';
    }
    const items = reasons.map(
      (r) => `<li><strong>${escapeHtml(r.label)}</strong>: ${escapeHtml(r.detail)}</li>`
    );
    return `<ul>${items.join('')}</ul>`;
  };

  const renderHits = (hits) => {
    if (!(hits.length > 0)) return '';
    const codes = hits.map((hit) => `<code>${escapeHtml(hit)}</code>`);
    return `<p class="hits">Lexicon hits: ${codes.join(' ')}</p>`;
  };

  const cards = analysis.paragraphs.map((paragraph) => {
    const state = paragraph.hot ? 'hot' : 'clean';
    const badge = paragraph.hot ? 'review' : 'ok';
    const reasonsHtml = renderReasons(paragraph.reasons);
    const hitsHtml = renderHits(paragraph.lexicon.hits);
    return [
      `<section class="diff-card ${state}">`,
      `<div class="diff-card__head"><span>${escapeHtml(paragraph.id)}</span><span class="pill ${state}">${badge}</span></div>`,
      `<p>${highlightLexiconHits(paragraph.text, paragraph.lexicon.hits)}</p>`,
      hitsHtml,
      reasonsHtml,
      '</section>',
    ].join('\n');
  });
  return cards.join('\n');
}
821
+
822
/**
 * Pick a heredoc delimiter that does not appear as a whole line of `text`.
 *
 * Starts with 'PATINA_TEXT' and, while the candidate is taken, tries
 * 'PATINA_TEXT_2', 'PATINA_TEXT_3', ... in order.
 *
 * @param {string} text - The heredoc body the delimiter must not collide with.
 * @returns {string}
 */
function heredocDelimiter(text) {
  const base = 'PATINA_TEXT';
  const isTaken = (candidate) => new RegExp(`^${candidate}$`, 'm').test(text);

  if (!isTaken(base)) return base;
  let suffix = 2;
  while (isTaken(`${base}_${suffix}`)) {
    suffix += 1;
  }
  return `${base}_${suffix}`;
}
832
+
833
/**
 * Build a copy-pasteable shell snippet that reproduces the audit with the CLI.
 *
 * The snippet writes the text to patina-input.txt through a quoted heredoc and
 * then runs patina-cli twice on that file: once with --score, once with --audit.
 * Line endings are normalised to \n and trailing whitespace is trimmed first.
 *
 * @param {string} text - Text to embed; falsy values become an empty body.
 * @param {string} [lang] - Language code, passed through normalizeLang.
 * @returns {string} Multi-line shell snippet.
 */
export function buildCliCommand(text, lang = DEFAULT_LANG) {
  const safeLang = normalizeLang(lang);
  const body = (text || '').replace(/\r\n?/g, '\n').trimEnd();
  const delimiter = heredocDelimiter(body);
  const runWith = (flag) => `npx patina-cli --lang ${safeLang} ${flag} patina-input.txt`;

  const lines = [];
  lines.push(`cat > patina-input.txt <<'${delimiter}'`);
  lines.push(body);
  lines.push(delimiter);
  lines.push(runWith('--score'));
  lines.push(runWith('--audit'));
  return lines.join('\n');
}
845
+
846
// Base URL of the GitHub "new issue" form used for false-positive reports.
export const FALSE_POSITIVE_ISSUE_URL = 'https://github.com/devswha/patina/issues/new';
// Upper bound (in UTF-16 code units) on the paragraph text embedded in the report URL.
const FALSE_POSITIVE_MAX_PARAGRAPH_CHARS = 1500;

/**
 * Build a GitHub issue URL with the false-positive template pre-filled from the
 * current audit. Nothing is sent anywhere — the text only leaves the browser if
 * the user chooses to submit the GitHub issue, preserving the in-browser privacy
 * promise while removing the copy/paste friction of reporting by hand.
 *
 * The report quotes the hot paragraphs (or every paragraph when none is hot),
 * truncated to FALSE_POSITIVE_MAX_PARAGRAPH_CHARS, plus a short score summary.
 *
 * @param {string} text - The audited text; also the fallback when no paragraph text is available.
 * @param {string} [lang] - Language code, passed through normalizeLang.
 * @param {object|null} [analysis] - A previously computed analysis; when nullish
 *   the text is analysed here.
 * @returns {string} Issue URL with `template`, `language`, `fired_paragraph`
 *   and `score_output` query parameters.
 */
export function buildFalsePositiveReportUrl(text, lang = DEFAULT_LANG, analysis = null) {
  const safeLang = normalizeLang(lang);
  const result = analysis ?? analyzePlaygroundText(text || '', { lang: safeLang });
  const hotParas = result.paragraphs.filter((p) => p.hot);
  const source = hotParas.length ? hotParas : result.paragraphs;

  let fired = source.map((p) => p.text).join('\n\n').trim();
  if (!fired) fired = (text || '').trim();
  if (fired.length > FALSE_POSITIVE_MAX_PARAGRAPH_CHARS) {
    let cut = FALSE_POSITIVE_MAX_PARAGRAPH_CHARS;
    // Do not cut between the two halves of a surrogate pair: a lone high
    // surrogate is serialised by URLSearchParams as U+FFFD.
    const lastUnit = fired.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    fired = `${fired.slice(0, cut)}\n…(truncated — paste the rest if it matters)`;
  }

  // Distinct reason labels, in first-seen order.
  const signals =
    [...new Set(source.flatMap((p) => p.reasons.map((r) => r.label)))].join(', ') || 'none';
  const lexiconHits = result.paragraphs.reduce((sum, p) => sum + (p.lexicon?.matches ?? 0), 0);
  const scoreOutput = [
    'Source: patina playground (https://patina.vibetip.help/)',
    `Score: ${result.overall}/100 (${result.band.label})`,
    `Hot paragraphs: ${result.hotCount}/${result.paragraphCount}`,
    `Signals: ${signals}`,
    `Lexicon hits: ${lexiconHits}`,
  ].join('\n');

  const params = new globalThis.URLSearchParams({
    template: 'false_positive.yml',
    language: safeLang,
    fired_paragraph: fired,
    score_output: scoreOutput,
  });
  return `${FALSE_POSITIVE_ISSUE_URL}?${params.toString()}`;
}