npm - patina-cli - Versions diffs - 3.11.0 → 4.0.0 - Mend

patina-cli 3.11.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (193) hide show

package/.patina.default.yaml +29 -29
package/CHANGELOG.md +53 -0
package/NOTICE +21 -0
package/README.md +117 -224
package/README_JA.md +134 -77
package/README_KR.md +132 -74
package/README_ZH.md +137 -80
package/SKILL.md +11 -20
package/artifacts/rebaseline-2025/README.md +147 -0
package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
package/assets/brand/patina-badge.svg +18 -0
package/assets/brand/patina-mark.svg +8 -0
package/assets/demo/README.md +79 -0
package/core/scoring.md +12 -12
package/core/standalone-prompt.md +3 -1
package/core/stylometry.md +93 -22
package/docs/API.md +1554 -0
package/docs/AUTHENTICATION.md +50 -26
package/docs/AUTHENTICATION_KR.md +54 -29
package/docs/BRANDING.md +9 -8
package/docs/CLI.md +55 -14
package/docs/COOKBOOK.md +8 -21
package/docs/DEMO.md +32 -5
package/docs/EXIT-CODES.md +2 -3
package/docs/FALSE-POSITIVES.md +63 -0
package/docs/FAQ.md +9 -1
package/docs/FAQ_KR.md +3 -1
package/docs/FLAG-PARITY.md +33 -47
package/docs/ISSUE-WAVES.md +57 -0
package/docs/PATTERNS-EN.md +67 -3
package/docs/PATTERNS-JA.md +68 -2
package/docs/PATTERNS-KO.md +70 -7
package/docs/PATTERNS-ZH.md +67 -3
package/docs/PATTERNS.md +5 -5
package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
package/docs/ROADMAP.md +46 -66
package/docs/TRANSLATIONESE-KO.md +51 -0
package/docs/audits/2026-05-deep-research.md +3 -1
package/docs/benchmarks/README.md +51 -0
package/docs/benchmarks/detector-comparison.json +69 -9
package/docs/benchmarks/detector-comparison.md +10 -5
package/docs/benchmarks/katfish-ko-latest.json +657 -0
package/docs/benchmarks/katfish-ko-latest.md +77 -0
package/docs/benchmarks/latest.json +1183 -108
package/docs/benchmarks/latest.md +84 -60
package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
package/docs/benchmarks/rebaseline-latest.json +381 -0
package/docs/benchmarks/rebaseline-latest.md +121 -0
package/docs/benchmarks/register-stratified-latest.json +164 -0
package/docs/benchmarks/register-stratified-latest.md +99 -0
package/docs/benchmarks/register-stratified.md +43 -0
package/docs/integrations/github-action.md +44 -11
package/docs/integrations/playground.md +58 -0
package/docs/integrations/pre-commit.md +5 -5
package/docs/integrations/release.md +5 -3
package/docs/integrations/static-sites.md +83 -0
package/docs/research/2025-rebaseline-plan.md +71 -2
package/docs/research/2026-rebaseline.md +102 -0
package/docs/research/adversarial-mps.md +41 -0
package/docs/research/ai-human-metrics.md +35 -23
package/docs/research/human-eval-panel.md +42 -0
package/docs/research/judge-agreement.md +24 -0
package/docs/research/ko-2025-corpus-sources.md +135 -0
package/docs/research/lexicon-freshness-audit.md +64 -0
package/docs/research/zh-ja-lexicon-calibration.md +60 -0
package/docs/social/patina-launch-copy.md +173 -100
package/docs/social/patina-launch-execution.md +94 -0
package/docs/social/patina-launch-korean-first.md +83 -0
package/docs/social/signs-of-ai-writing.md +26 -0
package/docs/social/signs-of-ai-writing_KR.md +26 -0
package/lexicon/ai-en.md +21 -24
package/lexicon/ai-ja.md +158 -0
package/lexicon/ai-ko.md +9 -9
package/lexicon/ai-zh.md +158 -0
package/lexicon/provenance/ai-en.json +970 -0
package/lexicon/provenance/ai-ja.json +542 -0
package/lexicon/provenance/ai-ko.json +866 -0
package/lexicon/provenance/ai-zh.json +542 -0
package/package.json +49 -8
package/patterns/en-communication.md +5 -0
package/patterns/en-content.md +5 -0
package/patterns/en-filler.md +5 -0
package/patterns/en-language.md +29 -1
package/patterns/en-structure.md +5 -0
package/patterns/en-style.md +5 -0
package/patterns/en-viral-hook.md +42 -2
package/patterns/ja-communication.md +5 -0
package/patterns/ja-content.md +5 -0
package/patterns/ja-filler.md +5 -0
package/patterns/ja-language.md +33 -1
package/patterns/ja-structure.md +12 -0
package/patterns/ja-style.md +5 -0
package/patterns/ja-viral-hook.md +41 -2
package/patterns/ko-communication.md +5 -0
package/patterns/ko-content.md +5 -0
package/patterns/ko-filler.md +5 -0
package/patterns/ko-language.md +33 -1
package/patterns/ko-structure.md +25 -6
package/patterns/ko-style.md +5 -0
package/patterns/ko-viral-hook.md +38 -2
package/patterns/zh-communication.md +5 -0
package/patterns/zh-content.md +5 -0
package/patterns/zh-filler.md +5 -0
package/patterns/zh-language.md +37 -1
package/patterns/zh-structure.md +12 -0
package/patterns/zh-style.md +5 -0
package/patterns/zh-viral-hook.md +38 -2
package/playground/README.md +55 -0
package/playground/analytics.js +4 -0
package/playground/analyzer.js +883 -0
package/playground/app.js +157 -0
package/playground/data/lexicons.js +343 -0
package/playground/index.html +138 -0
package/playground/styles.css +267 -0
package/profiles/namuwiki.md +111 -0
package/scripts/adversarial-mps-report.mjs +201 -0
package/scripts/badge-json.mjs +79 -0
package/scripts/benchmark-report.mjs +56 -9
package/scripts/check-release-metadata.mjs +0 -2
package/scripts/detector-comparison.mjs +7 -7
package/scripts/generate-playground-data.mjs +77 -0
package/scripts/katfish-calibration.mjs +464 -0
package/scripts/lexicon-freshness.mjs +485 -0
package/scripts/lint.mjs +1 -1
package/scripts/precommit-score.mjs +4 -3
package/scripts/prose-score.mjs +81 -5
package/scripts/rebaseline-intake.mjs +242 -0
package/scripts/rebaseline-score.mjs +268 -0
package/scripts/rebaseline-summary.mjs +773 -0
package/scripts/rebaseline-web-collect.mjs +410 -0
package/scripts/update-benchmark-ranges.mjs +1 -0
package/src/api.js +69 -105
package/src/auth.js +50 -2
package/src/backends/claude-cli.js +19 -4
package/src/backends/codex-cli.js +19 -3
package/src/backends/contract.js +230 -1
package/src/backends/gemini-cli.js +18 -5
package/src/backends/index.js +87 -12
package/src/backends/kimi-cli.js +161 -0
package/src/cli.js +577 -567
package/src/commands/doctor.js +2 -2
package/src/config.js +29 -0
package/src/errors.js +53 -1
package/src/features/discourse-tells.js +68 -0
package/src/features/index.js +82 -8
package/src/features/lexicon.js +40 -6
package/src/features/markup-leakage.js +69 -0
package/src/features/segment.js +41 -0
package/src/features/signal-strength.js +81 -0
package/src/features/stylometry.js +231 -1
package/src/features/translationese.js +127 -0
package/src/loader.js +76 -0
package/src/logger.js +22 -23
package/src/model-defaults.js +55 -0
package/src/ouroboros.js +31 -0
package/src/output.js +102 -90
package/src/prompt-builder.js +103 -68
package/src/providers.js +51 -4
package/src/scoring.js +210 -2
package/src/security.js +75 -0
package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
package/tests/quality/README.md +188 -11
package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
package/tests/quality/benchmark.mjs +39 -1
package/tests/quality/dogfood.mjs +5 -3
package/tests/quality/live-fixtures.jsonl +2 -0
package/tests/quality/live-quality.mjs +596 -0
package/tests/quality/ranking-metrics.mjs +136 -0
package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
package/vercel.json +53 -0
package/SKILL-MAX.md +0 -455
package/docs/internal/HARNESS.md +0 -14
package/docs/internal/README.md +0 -14
package/docs/internal/WARP.md +0 -23
package/patina-max/SKILL.md +0 -523
package/patina-max/composite.py +0 -457
package/src/cache.js +0 -106
package/src/commands/init.js +0 -208
package/src/manifest.js +0 -162
package/src/max-mode.js +0 -207

package/src/commands/doctor.js CHANGED Viewed

@@ -54,8 +54,8 @@ export function buildDoctorReport({ version } = {}) {
     status: tmux.ok ? 'ok' : 'warning',
     summary: tmux.ok ? tmux.stdout.trim() : 'tmux not found',
     detail: tmux.ok
-      ? 'available for MAX omc dispatch workflows'
-      : 'only needed for tmux-based MAX dispatch; direct/API modes still work',
+      ? 'available when you want tmux-based parallel workflows outside patina itself'
+      : 'optional; patina no longer requires tmux for any built-in mode',
   });
   const apiKeySource = inspectHttpApiKeySource();

package/src/config.js CHANGED Viewed

@@ -7,6 +7,15 @@ import yaml from 'js-yaml';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const REPO_ROOT = resolve(__dirname, '..');
+/**
+ * Load default config and merge global/project .patina.yaml overrides.
+ *
+ * @param {string} [path] Base YAML config path.
+ * @returns {object} Merged patina configuration object.
+ * @throws {Error} When a config file is missing, invalid YAML, or not a mapping.
+ * @example
+ * const config = loadConfig();
+ */
 export function loadConfig(path = resolve(REPO_ROOT, '.patina.default.yaml')) {
   const raw = readFileSync(path, 'utf8');
   const parsed = yaml.load(raw);
@@ -54,6 +63,14 @@ function deepMerge(target, source) {
   }
 }
+/**
+ * Return the repository root inferred from this source file location.
+ *
+ * @returns {string} Absolute repository root path.
+ * @throws {Error} Does not intentionally throw; it only returns the precomputed REPO_ROOT constant.
+ * @example
+ * const root = getRepoRoot();
+ */
 export function getRepoRoot() {
   return REPO_ROOT;
 }
@@ -63,6 +80,18 @@ const VALID_TONES = ['casual', 'professional', 'academic', 'narrative', 'marketi
 // Resolve the effective tone from CLI flag and config (v3.10).
 // Priority: cliTone > configTone > unset. zh/ja + explicit tone → fallback path.
 // Returns: { tone, tone_source, tone_evidence, tone_confidence, warning? }
+/**
+ * Resolve CLI/config tone settings into prompt-ready tone metadata.
+ *
+ * @param {object} options Tone inputs.
+ * @param {string|null} [options.cliTone] CLI tone override.
+ * @param {string|null} [options.configTone] Configured tone value.
+ * @param {string} [options.lang] Active language code.
+ * @returns {object} Tone metadata: { tone, tone_source, tone_evidence, tone_confidence, warning? }.
+ * @throws {Error} When cliTone or configTone is not supported.
+ * @example
+ * const tone = resolveTone({ cliTone: 'casual', lang: 'ko' });
+ */
 export function resolveTone({ cliTone, configTone, lang }) {
   if (cliTone !== undefined && cliTone !== null) {
     if (!VALID_TONES.includes(cliTone)) {

package/src/errors.js CHANGED Viewed

@@ -1,3 +1,14 @@
+/**
+ * Structured CLI error with separate what/why/action fields and exit code.
+ *
+ * @param {object} options Error fields.
+ * @param {string} options.what Short failure headline.
+ * @param {string} options.why Explanation of the failure.
+ * @param {string} options.action Suggested user action.
+ * @param {number} [options.exitCode=1] Process exit code.
+ * @example
+ * throw new PatinaCliError({ what: 'missing input', why: 'No file was provided', action: 'Pass a file path.' });
+ */
 export class PatinaCliError extends Error {
   constructor({ what, why, action, exitCode = 1 }) {
     super([what, why, action].filter(Boolean).join('\n'));
@@ -9,14 +20,45 @@ export class PatinaCliError extends Error {
   }
 }
+/**
+ * Create a user-input error that should exit with code 2.
+ *
+ * @param {string} what Short failure headline.
+ * @param {string} why Explanation of the invalid input.
+ * @param {string} action Suggested user action.
+ * @returns {PatinaCliError} Structured input error.
+ * @throws {Error} Does not intentionally throw; returns an Error instance for callers to throw.
+ * @example
+ * throw inputError('missing input', 'No file was provided.', 'Pass a file path.');
+ */
 export function inputError(what, why, action) {
   return new PatinaCliError({ what, why, action, exitCode: 2 });
 }
+/**
+ * Create a runtime error that should exit with code 1.
+ *
+ * @param {string} what Short failure headline.
+ * @param {string} why Explanation of the runtime failure.
+ * @param {string} action Suggested user action.
+ * @returns {PatinaCliError} Structured runtime error.
+ * @throws {Error} Does not intentionally throw; returns an Error instance for callers to throw.
+ * @example
+ * throw runtimeError('provider failed', 'The API timed out.', 'Retry later.');
+ */
 export function runtimeError(what, why, action) {
   return new PatinaCliError({ what, why, action, exitCode: 1 });
 }
+/**
+ * Render any thrown value into the patina CLI error format.
+ *
+ * @param {unknown} err Error-like value to render.
+ * @returns {string} Multi-line user-facing error text.
+ * @throws {Error} Propagates validation, filesystem, network, or dependency failures when the underlying operation cannot complete.
+ * @example
+ * const message = renderCliError(inputError('bad flag', 'Unknown flag.', 'Run --help.'));
+ */
 export function renderCliError(err) {
   const normalized = normalizeError(err);
   return [
@@ -26,8 +68,18 @@ export function renderCliError(err) {
   ].join('\n');
 }
+/**
+ * Extract a safe process exit code from an error-like value.
+ *
+ * @param {unknown} err Error-like value.
+ * @param {number} [fallback=1] Exit code used when err.exitCode is absent or invalid.
+ * @returns {number} Non-negative integer exit code.
+ * @throws {Error} Does not intentionally throw; it only coerces err.exitCode to a number and range-checks it.
+ * @example
+ * const code = getExitCode(inputError('bad', 'why', 'fix')); // 2
+ */
 export function getExitCode(err, fallback = 1) {
-  const n = Number(err?.exitCode);
+  const n = Number(err ? /** @type {any} */ (err).exitCode : undefined);
   return Number.isInteger(n) && n >= 0 ? n : fallback;
 }

package/src/features/discourse-tells.js ADDED Viewed

@@ -0,0 +1,68 @@
+// Density-gated discourse tells (issue #334). Unlike markup-leakage (a single
+// near-proof-grade hit), these are constructions humans also use, so each fires
+// only past a density threshold to keep false positives low.
+//
+// 1. Fake-candor / manufactured-intimacy openers (English): AI overuses
+//    intimacy-signaling openers ("here's the thing", "let's be honest") that
+//    real writers use sparingly. Fires at >= 2 per document.
+// 2. Decorative thematic breaks: AI sprinkles `---` / `***` / `___` dividers,
+//    often before every heading. Fires at >= 3 per document.
+const FAKE_CANDOR_RULES = [
+  /\bhere'?s the thing\b/gi,
+  /\bhere'?s the kicker\b/gi,
+  /\blet'?s be honest\b/gi,
+  /\blet'?s be real\b/gi,
+  /\bthe truth is\b/gi,
+  /\bi'?ll be honest(?: with you)?\b/gi,
+  /\breal talk\b/gi,
+];
+const FAKE_CANDOR_MIN = 2;
+const THEMATIC_BREAK_MIN = 3;
+// A markdown thematic break: a line that is only ---, ***, or ___ (3+), optionally spaced.
+const THEMATIC_BREAK_LINE = /^[ \t]*(?:-[ \t]*){3,}$|^[ \t]*(?:\*[ \t]*){3,}$|^[ \t]*(?:_[ \t]*){3,}$/;
+const HEADING_LINE = /^[ \t]*#{1,6}[ \t]+\S/;
+export function detectFakeCandor(text) {
+  const str = typeof text === 'string' ? text : '';
+  const hits = [];
+  let count = 0;
+  for (const re of FAKE_CANDOR_RULES) {
+    const m = str.match(re);
+    if (m && m.length) {
+      count += m.length;
+      hits.push(...new Set(m.map((x) => x.trim().toLowerCase())));
+    }
+  }
+  return { count, hits: [...new Set(hits)].slice(0, 5), hot: count >= FAKE_CANDOR_MIN, threshold: FAKE_CANDOR_MIN };
+}
+export function detectThematicBreaks(text) {
+  const lines = (typeof text === 'string' ? text : '').split(/\r?\n/);
+  let count = 0;
+  let adjacentToHeading = 0;
+  for (let i = 0; i < lines.length; i++) {
+    if (!THEMATIC_BREAK_LINE.test(lines[i])) continue;
+    count++;
+    // "adjacent to a heading" = the next non-empty line is a heading.
+    for (let j = i + 1; j < lines.length; j++) {
+      if (lines[j].trim() === '') continue;
+      if (HEADING_LINE.test(lines[j])) adjacentToHeading++;
+      break;
+    }
+  }
+  return { count, adjacentToHeading, hot: count >= THEMATIC_BREAK_MIN, threshold: THEMATIC_BREAK_MIN };
+}
+/**
+ * @returns {{ fakeCandor: object, thematicBreaks: object, hot: boolean }}
+ */
+export function detectDiscourseTells(text) {
+  const fakeCandor = detectFakeCandor(text);
+  const thematicBreaks = detectThematicBreaks(text);
+  return { fakeCandor, thematicBreaks, hot: fakeCandor.hot || thematicBreaks.hot };
+}
+export { FAKE_CANDOR_MIN, THEMATIC_BREAK_MIN };

package/src/features/index.js CHANGED Viewed

@@ -3,26 +3,45 @@
 // is the in-tree port of the algorithm previously delegated to the LLM via
 // SKILL.md Step 4.6/4.7. It does not call any LLM.
-import { splitParagraphs, splitSentences, tokenize } from './segment.js';
+import { splitParagraphs, splitSentences, splitProseSentences, tokenize } from './segment.js';
 import {
   burstinessCV,
   mattr,
   classifyBurstiness,
   classifyMattr,
+  classifyKoreanDiagnostics,
+  commaDensity,
+  koreanPosDiversityProxy,
+  koreanSpacingFeatures,
   DEFAULT_BURSTINESS_BANDS,
+  DEFAULT_KO_DIAGNOSTIC_BANDS,
   DEFAULT_MATTR_BANDS,
   DEFAULT_MATTR_WINDOW,
+  DEFAULT_MIN_BURSTINESS_SENTENCES,
 } from './stylometry.js';
-import { loadLexicon, computeDensity, DEFAULT_LEXICON_DENSITY_THRESHOLD } from './lexicon.js';
+import {
+  classifyLexiconHot,
+  loadLexicon,
+  computeDensity,
+  DEFAULT_LEXICON_DENSITY_THRESHOLD,
+  DEFAULT_LEXICON_MIN_HOT_MATCHES,
+} from './lexicon.js';
+import { detectMarkupLeakage } from './markup-leakage.js';
+import { detectDiscourseTells } from './discourse-tells.js';
+import { detectTranslationese } from './translationese.js';
 export function analyzeText(text, opts = {}) {
   const {
     lang = 'en',
     repoRoot,
     burstinessBands = DEFAULT_BURSTINESS_BANDS,
+    minBurstinessSentences = DEFAULT_MIN_BURSTINESS_SENTENCES,
     mattrBands = DEFAULT_MATTR_BANDS,
     mattrWindow = DEFAULT_MATTR_WINDOW,
+    koDiagnosticsEnabled = true,
+    koDiagnosticBands = DEFAULT_KO_DIAGNOSTIC_BANDS,
     lexiconDensityThreshold = DEFAULT_LEXICON_DENSITY_THRESHOLD,
+    lexiconMinHotMatches = DEFAULT_LEXICON_MIN_HOT_MATCHES,
     lexicon: providedLexicon,
   } = opts;
@@ -31,6 +50,18 @@ export function analyzeText(text, opts = {}) {
   // vs decomposed) would otherwise yield different MATTR/lexicon hits.
   const normalized = text ? text.normalize('NFC') : '';
   const paragraphs = splitParagraphs(normalized);
+  // Document-level leakage scan (issue #332). Near-proof-grade: a single hit is
+  // strong evidence of pasted model output, so it forces the document hot
+  // regardless of the per-paragraph stylometry/lexicon signals.
+  const markupLeakage = detectMarkupLeakage(normalized);
+  // Density-gated discourse tells (issue #334): fake-candor openers (>=2) and
+  // decorative thematic breaks (>=3). Document-level, weaker than leakage.
+  const discourseTells = detectDiscourseTells(normalized);
+  // ko translationese (번역투/calque) — lexical, NOT structural. Advisory signal:
+  // surfaced for callers/SKILL but deliberately NOT folded into `hot` (these
+  // constructions appear in good Korean too; gating hot would regress FP).
+  const translationese = detectTranslationese(normalized, { lang });
   const lexicon =
     providedLexicon ??
     (repoRoot ? loadLexicon(lang, repoRoot) : { strict: [], phrases: [] });
@@ -39,7 +70,7 @@ export function analyzeText(text, opts = {}) {
   // can suppress meta-block emission, but the benchmark wants raw signals on
   // single-paragraph fixtures so we compute them unconditionally.
   const totalSentences = paragraphs.reduce(
-    (n, p) => n + splitSentences(p).length,
+    (n, p) => n + splitProseSentences(p).length,
     0
   );
   const skipReason =
@@ -48,20 +79,33 @@ export function analyzeText(text, opts = {}) {
     null;
   const analyzed = paragraphs.map((paragraph, idx) => {
-    const sentences = splitSentences(paragraph);
+    const sentences = splitProseSentences(paragraph);
     const sentenceTokens = sentences.map((sentence) => tokenize(sentence, { lang }));
     const sentenceTokenCounts = sentenceTokens.map((t) => t.length);
     const allTokens = sentenceTokens.flat();
     const cv = burstinessCV(sentenceTokenCounts);
-    const cvBand = classifyBurstiness(cv, burstinessBands);
+    const cvBand =
+      sentences.length >= minBurstinessSentences
+        ? classifyBurstiness(cv, burstinessBands)
+        : null;
     const mattrValue = mattr(allTokens, mattrWindow);
     const mattrBand = classifyMattr(mattrValue, mattrBands);
     const lex = computeDensity(paragraph, allTokens, lexicon);
+    const koSignals = lang === 'ko'
+      ? buildKoreanSignals(paragraph, sentences.length, {
+          enabled: koDiagnosticsEnabled,
+          bands: koDiagnosticBands,
+        })
+      : {};
-    const lexiconHot = lex.density > lexiconDensityThreshold;
+    const lexiconHot = classifyLexiconHot(lex, {
+      lang,
+      densityThreshold: lexiconDensityThreshold,
+      minHotMatches: lexiconMinHotMatches,
+    });
     const hot =
-      cvBand === 'low' || mattrBand === 'low' || lexiconHot;
+      cvBand === 'low' || mattrBand === 'low' || lexiconHot || Boolean(koSignals.koDiagnostics?.hot);
     return {
       id: `P${idx + 1}`,
@@ -70,6 +114,7 @@ export function analyzeText(text, opts = {}) {
       burstiness: { cv, band: cvBand },
       mattr: { value: mattrValue, band: mattrBand },
       lexicon: { ...lex, hot: lexiconHot },
+      ...koSignals,
       hot,
     };
   });
@@ -79,18 +124,47 @@ export function analyzeText(text, opts = {}) {
     skipped: Boolean(skipReason),
     skipReason,
     paragraphs: analyzed,
-    hot: analyzed.some((p) => p.hot),
+    markupLeakage,
+    discourseTells,
+    translationese,
+    hot: markupLeakage.leaked || discourseTells.hot || analyzed.some((p) => p.hot),
   };
 }
 export {
   splitParagraphs,
   splitSentences,
+  splitProseSentences,
   tokenize,
   burstinessCV,
   mattr,
   classifyBurstiness,
   classifyMattr,
+  classifyKoreanDiagnostics,
+  commaDensity,
+  koreanPosDiversityProxy,
+  koreanSpacingFeatures,
   loadLexicon,
   computeDensity,
 };
+function buildKoreanSignals(paragraph, sentenceCount, { enabled, bands }) {
+  const spacing = koreanSpacingFeatures(paragraph);
+  const comma = commaDensity(paragraph, sentenceCount);
+  const posDiversity = koreanPosDiversityProxy(paragraph);
+  const koDiagnostics = enabled
+    ? classifyKoreanDiagnostics({
+        sentenceCount,
+        spacing,
+        comma,
+        posDiversity,
+      }, bands)
+    : { hot: false, strength: 0, reasons: [], thresholds: bands };
+  return {
+    spacing,
+    comma,
+    posDiversity,
+    koDiagnostics,
+  };
+}

package/src/features/lexicon.js CHANGED Viewed

@@ -7,6 +7,12 @@ import { readFileSync, existsSync } from 'node:fs';
 import { resolve } from 'node:path';
 export const DEFAULT_LEXICON_DENSITY_THRESHOLD = 2.0;
+export const DEFAULT_LEXICON_MIN_HOT_MATCHES = {
+  default: 1,
+  ko: 2,
+  zh: 2,
+  ja: 2,
+};
 // Parses the two well-known sections out of a lexicon markdown file.
 // Returns { strict: string[], phrases: string[] }.
@@ -65,11 +71,12 @@ export function computeDensity(paragraphText, tokens, lexicon) {
   const hits = [];
   const tokenSet = new Set(tokens.map((t) => t.toLowerCase()));
-  // §16: English strict entries match whole-word; Korean strict entries are
-  // approximated by substring (어절 inflection means `자리매김` should also
-  // hit `자리매김했다`, `자리매김으로`, etc.). Punctuated entries always need
-  // substring fallback because tokenization strips edge punct.
-  const koSubstring = lexicon.lang === 'ko';
+  // §16: English strict entries match whole-word; CJK strict entries are
+  // approximated by substring. Korean inflection and zh/ja character fallback
+  // mean `자리매김`, `可以说`, or `まとめると` may not survive as whole tokens.
+  // Punctuated entries always need substring fallback because tokenization
+  // strips edge punct.
+  const cjkSubstring = ['ko', 'zh', 'ja'].includes(lexicon.lang);
   for (const entry of lexicon.strict) {
     const lowerEntry = entry.toLowerCase();
     if (tokenSet.has(lowerEntry)) {
@@ -77,7 +84,7 @@ export function computeDensity(paragraphText, tokens, lexicon) {
       continue;
     }
     const hasInternalPunct = /[^\p{L}\p{N}]/u.test(lowerEntry);
-    if ((koSubstring || hasInternalPunct) && lowerText.includes(lowerEntry)) {
+    if ((cjkSubstring || hasInternalPunct) && lowerText.includes(lowerEntry)) {
       hits.push(entry);
     }
   }
@@ -88,3 +95,30 @@ export function computeDensity(paragraphText, tokens, lexicon) {
   const density = tokens.length > 0 ? (hits.length / tokens.length) * 1000 : 0;
   return { matches: hits.length, density, hits };
 }
+/**
+ * @param {{ matches?: number, density?: number }} [lexiconStats]
+ * @param {{ lang?: string, densityThreshold?: number, minHotMatches?: (number|Record<string, number>) }} [options]
+ */
+export function classifyLexiconHot(
+  lexiconStats,
+  {
+    lang,
+    densityThreshold = DEFAULT_LEXICON_DENSITY_THRESHOLD,
+    minHotMatches = DEFAULT_LEXICON_MIN_HOT_MATCHES,
+  } = {}
+) {
+  const matches = lexiconStats?.matches ?? 0;
+  const density = lexiconStats?.density ?? 0;
+  const minMatches = resolveMinHotMatches(lang, minHotMatches);
+  return matches >= minMatches && density > densityThreshold;
+}
+function resolveMinHotMatches(lang, minHotMatches) {
+  if (typeof minHotMatches === 'number' && Number.isFinite(minHotMatches)) {
+    return Math.max(1, minHotMatches);
+  }
+  const normalized = typeof lang === 'string' ? lang.toLowerCase() : 'default';
+  const value = minHotMatches?.[normalized] ?? minHotMatches?.default;
+  return typeof value === 'number' && Number.isFinite(value) ? Math.max(1, value) : 1;
+}

package/src/features/markup-leakage.js ADDED Viewed

@@ -0,0 +1,69 @@
+// Deterministic detection of model-output *leakage* artifacts: tokens that LLM
+// web-search / tooling inject and that essentially never appear in human-written
+// prose. Unlike the stylometry/lexicon signals (which are probabilistic and
+// fire on clusters), a single hit here is near-proof-grade evidence of pasted
+// model output, so it fires hard at the document level. See issue #332.
+//
+// Language-agnostic literal token set — applies to ko/en/zh/ja alike.
+//
+// Self-scan caveat: patina's own docs, fixtures, and issues that *discuss* these
+// tokens will match. That is correct behavior (the text genuinely contains
+// them); callers scanning the repo's own meta-content should expect hits.
+const OBJECT_REPLACEMENT_CHAR = '\uFFFC';
+// Each entry: { id, label, build() => fresh RegExp }. We build a fresh regex per
+// scan so the shared module is reentrant (no leaking lastIndex across calls).
+const MARKUP_RULES = [
+  {
+    id: 'oai-citation-markup',
+    label: 'OpenAI citation markup',
+    build: () => /:contentReference|oaicite|oai_citation/gi,
+  },
+  {
+    id: 'model-tool-token',
+    label: 'Model tool token',
+    build: () => /\bturn\d+(?:search|view|news|image|forecast|finance|fetch)\d*\b|\bnavlist\b|\bgrok_card\b/gi,
+  },
+  {
+    id: 'object-replacement-char',
+    label: 'Object-replacement character (\uFFFC)',
+    build: () => new RegExp(OBJECT_REPLACEMENT_CHAR, 'g'),
+  },
+  {
+    id: 'ai-tracking-param',
+    label: 'AI-tool tracking parameter in URL',
+    build: () => /utm_source=(?:chatgpt\.com|openai\.com|perplexity\.ai|claude\.ai|gemini\.google\.com)|[?&](?:ref|utm_source)=chatgpt/gi,
+  },
+  {
+    id: 'explicit-self-identification',
+    label: 'Explicit AI self-identification',
+    build: () => /\bas an? (?:AI|artificial intelligence) language model\b|\bas a large language model\b|\bas a language model\b|\bas an AI assistant\b|\bI am an AI\b|\bI'?m an AI\b/gi,
+  },
+];
+/**
+ * Scan raw text for model-output leakage artifacts.
+ * @param {string} text
+ * @returns {{ leaked: boolean, hits: Array<{id:string,label:string,count:number,samples:string[]}> }}
+ */
+export function detectMarkupLeakage(text) {
+  const str = typeof text === 'string' ? text : '';
+  const hits = [];
+  if (!str) return { leaked: false, hits };
+  for (const rule of MARKUP_RULES) {
+    const matches = str.match(rule.build());
+    if (matches && matches.length > 0) {
+      hits.push({
+        id: rule.id,
+        label: rule.label,
+        count: matches.length,
+        samples: [...new Set(matches.map((m) => m.trim()).filter(Boolean))].slice(0, 3),
+      });
+    }
+  }
+  return { leaked: hits.length > 0, hits };
+}
+export { MARKUP_RULES, OBJECT_REPLACEMENT_CHAR };

package/src/features/segment.js CHANGED Viewed

@@ -9,6 +9,7 @@
 const SENTENCE_SPLIT_RE = /[.!?]+\s+|(?<=[。！？…])|\n+/u;
 const PARAGRAPH_SPLIT_RE = /\n\s*\n/;
+const LIST_LINE_RE = /^\s*(?:[-*+]\s+|\d+[.)]\s+)/u;
 // \W in Unicode-aware mode. Strips edge punctuation but keeps internal
 // hyphens / apostrophes (e.g. "don't", "좋은-도구") as a single token.
 const EDGE_PUNCT_RE = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
@@ -22,6 +23,42 @@ export function splitParagraphs(text) {
     .filter((p) => p.length > 0);
 }
+function stripListBlocks(paragraph) {
+  const lines = String(paragraph ?? '').split(/\r?\n/);
+  const proseLines = [];
+  let colonListRemaining = 0;
+  for (let i = 0; i < lines.length; i++) {
+    const rawLine = lines[i];
+    const trimmed = rawLine.trim();
+    if (trimmed === '') {
+      colonListRemaining = 0;
+      proseLines.push(rawLine);
+      continue;
+    }
+    if (LIST_LINE_RE.test(rawLine)) continue;
+    if (colonListRemaining > 0) {
+      colonListRemaining--;
+      continue;
+    }
+    if (trimmed.endsWith(':')) {
+      colonListRemaining = countFollowingPlainListLines(lines, i + 1);
+    }
+    proseLines.push(rawLine);
+  }
+  return proseLines.join('\n');
+}
+function countFollowingPlainListLines(lines, start) {
+  let count = 0;
+  for (let i = start; i < lines.length; i++) {
+    const trimmed = lines[i].trim();
+    if (trimmed === '') break;
+    if (LIST_LINE_RE.test(lines[i])) continue;
+    count++;
+  }
+  return count >= 2 ? count : 0;
+}
 export function splitSentences(paragraph) {
   if (!paragraph) return [];
   return paragraph
@@ -30,6 +67,10 @@ export function splitSentences(paragraph) {
     .filter((s) => s.length > 0);
 }
+export function splitProseSentences(paragraph) {
+  return splitSentences(stripListBlocks(paragraph));
+}
 function tokenizeCjk(text) {
   const tokens = [];
   for (const match of text.matchAll(CJK_TOKEN_RE)) {

package/src/features/signal-strength.js ADDED Viewed

@@ -0,0 +1,81 @@
+import { DEFAULT_LEXICON_DENSITY_THRESHOLD } from './lexicon.js';
+import { DEFAULT_BURSTINESS_BANDS, DEFAULT_MATTR_BANDS } from './stylometry.js';
+/**
+ * Average the strongest deterministic signal for each paragraph.
+ *
+ * This is diagnostic-only. It intentionally does not replace the existing
+ * hot-paragraph ratio used by gates and reconciliation.
+ *
+ * @param {object[]} [paragraphs] Analyzer paragraph payloads.
+ * @param {object} [options] Thresholds used by the analyzer.
+ * @returns {number} 0..100 average signal strength.
+ */
+export function summarizeSignalStrength(paragraphs = [], options = {}) {
+  if (!Array.isArray(paragraphs) || paragraphs.length === 0) return 0;
+  const total = paragraphs.reduce(
+    (sum, paragraph) => sum + paragraphSignalStrength(paragraph, options),
+    0
+  );
+  return total / paragraphs.length;
+}
+/**
+ * Score how deep a paragraph is inside its strongest deterministic trigger.
+ *
+ * @param {object} [paragraph] Analyzer paragraph payload.
+ * @param {object} [options] Thresholds used by the analyzer.
+ * @returns {number} 0..100 paragraph signal strength.
+ */
+export function paragraphSignalStrength(paragraph = {}, options = {}) {
+  const burstiness = lowBandStrength(
+    paragraph.burstiness?.cv,
+    resolveLowThreshold(options.burstinessBands, DEFAULT_BURSTINESS_BANDS.low),
+    paragraph.burstiness?.band
+  );
+  const mattr = lowBandStrength(
+    paragraph.mattr?.value,
+    resolveLowThreshold(options.mattrBands, DEFAULT_MATTR_BANDS.low),
+    paragraph.mattr?.band
+  );
+  const lexicon = highThresholdStrength(
+    paragraph.lexicon?.density,
+    resolveThreshold(
+      options.lexiconDensityThreshold,
+      DEFAULT_LEXICON_DENSITY_THRESHOLD
+    ),
+    paragraph.lexicon?.hot
+  );
+  const koDiagnostics =
+    paragraph.koDiagnostics?.hot &&
+    typeof paragraph.koDiagnostics?.strength === 'number' &&
+    Number.isFinite(paragraph.koDiagnostics.strength)
+      ? paragraph.koDiagnostics.strength
+      : 0;
+  return Math.max(burstiness, mattr, lexicon, koDiagnostics);
+}
+function resolveLowThreshold(bands, fallback) {
+  return resolveThreshold(bands?.low, fallback);
+}
+function resolveThreshold(value, fallback) {
+  return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
+}
+function lowBandStrength(value, threshold, band) {
+  if (band !== 'low' || typeof value !== 'number' || !Number.isFinite(value)) return 0;
+  if (!threshold || threshold <= 0) return 0;
+  return clampPercent((1 - value / threshold) * 100);
+}
+function highThresholdStrength(value, threshold, isHot) {
+  if (!isHot || typeof value !== 'number' || !Number.isFinite(value)) return 0;
+  if (!threshold || threshold <= 0) return 0;
+  return clampPercent(((value - threshold) / threshold) * 100);
+}
+function clampPercent(value) {
+  if (!Number.isFinite(value)) return 0;
+  return Math.max(0, Math.min(100, value));
+}