npm - patina-cli - Versions diffs - 3.11.0 → 4.0.0 - Mend

patina-cli 3.11.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (193) hide show

package/.patina.default.yaml +29 -29
package/CHANGELOG.md +53 -0
package/NOTICE +21 -0
package/README.md +117 -224
package/README_JA.md +134 -77
package/README_KR.md +132 -74
package/README_ZH.md +137 -80
package/SKILL.md +11 -20
package/artifacts/rebaseline-2025/README.md +147 -0
package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
package/assets/brand/patina-badge.svg +18 -0
package/assets/brand/patina-mark.svg +8 -0
package/assets/demo/README.md +79 -0
package/core/scoring.md +12 -12
package/core/standalone-prompt.md +3 -1
package/core/stylometry.md +93 -22
package/docs/API.md +1554 -0
package/docs/AUTHENTICATION.md +50 -26
package/docs/AUTHENTICATION_KR.md +54 -29
package/docs/BRANDING.md +9 -8
package/docs/CLI.md +55 -14
package/docs/COOKBOOK.md +8 -21
package/docs/DEMO.md +32 -5
package/docs/EXIT-CODES.md +2 -3
package/docs/FALSE-POSITIVES.md +63 -0
package/docs/FAQ.md +9 -1
package/docs/FAQ_KR.md +3 -1
package/docs/FLAG-PARITY.md +33 -47
package/docs/ISSUE-WAVES.md +57 -0
package/docs/PATTERNS-EN.md +67 -3
package/docs/PATTERNS-JA.md +68 -2
package/docs/PATTERNS-KO.md +70 -7
package/docs/PATTERNS-ZH.md +67 -3
package/docs/PATTERNS.md +5 -5
package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
package/docs/ROADMAP.md +46 -66
package/docs/TRANSLATIONESE-KO.md +51 -0
package/docs/audits/2026-05-deep-research.md +3 -1
package/docs/benchmarks/README.md +51 -0
package/docs/benchmarks/detector-comparison.json +69 -9
package/docs/benchmarks/detector-comparison.md +10 -5
package/docs/benchmarks/katfish-ko-latest.json +657 -0
package/docs/benchmarks/katfish-ko-latest.md +77 -0
package/docs/benchmarks/latest.json +1183 -108
package/docs/benchmarks/latest.md +84 -60
package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
package/docs/benchmarks/rebaseline-latest.json +381 -0
package/docs/benchmarks/rebaseline-latest.md +121 -0
package/docs/benchmarks/register-stratified-latest.json +164 -0
package/docs/benchmarks/register-stratified-latest.md +99 -0
package/docs/benchmarks/register-stratified.md +43 -0
package/docs/integrations/github-action.md +44 -11
package/docs/integrations/playground.md +58 -0
package/docs/integrations/pre-commit.md +5 -5
package/docs/integrations/release.md +5 -3
package/docs/integrations/static-sites.md +83 -0
package/docs/research/2025-rebaseline-plan.md +71 -2
package/docs/research/2026-rebaseline.md +102 -0
package/docs/research/adversarial-mps.md +41 -0
package/docs/research/ai-human-metrics.md +35 -23
package/docs/research/human-eval-panel.md +42 -0
package/docs/research/judge-agreement.md +24 -0
package/docs/research/ko-2025-corpus-sources.md +135 -0
package/docs/research/lexicon-freshness-audit.md +64 -0
package/docs/research/zh-ja-lexicon-calibration.md +60 -0
package/docs/social/patina-launch-copy.md +173 -100
package/docs/social/patina-launch-execution.md +94 -0
package/docs/social/patina-launch-korean-first.md +83 -0
package/docs/social/signs-of-ai-writing.md +26 -0
package/docs/social/signs-of-ai-writing_KR.md +26 -0
package/lexicon/ai-en.md +21 -24
package/lexicon/ai-ja.md +158 -0
package/lexicon/ai-ko.md +9 -9
package/lexicon/ai-zh.md +158 -0
package/lexicon/provenance/ai-en.json +970 -0
package/lexicon/provenance/ai-ja.json +542 -0
package/lexicon/provenance/ai-ko.json +866 -0
package/lexicon/provenance/ai-zh.json +542 -0
package/package.json +49 -8
package/patterns/en-communication.md +5 -0
package/patterns/en-content.md +5 -0
package/patterns/en-filler.md +5 -0
package/patterns/en-language.md +29 -1
package/patterns/en-structure.md +5 -0
package/patterns/en-style.md +5 -0
package/patterns/en-viral-hook.md +42 -2
package/patterns/ja-communication.md +5 -0
package/patterns/ja-content.md +5 -0
package/patterns/ja-filler.md +5 -0
package/patterns/ja-language.md +33 -1
package/patterns/ja-structure.md +12 -0
package/patterns/ja-style.md +5 -0
package/patterns/ja-viral-hook.md +41 -2
package/patterns/ko-communication.md +5 -0
package/patterns/ko-content.md +5 -0
package/patterns/ko-filler.md +5 -0
package/patterns/ko-language.md +33 -1
package/patterns/ko-structure.md +25 -6
package/patterns/ko-style.md +5 -0
package/patterns/ko-viral-hook.md +38 -2
package/patterns/zh-communication.md +5 -0
package/patterns/zh-content.md +5 -0
package/patterns/zh-filler.md +5 -0
package/patterns/zh-language.md +37 -1
package/patterns/zh-structure.md +12 -0
package/patterns/zh-style.md +5 -0
package/patterns/zh-viral-hook.md +38 -2
package/playground/README.md +55 -0
package/playground/analytics.js +4 -0
package/playground/analyzer.js +883 -0
package/playground/app.js +157 -0
package/playground/data/lexicons.js +343 -0
package/playground/index.html +138 -0
package/playground/styles.css +267 -0
package/profiles/namuwiki.md +111 -0
package/scripts/adversarial-mps-report.mjs +201 -0
package/scripts/badge-json.mjs +79 -0
package/scripts/benchmark-report.mjs +56 -9
package/scripts/check-release-metadata.mjs +0 -2
package/scripts/detector-comparison.mjs +7 -7
package/scripts/generate-playground-data.mjs +77 -0
package/scripts/katfish-calibration.mjs +464 -0
package/scripts/lexicon-freshness.mjs +485 -0
package/scripts/lint.mjs +1 -1
package/scripts/precommit-score.mjs +4 -3
package/scripts/prose-score.mjs +81 -5
package/scripts/rebaseline-intake.mjs +242 -0
package/scripts/rebaseline-score.mjs +268 -0
package/scripts/rebaseline-summary.mjs +773 -0
package/scripts/rebaseline-web-collect.mjs +410 -0
package/scripts/update-benchmark-ranges.mjs +1 -0
package/src/api.js +69 -105
package/src/auth.js +50 -2
package/src/backends/claude-cli.js +19 -4
package/src/backends/codex-cli.js +19 -3
package/src/backends/contract.js +230 -1
package/src/backends/gemini-cli.js +18 -5
package/src/backends/index.js +87 -12
package/src/backends/kimi-cli.js +161 -0
package/src/cli.js +577 -567
package/src/commands/doctor.js +2 -2
package/src/config.js +29 -0
package/src/errors.js +53 -1
package/src/features/discourse-tells.js +68 -0
package/src/features/index.js +82 -8
package/src/features/lexicon.js +40 -6
package/src/features/markup-leakage.js +69 -0
package/src/features/segment.js +41 -0
package/src/features/signal-strength.js +81 -0
package/src/features/stylometry.js +231 -1
package/src/features/translationese.js +127 -0
package/src/loader.js +76 -0
package/src/logger.js +22 -23
package/src/model-defaults.js +55 -0
package/src/ouroboros.js +31 -0
package/src/output.js +102 -90
package/src/prompt-builder.js +103 -68
package/src/providers.js +51 -4
package/src/scoring.js +210 -2
package/src/security.js +75 -0
package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
package/tests/quality/README.md +188 -11
package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
package/tests/quality/benchmark.mjs +39 -1
package/tests/quality/dogfood.mjs +5 -3
package/tests/quality/live-fixtures.jsonl +2 -0
package/tests/quality/live-quality.mjs +596 -0
package/tests/quality/ranking-metrics.mjs +136 -0
package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
package/vercel.json +53 -0
package/SKILL-MAX.md +0 -455
package/docs/internal/HARNESS.md +0 -14
package/docs/internal/README.md +0 -14
package/docs/internal/WARP.md +0 -23
package/patina-max/SKILL.md +0 -523
package/patina-max/composite.py +0 -457
package/src/cache.js +0 -106
package/src/commands/init.js +0 -208
package/src/manifest.js +0 -162
package/src/max-mode.js +0 -207

package/scripts/lexicon-freshness.mjs ADDED Viewed

@@ -0,0 +1,485 @@
+#!/usr/bin/env node
+// Validate lexicon per-entry provenance and produce public-safe lift reports
+// from local/private JSONL corpora. Raw text is never written to reports.
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { dirname, relative, resolve, basename } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import yaml from 'js-yaml';
+import { computeDensity } from '../src/features/lexicon.js';
+import { tokenize } from '../src/features/segment.js';
+// Directory containing this script, derived from the module URL.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+// Repository root: the parent of the directory this script lives in.
+const REPO_ROOT = resolve(__dirname, '..');
+// Lexicon directory (relative to the repo root) used by the provenance check.
+const DEFAULT_LEXICON_DIR = 'lexicon';
+// Keys every provenance row must define; value types are validated separately.
+const REQUIRED_PROVENANCE_FIELDS = ['entry', 'kind', 'added', 'source', 'last_validated', 'lift', 'status'];
+// Corpus row `class` values counted on the "hot" side of the lift comparison.
+const HOT_CLASSES = new Set(['ai', 'ai-like', 'synthetic-ai', 'generated', 'lightly-edited-ai', 'heavily-edited-ai']);
+// Corpus row `class` values counted on the "cold" side of the lift comparison.
+const COLD_CLASSES = new Set(['human', 'natural', 'natural-human', 'human-reference']);
+export function parseArgs(argv = process.argv.slice(2)) {
+  const args = {
+    check: false,
+    input: null,
+    lang: 'en',
+    sourceId: null,
+    sourceNote: null,
+    validatedAt: new Date().toISOString().slice(0, 10),
+    outputJson: null,
+    outputMd: null,
+    sourceUrls: [],
+    json: false,
+    help: false,
+  };
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    if (arg === '--check') args.check = true;
+    else if (arg === '--input') args.input = argv[++i];
+    else if (arg === '--lang') args.lang = argv[++i];
+    else if (arg === '--source-id') args.sourceId = argv[++i];
+    else if (arg === '--source-note') args.sourceNote = argv[++i];
+    else if (arg === '--validated-at') args.validatedAt = argv[++i];
+    else if (arg === '--output-json') args.outputJson = argv[++i];
+    else if (arg === '--output-md') args.outputMd = argv[++i];
+    else if (arg === '--source-url') args.sourceUrls.push(argv[++i]);
+    else if (arg === '--json') args.json = true;
+    else if (arg === '--help' || arg === '-h') args.help = true;
+    else throw new Error(`Unknown argument: ${arg}`);
+  }
+  if (!args.check && !args.input) args.check = true;
+  return args;
+}
+export function parseFrontmatterFile(path) {
+  const raw = readFileSync(path, 'utf8');
+  const match = raw.match(/^---\n([\s\S]*?)\n---\n?/u);
+  if (!match) return { meta: {}, body: raw, raw };
+  return {
+    meta: yaml.load(match[1]) || {},
+    body: raw.slice(match[0].length),
+    raw,
+  };
+}
+export function parseLexiconEntries(body) {
+  const strict = [];
+  const phrases = [];
+  let mode = null;
+  for (const rawLine of body.split('\n')) {
+    const line = rawLine.trim();
+    if (line.startsWith('## ')) {
+      const heading = line.toLowerCase();
+      if (heading.includes('strict matches')) mode = 'strict';
+      else if (heading.includes('multi-word phrases')) mode = 'phrase';
+      else mode = null;
+      continue;
+    }
+    if (mode && line.startsWith('- ')) {
+      const entry = line.slice(2).trim().normalize('NFC');
+      if (entry) (mode === 'strict' ? strict : phrases).push(entry);
+    }
+  }
+  return { strict, phrases, all: [...strict.map((entry) => ({ kind: 'strict', entry })), ...phrases.map((entry) => ({ kind: 'phrase', entry }))] };
+}
+/**
+ * Validate that every lexicon file has a provenance sidecar whose rows match
+ * the lexicon's entries one-to-one.
+ *
+ * For each language this reads `ai-<lang>.md` from the lexicon directory,
+ * resolves the JSON file named by its `entry-provenance` frontmatter key
+ * (relative to the repo root), and checks row shape, duplicates, orphans
+ * (rows with no lexicon entry) and missing rows (entries with no row).
+ *
+ * @param {object} [options]
+ * @param {string} [options.repoRoot] Root used to resolve paths (default: REPO_ROOT).
+ * @param {string} [options.lexiconDir] Lexicon directory relative to the root.
+ * @param {string[]} [options.langs] Languages to check (default: en, ko, zh, ja).
+ * @returns {{ok: boolean, files: object[], errors: string[], warnings: string[]}}
+ *   `ok` is true when no errors were collected. `files` only lists lexicons
+ *   that reached the end of validation. Nothing in this function adds to
+ *   `warnings`, so it is returned empty.
+ */
+export function checkLexiconProvenance(options = {}) {
+  const repoRoot = options.repoRoot || REPO_ROOT;
+  const lexiconDir = resolve(repoRoot, options.lexiconDir || DEFAULT_LEXICON_DIR);
+  const langs = options.langs || ['en', 'ko', 'zh', 'ja'];
+  const errors = [];
+  const warnings = [];
+  const files = [];
+  for (const lang of langs) {
+    const lexiconPath = resolve(lexiconDir, `ai-${lang}.md`);
+    if (!existsSync(lexiconPath)) {
+      errors.push(`missing lexicon: ${toRepoRelative(lexiconPath, repoRoot)}`);
+      continue;
+    }
+    const parsed = parseFrontmatterFile(lexiconPath);
+    const entries = parseLexiconEntries(parsed.body).all;
+    const fileLabel = toRepoRelative(lexiconPath, repoRoot);
+    const provenanceRef = parsed.meta['entry-provenance'];
+    if (typeof provenanceRef !== 'string' || provenanceRef.length === 0) {
+      errors.push(`${fileLabel}: entry-provenance frontmatter is required`);
+      continue;
+    }
+    // The declared count is only compared when it is a number; a mismatch is
+    // reported but does not stop the remaining checks for this file.
+    if (typeof parsed.meta.entries === 'number' && parsed.meta.entries !== entries.length) {
+      errors.push(`${fileLabel}: frontmatter entries=${parsed.meta.entries} but parsed ${entries.length}`);
+    }
+    const provenancePath = resolve(repoRoot, provenanceRef);
+    if (!existsSync(provenancePath)) {
+      errors.push(`${fileLabel}: provenance file not found: ${provenanceRef}`);
+      continue;
+    }
+    let provenance;
+    try {
+      provenance = JSON.parse(readFileSync(provenancePath, 'utf8'));
+    } catch (error) {
+      errors.push(`${provenanceRef}: invalid JSON (${error.message})`);
+      continue;
+    }
+    if (!Array.isArray(provenance)) {
+      errors.push(`${provenanceRef}: provenance must be a JSON array`);
+      continue;
+    }
+    // Keys are "<kind>:<entry>"; `seen` collects the keys found in the sidecar.
+    const entryKeys = new Set(entries.map(entryKey));
+    const seen = new Set();
+    for (const item of provenance) {
+      if (!item || typeof item !== 'object' || Array.isArray(item)) {
+        errors.push(`${provenanceRef}: each provenance row must be an object`);
+        continue;
+      }
+      // Presence check only: a field set to null still counts as present.
+      for (const field of REQUIRED_PROVENANCE_FIELDS) {
+        if (!Object.prototype.hasOwnProperty.call(item, field)) {
+          errors.push(`${provenanceRef}: ${entryLabel(item)} missing field ${field}`);
+        }
+      }
+      if (!['strict', 'phrase'].includes(item.kind)) {
+        errors.push(`${provenanceRef}: ${entryLabel(item)} kind must be strict or phrase`);
+      }
+      if (typeof item.entry !== 'string' || item.entry.length === 0) {
+        errors.push(`${provenanceRef}: entry must be a non-empty string`);
+      }
+      if (typeof item.source !== 'string' || item.source.length === 0) {
+        errors.push(`${provenanceRef}: ${entryLabel(item)} source must be a non-empty string`);
+      }
+      if (typeof item.status !== 'string' || item.status.length === 0) {
+        errors.push(`${provenanceRef}: ${entryLabel(item)} status must be a non-empty string`);
+      }
+      if (item.last_validated !== null && typeof item.last_validated !== 'string') {
+        errors.push(`${provenanceRef}: ${entryLabel(item)} last_validated must be string or null`);
+      }
+      if (item.added !== null && typeof item.added !== 'string') {
+        errors.push(`${provenanceRef}: ${entryLabel(item)} added must be string or null`);
+      }
+      // NOTE(review): lexicon entries are NFC-normalized when parsed, but
+      // item.entry is compared as written; a non-NFC sidecar entry would be
+      // reported as both orphan and missing. Confirm whether that is intended.
+      const key = entryKey(item);
+      if (seen.has(key)) errors.push(`${provenanceRef}: duplicate provenance for ${key}`);
+      seen.add(key);
+      if (!entryKeys.has(key)) errors.push(`${provenanceRef}: orphan provenance for ${key}`);
+    }
+    // Reverse direction: every lexicon entry needs a provenance row.
+    for (const entry of entries) {
+      const key = entryKey(entry);
+      if (!seen.has(key)) errors.push(`${provenanceRef}: missing provenance for ${key}`);
+    }
+    files.push({ file: fileLabel, provenance: provenanceRef, entries: entries.length, provenanceRows: provenance.length });
+  }
+  return { ok: errors.length === 0, files, errors, warnings };
+}
+export function loadJsonlRows(inputPath, options = {}) {
+  const repoRoot = options.repoRoot || REPO_ROOT;
+  const abs = resolvePath(inputPath, repoRoot);
+  const rows = [];
+  const errors = [];
+  if (!existsSync(abs)) {
+    return { input: toRepoRelative(abs, repoRoot), rows, errors: [`input not found: ${toRepoRelative(abs, repoRoot)}`] };
+  }
+  const lines = readFileSync(abs, 'utf8').split(/\r?\n/u);
+  for (let index = 0; index < lines.length; index++) {
+    const line = lines[index].trim();
+    if (!line) continue;
+    try {
+      rows.push({ lineNumber: index + 1, value: JSON.parse(line) });
+    } catch (error) {
+      errors.push(`line ${index + 1}: invalid JSON (${error.message})`);
+    }
+  }
+  return { input: toRepoRelative(abs, repoRoot), rows, errors };
+}
+export function mineLexiconLift(rows, entries, options = {}) {
+  const lang = options.lang || 'en';
+  const hotDocs = [];
+  const coldDocs = [];
+  const errors = [];
+  const warnings = [];
+  for (const row of rows) {
+    const raw = row.value || row;
+    const label = row.lineNumber ? `line ${row.lineNumber}` : raw.sample_id || 'row';
+    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
+      errors.push(`${label}: record must be an object`);
+      continue;
+    }
+    if ((raw.language || lang) !== lang) continue;
+    if (typeof raw.text !== 'string' || raw.text.length === 0) {
+      warnings.push(`${label}: skipped row without local text`);
+      continue;
+    }
+    if (HOT_CLASSES.has(raw.class)) hotDocs.push(raw);
+    else if (COLD_CLASSES.has(raw.class)) coldDocs.push(raw);
+    else warnings.push(`${label}: skipped unknown class ${raw.class}`);
+  }
+  const hotCounts = countEntryDocumentFrequency(hotDocs, entries, lang);
+  const coldCounts = countEntryDocumentFrequency(coldDocs, entries, lang);
+  const decisions = entries.map((entry) => {
+    const key = entryKey(entry);
+    const hot = hotCounts.counts.get(key) || 0;
+    const cold = coldCounts.counts.get(key) || 0;
+    const hotRate = hotDocs.length ? hot / hotDocs.length : 0;
+    const coldRate = coldDocs.length ? cold / coldDocs.length : 0;
+    const lift = cold === 0 ? (hot === 0 ? 0 : 'Infinity') : round(hotRate / coldRate, 3);
+    const numericLift = lift === 'Infinity' ? Infinity : lift;
+    const keep = hot > 0 && numericLift >= 4 && coldRate <= 0.05;
+    return {
+      kind: entry.kind,
+      entry: entry.entry,
+      hot_docs: hot,
+      cold_docs: cold,
+      hot_rate: round(hotRate, 5),
+      cold_rate: round(coldRate, 5),
+      lift,
+      decision: keep ? 'keep' : 'drop',
+    };
+  });
+  decisions.sort((a, b) => a.decision.localeCompare(b.decision) || a.kind.localeCompare(b.kind) || a.entry.localeCompare(b.entry));
+  const gate = {
+    hot_docs: hotDocs.length,
+    cold_docs: coldDocs.length,
+    hot_registers: hotCounts.registers,
+    cold_registers: coldCounts.registers,
+    min_docs_per_class: 25,
+    min_registers_per_class: 2,
+  };
+  gate.ready = gate.hot_docs >= gate.min_docs_per_class && gate.cold_docs >= gate.min_docs_per_class
+    && Object.keys(gate.hot_registers).length >= gate.min_registers_per_class
+    && Object.keys(gate.cold_registers).length >= gate.min_registers_per_class;
+  return {
+    language: lang,
+    source_id: options.sourceId || null,
+    source_note: options.sourceNote || null,
+    source_urls: options.sourceUrls || [],
+    validated_at: options.validatedAt || null,
+    input: options.input || null,
+    entries: entries.length,
+    kept: decisions.filter((row) => row.decision === 'keep').length,
+    dropped: decisions.filter((row) => row.decision === 'drop').length,
+    gate,
+    decisions,
+    errors,
+    warnings,
+  };
+}
+export function writeLiftReport(result, options = {}) {
+  const written = {};
+  if (options.outputJson) {
+    const outputJson = resolvePath(options.outputJson, options.repoRoot || REPO_ROOT);
+    mkdirSync(dirname(outputJson), { recursive: true });
+    writeFileSync(outputJson, `${JSON.stringify(result, null, 2)}\n`);
+    written.json = toRepoRelative(outputJson, options.repoRoot || REPO_ROOT);
+  }
+  if (options.outputMd) {
+    const outputMd = resolvePath(options.outputMd, options.repoRoot || REPO_ROOT);
+    mkdirSync(dirname(outputMd), { recursive: true });
+    writeFileSync(outputMd, renderLiftMarkdown(result));
+    written.markdown = toRepoRelative(outputMd, options.repoRoot || REPO_ROOT);
+  }
+  return written;
+}
+export function renderCheckMarkdown(result) {
+  const lines = [
+    '# Lexicon Freshness Check',
+    '',
+    `- Validation: **${result.ok ? 'PASS' : 'FAIL'}**`,
+    `- Files checked: ${result.files.length}`,
+  ];
+  if (result.files.length) {
+    lines.push('', '| lexicon | provenance | entries | provenance rows |', '|---|---|---:|---:|');
+    for (const file of result.files) {
+      lines.push(`| \`${file.file}\` | \`${file.provenance}\` | ${file.entries} | ${file.provenanceRows} |`);
+    }
+  }
+  if (result.errors.length) lines.push('', '## Errors', ...result.errors.map((error) => `- ${escapeMarkdown(error)}`));
+  if (result.warnings.length) lines.push('', '## Warnings', ...result.warnings.map((warning) => `- ${escapeMarkdown(warning)}`));
+  return `${lines.join('\n')}\n`;
+}
+export function renderLiftMarkdown(result) {
+  const lines = [
+    '# Lexicon Freshness Lift Report',
+    '',
+    `- Language: ${result.language}`,
+    `- Source: ${result.source_id || 'not recorded'}`,
+    `- Validated at: ${result.validated_at || 'not recorded'}`,
+    `- Input: ${result.input || 'not recorded'}`,
+    `- Entries evaluated: ${result.entries}`,
+    `- Decision summary: ${result.kept} keep / ${result.dropped} drop`,
+    `- Gate: **${result.gate.ready ? 'PASS' : 'BLOCKED'}** (${result.gate.hot_docs} hot docs, ${result.gate.cold_docs} cold docs)`,
+  ];
+  if (result.source_note) lines.push(`- Source note: ${escapeMarkdown(result.source_note)}`);
+  if (result.source_urls?.length) {
+    lines.push('', '## Source provenance', '');
+    for (const url of result.source_urls) lines.push(`- <${url}>`);
+    lines.push('- Public report policy: aggregate counts only; raw corpus rows stay local/private.');
+  }
+  lines.push(
+    '',
+    '## Register coverage',
+    '',
+    '| class | registers |',
+    '|---|---|',
+    `| hot | ${formatRegisterCounts(result.gate.hot_registers)} |`,
+    `| cold | ${formatRegisterCounts(result.gate.cold_registers)} |`,
+    '',
+    '## Entry decisions',
+    '',
+    '| decision | kind | entry | hot docs | cold docs | lift | cold rate |',
+    '|---|---|---|---:|---:|---:|---:|'
+  );
+  for (const row of result.decisions) {
+    lines.push(`| ${row.decision} | ${row.kind} | ${escapeMarkdown(row.entry)} | ${row.hot_docs} | ${row.cold_docs} | ${row.lift} | ${(row.cold_rate * 100).toFixed(2)}% |`);
+  }
+  if (result.errors.length) lines.push('', '## Errors', ...result.errors.map((error) => `- ${escapeMarkdown(error)}`));
+  if (result.warnings.length) lines.push('', '## Warnings', ...result.warnings.map((warning) => `- ${escapeMarkdown(warning)}`));
+  return `${lines.join('\n')}\n`;
+}
+function countEntryDocumentFrequency(docs, entries, lang) {
+  const counts = new Map(entries.map((entry) => [entryKey(entry), 0]));
+  const registers = {};
+  for (const doc of docs) {
+    const register = doc.register || 'unspecified';
+    registers[register] = (registers[register] || 0) + 1;
+    const tokens = tokenize(doc.text, { lang });
+    for (const entry of entries) {
+      const lexicon = {
+        lang,
+        strict: entry.kind === 'strict' ? [entry.entry] : [],
+        phrases: entry.kind === 'phrase' ? [entry.entry] : [],
+      };
+      if (computeDensity(doc.text, tokens, lexicon).matches > 0) {
+        counts.set(entryKey(entry), (counts.get(entryKey(entry)) || 0) + 1);
+      }
+    }
+  }
+  return { counts, registers: sortObject(registers) };
+}
+function loadEntriesForLang(lang, repoRoot = REPO_ROOT) {
+  const file = resolve(repoRoot, 'lexicon', `ai-${lang}.md`);
+  const parsed = parseFrontmatterFile(file);
+  return parseLexiconEntries(parsed.body).all;
+}
+function formatRegisterCounts(registers = {}) {
+  const entries = Object.entries(registers);
+  if (!entries.length) return '—';
+  return entries.map(([key, value]) => `${escapeMarkdown(key)}=${value}`).join(', ');
+}
+function sortObject(value) {
+  return Object.fromEntries(Object.entries(value).sort(([a], [b]) => a.localeCompare(b)));
+}
+function entryKey(row) {
+  return `${row.kind}:${row.entry}`;
+}
+function entryLabel(row) {
+  return row?.entry ? `${row.kind || '?'}:${row.entry}` : 'row';
+}
+function resolvePath(path, repoRoot = REPO_ROOT) {
+  if (path.startsWith('/')) return path;
+  return resolve(repoRoot, path);
+}
+function toRepoRelative(path, repoRoot = REPO_ROOT) {
+  return relative(repoRoot, path) || basename(path);
+}
+function round(value, digits = 3) {
+  return Math.round(value * 10 ** digits) / 10 ** digits;
+}
+function escapeMarkdown(value) {
+  return String(value ?? '—').replace(/\|/gu, '\\|').replace(/\n/gu, ' ');
+}
+function printHelp() {
+  console.log(`Usage: node scripts/lexicon-freshness.mjs [--check] [--input <private.jsonl>] [options]
+Default mode validates lexicon/ai-*.md entry-provenance sidecars.
+When --input is provided, it also computes an EN hot/cold document-frequency
+lift report from local JSONL rows. Reports never include raw text.
+Options:
+  --check                    Validate lexicon provenance sidecars
+  --input <path>             Local/private JSONL corpus with text fields
+  --lang <lang>              Language to mine (default: en)
+  --source-id <id>           Stable source id for the report
+  --source-note <text>       Human-readable source note
+  --source-url <url>         Source URL for the report (repeatable)
+  --validated-at <date>      Validation date (default: today)
+  --output-json <path>       Write public-safe aggregate JSON
+  --output-md <path>         Write public-safe Markdown report
+  --json                     Print JSON instead of Markdown`);
+}
+function main() {
+  const args = parseArgs();
+  if (args.help) {
+    printHelp();
+    return;
+  }
+  const outputs = {};
+  let failed = false;
+  if (args.check) {
+    const check = checkLexiconProvenance();
+    outputs.check = check;
+    if (!args.json) console.log(renderCheckMarkdown(check));
+    if (!check.ok) failed = true;
+  }
+  if (args.input) {
+    const loaded = loadJsonlRows(args.input);
+    const entries = loadEntriesForLang(args.lang);
+    const report = loaded.errors.length
+      ? { language: args.lang, errors: loaded.errors, warnings: [], decisions: [], gate: { ready: false, hot_docs: 0, cold_docs: 0, hot_registers: {}, cold_registers: {} }, entries: 0, kept: 0, dropped: 0 }
+      : mineLexiconLift(loaded.rows, entries, {
+          lang: args.lang,
+          input: loaded.input,
+          sourceId: args.sourceId,
+          sourceNote: args.sourceNote,
+          sourceUrls: args.sourceUrls,
+          validatedAt: args.validatedAt,
+        });
+    const written = report.errors.length === 0 ? writeLiftReport(report, { outputJson: args.outputJson, outputMd: args.outputMd }) : {};
+    outputs.report = report;
+    outputs.written = written;
+    if (!args.json) {
+      console.log(renderLiftMarkdown(report));
+      for (const path of Object.values(written)) console.log(`Wrote ${path}`);
+    }
+    if (report.errors.length) failed = true;
+  }
+  if (args.json) console.log(JSON.stringify(outputs, null, 2));
+  if (failed) process.exit(1);
+}
+// Run the CLI only when this file is the script Node was started with; when
+// argv[1] resolves to a different path (e.g. the module was imported), main()
+// is not called.
+if (process.argv[1] && resolve(process.argv[1]) === fileURLToPath(import.meta.url)) {
+  main();
+}

package/scripts/lint.mjs CHANGED Viewed

@@ -8,7 +8,7 @@ import { fileURLToPath } from 'node:url';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const REPO_ROOT = resolve(__dirname, '..');
-const ROOTS = ['bin', 'scripts', 'src', 'tests'];
+const ROOTS = ['bin', 'scripts', 'src', 'tests', 'playground'];
 const EXT_RE = /\.(?:js|mjs)$/;
 function walk(dir, out = []) {

package/scripts/precommit-score.mjs CHANGED Viewed

@@ -5,13 +5,14 @@ function parseArgs(argv) {
   const out = { files: [], gate: 30, lang: 'auto', maxFiles: 200 };
   for (let i = 0; i < argv.length; i++) {
     const arg = argv[i];
-    if (arg === '--gate' || arg === '--score-threshold') out.gate = Number(argv[++i]);
+    if (arg === '--score-threshold') out.gate = Number(argv[++i]);
     else if (arg === '--lang') out.lang = argv[++i] || 'auto';
     else if (arg === '--max-files') out.maxFiles = Number(argv[++i]);
-    else if (!arg.startsWith('-')) out.files.push(arg);
+    else if (arg.startsWith('-')) throw new Error(`unknown option ${arg}`);
+    else out.files.push(arg);
   }
   if (!Number.isFinite(out.gate) || out.gate < 0 || out.gate > 100) {
-    throw new Error(`--gate expects a number from 0 to 100, got ${out.gate}`);
+    throw new Error(`--score-threshold expects a number from 0 to 100, got ${out.gate}`);
   }
   return out;
 }

package/scripts/prose-score.mjs CHANGED Viewed

@@ -4,12 +4,20 @@ import { fileURLToPath } from 'node:url';
 import { analyzeText } from '../src/features/index.js';
 import { loadLexicon } from '../src/features/lexicon.js';
+import {
+  paragraphSignalStrength,
+  summarizeSignalStrength,
+} from '../src/features/signal-strength.js';
+import { loadPatterns } from '../src/loader.js';
+export { paragraphSignalStrength, summarizeSignalStrength };
 const __dirname = dirname(fileURLToPath(import.meta.url));
 export const DEFAULT_REPO_ROOT = resolve(__dirname, '..');
 export const DEFAULT_PROSE_EXTENSIONS = ['.md', '.mdx', '.txt', '.rst', '.adoc'];
 const lexiconCache = new Map();
+const patternTermCache = new Map();
 export function parseBoolean(value, defaultValue = false) {
   if (value === undefined || value === null || value === '') return defaultValue;
@@ -33,13 +41,16 @@ export function stripNonProse(markdown) {
     .replace(/^---\n[\s\S]*?\n---\s*/, '\n')
     .replace(/```[\s\S]*?```/g, '\n')
     .replace(/~~~[\s\S]*?~~~/g, '\n')
+    // Remove Markdown tables before stripping inline HTML. Cells such as
+    // `p<0.01` are prose-visible math, not HTML tags; if HTML stripping runs
+    // first it can consume across rows and leave table fragments behind.
+    .replace(/^\s*\|.*\|\s*$/gm, '\n')
     .replace(/`[^`]*`/g, ' ')
     .replace(/!\[[^\]]*\]\([^)]*\)/g, ' ')
     .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')
     .replace(/<svg[\s\S]*?<\/svg>/gi, '\n')
     .replace(/<[^>]+>/g, ' ')
-    .replace(/^\s*\|.*\|\s*$/gm, '\n')
-    .replace(/^\s{0,3}#{1,6}\s+/gm, '')
+    .replace(/^\s{0,3}#{1,6}\s+.*$/gm, '\n')
     .replace(/^\s{0,3}>\s?/gm, '')
     .replace(/^\s*[-*+]\s+\[[ xX]\]\s+/gm, '')
     .replace(/^\s*[-*+]\s+/gm, '')
@@ -76,6 +87,14 @@ function getLexicon(lang, repoRoot) {
   return lexiconCache.get(key);
 }
+function getPatternWatchTerms(lang, repoRoot) {
+  const key = `${repoRoot}\0${lang}`;
+  if (!patternTermCache.has(key)) {
+    patternTermCache.set(key, extractPatternWatchTerms(loadPatterns(repoRoot, lang)));
+  }
+  return patternTermCache.get(key);
+}
 export function scoreText(text, { file = '', lang = 'auto', gate = 30, repoRoot = DEFAULT_REPO_ROOT } = {}) {
   const prose = stripNonProse(text);
   const resolvedLang = detectLanguage(file, prose, lang);
@@ -84,21 +103,76 @@ export function scoreText(text, { file = '', lang = 'auto', gate = 30, repoRoot
     repoRoot,
     lexicon: getLexicon(resolvedLang, repoRoot),
   });
+  const patternHits = countPatternWatchHits(prose, getPatternWatchTerms(resolvedLang, repoRoot), resolvedLang);
   const paragraphCount = result.paragraphs.length;
   const hotCount = result.paragraphs.filter((p) => p.hot).length;
   const score = paragraphCount ? (hotCount / paragraphCount) * 100 : 0;
+  const signalScore = summarizeSignalStrength(result.paragraphs);
   return {
     file,
     lang: resolvedLang,
     paragraphCount,
     hotCount,
     score,
+    signalScore,
+    patternHits,
     gate,
     overGate: score > gate,
     skipped: paragraphCount === 0,
   };
 }
+export function extractPatternWatchTerms(patterns = []) {
+  const terms = [];
+  for (const pattern of patterns) {
+    for (const line of String(pattern.body || '').split('\n')) {
+      const match = line.match(/^\*\*([^*]+)\*\*\s*(.+)$/);
+      if (!match || !isWatchLabel(match[1])) continue;
+      const value = match[2].replace(/\s+—\s+/g, ', ');
+      for (const raw of value.split(/[,，、;]/)) {
+        const term = cleanPatternTerm(raw);
+        if (term.length >= 2) terms.push(term);
+      }
+    }
+  }
+  return [...new Set(terms)];
+}
+export function countPatternWatchHits(text, terms = [], lang = 'en') {
+  if (!text || !Array.isArray(terms) || terms.length === 0) return 0;
+  const haystack = lang === 'en' ? String(text).toLowerCase() : String(text);
+  let count = 0;
+  for (const term of terms) {
+    const needle = lang === 'en' ? term.toLowerCase() : term;
+    if (needle && haystack.includes(needle)) count++;
+  }
+  return count;
+}
+function isWatchLabel(label) {
+  const normalized = label.replace(/[：:]/g, '').trim().toLowerCase();
+  return [
+    'watch words',
+    '주의 어휘',
+    '고빈도 ai 어휘',
+    '고빈도 어휘',
+    '고빈도 표현',
+    '高频词汇',
+    '注意词汇',
+    '注意词',
+    '高頻度語彙',
+    '注意語彙',
+    '注意語',
+  ].some((needle) => normalized.includes(needle.toLowerCase()));
+}
+function cleanPatternTerm(term) {
+  return String(term || '')
+    .replace(/^[\s`*_"'“”‘’「」『』()（）]+|[\s`*_"'“”‘’「」『』()（）.。]+$/g, '')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
 function isInside(base, candidate) {
   const rel = relative(base, candidate);
   return rel === '' || (!rel.startsWith('..') && !rel.includes(`..${sep}`));
@@ -160,11 +234,13 @@ export function formatMarkdownReport(rows, { gate = 30, title = 'Patina prose ho
     return lines.join('\n');
   }
-  lines.push('| status | file | lang | paragraphs | hot | score |');
-  lines.push('|---|---|---:|---:|---:|---:|');
+  lines.push('| status | file | lang | paragraphs | hot | score | signal | pattern hits |');
+  lines.push('|---|---|---:|---:|---:|---:|---:|---:|');
   for (const row of rows) {
+    const signalScore = Number.isFinite(Number(row.signalScore)) ? Number(row.signalScore) : 0;
+    const patternHits = Number.isFinite(Number(row.patternHits)) ? Number(row.patternHits) : 0;
     lines.push(
-      `| ${statusIcon(row)} | ${escapeCell(row.file)} | ${row.lang} | ${row.paragraphCount} | ${row.hotCount} | ${row.score.toFixed(1)}% |`
+      `| ${statusIcon(row)} | ${escapeCell(row.file)} | ${row.lang} | ${row.paragraphCount} | ${row.hotCount} | ${row.score.toFixed(1)}% | ${signalScore.toFixed(1)} | ${patternHits} |`
     );
   }
   return lines.join('\n');