npm - proof-pr - Versions diffs - 0.1.6 → 0.1.8 - Mend

proof-pr 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js CHANGED Viewed

@@ -23111,7 +23111,15 @@ function preprocess(fn, schema) {
 const riskLevelSchema = schemas_enum(["low", "medium", "high"]);
+const findingSeveritySchema = schemas_enum(["info", "low", "medium", "high"]); // per-finding severity: the three risk levels plus "info"
 const localeSchema = schemas_enum(["en", "zh-CN"]);
+const evidenceRequirementSchema = schemas_enum([ // evidence kinds a contract's `requires` list may name
+    "verification",
+    "reproduction",
+    "screenshot",
+    "changelog",
+    "permission-rationale"
+]);
 const configPresetSchema = schemas_enum([
     "balanced",
     "open-source-maintainer",
@@ -23149,6 +23157,38 @@ const DEFAULT_SENSITIVE_PATHS = [
     "go.sum"
 ];
 const DEFAULT_TEST_PATHS = ["src/**", "packages/**/src/**", "app/**", "lib/**"];
+const WORKFLOW_EVIDENCE_CONTRACTS = [ // used as the `evidence.contracts` default of one PRESET_DEFAULTS entry
+    {
+        id: "workflow-permission-rationale", // presumably surfaces as ruleId "evidence-contract:<id>" — TODO confirm against the rule engine
+        title: "Workflow changes need a permission rationale",
+        paths: [".github/workflows/**", ".github/actions/**"], // path patterns that trigger this contract
+        requires: ["verification", "permission-rationale"], // each entry is one of the evidenceRequirementSchema values
+        severity: "high",
+        recommendation: "Explain why the workflow needs this trigger or permission, and include verification that untrusted PR code cannot reach privileged tokens."
+    }
+];
+const DEPENDENCY_EVIDENCE_CONTRACTS = [ // used as the `evidence.contracts` default of one PRESET_DEFAULTS entry
+    {
+        id: "dependency-upgrade-evidence",
+        title: "Dependency changes need upgrade evidence",
+        paths: [ // manifests are listed both bare and with a "**/" prefix
+            "package.json",
+            "**/package.json",
+            "pnpm-lock.yaml", // NOTE(review): lockfiles have no "**/" variant, so nested lockfiles may not match — confirm against the glob matcher
+            "package-lock.json",
+            "yarn.lock",
+            "requirements.txt",
+            "**/requirements.txt",
+            "pyproject.toml",
+            "**/pyproject.toml",
+            "go.mod",
+            "**/go.mod"
+        ],
+        requires: ["verification", "changelog"], // each entry is one of the evidenceRequirementSchema values
+        severity: "medium",
+        recommendation: "Link changelog or migration notes and include the test command or CI evidence used to validate the dependency change."
+    }
+];
 const PRESET_DEFAULTS = {
     balanced: {},
     "open-source-maintainer": {
@@ -23179,6 +23219,9 @@ const PRESET_DEFAULTS = {
         requireTests: {
             enabled: true,
             paths: ["src/**", "packages/**/src/**", "app/**", "lib/**", "server/**", "api/**"]
+        },
+        evidence: {
+            contracts: WORKFLOW_EVIDENCE_CONTRACTS
         }
     },
     "ai-generated-pr": {
@@ -23217,9 +23260,20 @@ const PRESET_DEFAULTS = {
         requireTests: {
             enabled: true,
             paths: DEFAULT_TEST_PATHS
+        },
+        evidence: {
+            contracts: DEPENDENCY_EVIDENCE_CONTRACTS
         }
     }
 };
+const evidenceContractSchema = object({ // shape of one `evidence.contracts` entry (comments below assume zod-style builder semantics — confirm)
+    id: schemas_string().min(1), // required, non-empty
+    title: schemas_string().min(1).optional(), // optional, non-empty when present
+    paths: array(schemas_string().min(1)).min(1), // at least one non-empty path pattern
+    requires: array(evidenceRequirementSchema).min(1), // at least one evidence kind from evidenceRequirementSchema
+    severity: findingSeveritySchema.default("medium"), // "medium" when omitted
+    recommendation: schemas_string().min(1).optional() // optional, non-empty when present
+});
 const configSchema = object({
     preset: configPresetSchema.default("balanced"),
     locale: localeSchema.default("en"),
@@ -23238,6 +23292,10 @@ const configSchema = object({
         flagLifecycleScripts: schemas_boolean().default(true)
     })
         .default({ flagNewPackages: true, flagMajorUpgrades: true, flagLifecycleScripts: true }),
+    evidence: object({
+        contracts: array(evidenceContractSchema).default([])
+    })
+        .default({ contracts: [] }),
     comment: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true })
 });
 function parseConfig(input) {
@@ -23313,6 +23371,434 @@ function renderMarkdownReport(result, locale = "en") {
     }
     return renderEnglishMarkdownReport(result);
 }
+/**
+ * Renders an analysis result as a self-contained static HTML page (inline CSS, no scripts).
+ *
+ * @param {object} result - Analysis result; reads `risk`, `reviewDecision`, `evidenceScore`
+ *   (`value`, `grade`, `deductions`), `findings`, `summary` and `reviewPlan`
+ *   (`actionItems`, `focusFiles`).
+ * @param {string} [locale="en"] - "zh-CN" selects Chinese labels and translations; any other
+ *   value renders English.
+ * @returns {string} Complete HTML document.
+ */
+function renderHtmlReport(result, locale = "en") {
+    const labels = htmlLabels(locale);
+    const risk = locale === "zh-CN" ? translateRisk(result.risk) : result.risk;
+    const decision = formatReviewDecision(result.reviewDecision, locale);
+    const scoreGrade = formatEvidenceGrade(result.evidenceScore.grade, locale);
+    const findingsBySeverity = countFindingsBySeverity(result.findings);
+    const ruleCounts = countFindingsByRule(result.findings);
+    // Clamp before interpolating into the <style> block: a non-numeric or out-of-range
+    // score would otherwise emit invalid CSS (or arbitrary text inside the stylesheet).
+    const scorePercent = Math.min(100, Math.max(0, Number(result.evidenceScore.value) || 0));
+    // Only emit a tone class for the risk levels the stylesheet actually defines.
+    const riskTone = ["low", "medium", "high"].includes(result.risk) ? `tone-${result.risk}` : "";
+    // Each row is [label, displayed state, whether the signal counts as present].
+    const evidenceSignals = [
+        [labels.prDescription, locale === "zh-CN" ? translateDescriptionState(result.summary.pullRequestDescription) : result.summary.pullRequestDescription, result.summary.pullRequestDescription === "present"],
+        [labels.verification, yesNo(result.summary.verificationEvidence, locale), result.summary.verificationEvidence],
+        [labels.reproduction, yesNo(result.summary.reproductionEvidence, locale), result.summary.reproductionEvidence],
+        [labels.screenshot, yesNo(result.summary.screenshotEvidence, locale), result.summary.screenshotEvidence],
+        [labels.changelog, yesNo(result.summary.changelogEvidence, locale), result.summary.changelogEvidence],
+        [labels.permissionRationale, yesNo(result.summary.permissionRationaleEvidence, locale), result.summary.permissionRationaleEvidence]
+    ];
+    return `<!doctype html>
+<html lang="${locale === "zh-CN" ? "zh-CN" : "en"}">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>ProofPR ${labels.report}</title>
+  <style>
+    :root {
+      color-scheme: light;
+      --bg: #f6f7f9;
+      --panel: #ffffff;
+      --ink: #17202a;
+      --muted: #667085;
+      --line: #d9dee7;
+      --green: #138a5e;
+      --amber: #b7791f;
+      --red: #c24135;
+      --blue: #2563a9;
+      --soft-green: #e8f6ef;
+      --soft-amber: #fff3d6;
+      --soft-red: #fdebea;
+      --soft-blue: #eaf2fb;
+    }
+    * { box-sizing: border-box; }
+    body {
+      margin: 0;
+      background: var(--bg);
+      color: var(--ink);
+      font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif;
+      line-height: 1.5;
+    }
+    main {
+      width: min(1180px, calc(100vw - 32px));
+      margin: 0 auto;
+      padding: 32px 0 48px;
+    }
+    .topbar {
+      display: flex;
+      justify-content: space-between;
+      gap: 18px;
+      align-items: flex-start;
+      margin-bottom: 20px;
+    }
+    h1, h2, h3, p { margin: 0; }
+    h1 {
+      font-size: 28px;
+      line-height: 1.2;
+    }
+    h2 {
+      font-size: 17px;
+      margin-bottom: 14px;
+    }
+    h3 {
+      font-size: 15px;
+      margin-bottom: 8px;
+    }
+    .subtitle {
+      color: var(--muted);
+      margin-top: 8px;
+      max-width: 760px;
+    }
+    .pill {
+      display: inline-flex;
+      align-items: center;
+      border: 1px solid var(--line);
+      border-radius: 999px;
+      padding: 5px 10px;
+      background: var(--panel);
+      color: var(--muted);
+      font-size: 13px;
+      white-space: nowrap;
+    }
+    .grid {
+      display: grid;
+      grid-template-columns: repeat(12, 1fr);
+      gap: 14px;
+    }
+    .card {
+      background: var(--panel);
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 18px;
+      box-shadow: 0 1px 2px rgba(16, 24, 40, 0.04);
+    }
+    .metric { grid-column: span 3; }
+    .wide { grid-column: span 8; }
+    .side { grid-column: span 4; }
+    .full { grid-column: 1 / -1; }
+    .metric-label {
+      color: var(--muted);
+      font-size: 13px;
+      margin-bottom: 8px;
+    }
+    .metric-value {
+      font-size: 27px;
+      font-weight: 720;
+      line-height: 1.1;
+    }
+    .tone-low { color: var(--green); background: var(--soft-green); border-color: #b8e5cf; }
+    .tone-medium { color: var(--amber); background: var(--soft-amber); border-color: #f1d28a; }
+    .tone-high { color: var(--red); background: var(--soft-red); border-color: #f3b6b1; }
+    .scorebar {
+      width: 100%;
+      height: 16px;
+      border: 1px solid var(--line);
+      border-radius: 999px;
+      overflow: hidden;
+      margin: 14px 0 10px;
+      background: #eef1f5;
+    }
+    .scorefill {
+      height: 100%;
+      width: ${scorePercent}%;
+      background: ${scoreColor(scorePercent)};
+    }
+    .summary-grid {
+      display: grid;
+      grid-template-columns: repeat(4, minmax(0, 1fr));
+      gap: 10px;
+    }
+    .summary-item {
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 10px 12px;
+      background: #fbfcfd;
+    }
+    .summary-item strong {
+      display: block;
+      font-size: 20px;
+      margin-bottom: 2px;
+    }
+    .summary-item span {
+      color: var(--muted);
+      font-size: 12px;
+    }
+    .signal-list, .action-list, .finding-list, .focus-list, .deduction-list, .rule-list {
+      display: grid;
+      gap: 10px;
+    }
+    .signal, .action, .focus, .deduction, .rule-row {
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 10px 12px;
+      background: #fbfcfd;
+    }
+    .signal {
+      display: flex;
+      justify-content: space-between;
+      gap: 12px;
+      align-items: center;
+    }
+    .signal-name, .action-title, .finding-title {
+      font-weight: 680;
+    }
+    .signal-state {
+      font-size: 12px;
+      border-radius: 999px;
+      padding: 3px 8px;
+      border: 1px solid var(--line);
+      white-space: nowrap;
+    }
+    .severity-grid {
+      display: grid;
+      grid-template-columns: repeat(4, minmax(0, 1fr));
+      gap: 8px;
+    }
+    .severity {
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 10px;
+      background: #fbfcfd;
+    }
+    .severity strong {
+      display: block;
+      font-size: 22px;
+    }
+    .muted {
+      color: var(--muted);
+      font-size: 13px;
+    }
+    .action {
+      display: grid;
+      grid-template-columns: auto 1fr;
+      gap: 10px;
+    }
+    .box {
+      width: 18px;
+      height: 18px;
+      border: 2px solid var(--blue);
+      border-radius: 4px;
+      margin-top: 2px;
+    }
+    .priority {
+      display: inline-flex;
+      margin-left: 6px;
+      color: var(--muted);
+      font-size: 12px;
+      font-weight: 560;
+    }
+    .finding {
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 14px;
+      background: #fff;
+    }
+    .finding-head {
+      display: flex;
+      justify-content: space-between;
+      gap: 12px;
+      align-items: flex-start;
+      margin-bottom: 8px;
+    }
+    code {
+      font-family: "SFMono-Regular", Consolas, "Liberation Mono", monospace;
+      font-size: 12px;
+      background: #f0f3f7;
+      border: 1px solid var(--line);
+      border-radius: 6px;
+      padding: 2px 5px;
+      word-break: break-word;
+    }
+    .evidence-list {
+      margin: 10px 0 0;
+      padding-left: 18px;
+      color: var(--muted);
+    }
+    .footer {
+      color: var(--muted);
+      font-size: 12px;
+      margin-top: 18px;
+      text-align: center;
+    }
+    @media (max-width: 860px) {
+      main { width: min(100vw - 20px, 1180px); padding-top: 20px; }
+      .topbar { display: block; }
+      .pill { margin-top: 12px; }
+      .metric, .wide, .side { grid-column: 1 / -1; }
+      .summary-grid, .severity-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+    }
+  </style>
+</head>
+<body>
+  <main>
+    <section class="topbar">
+      <div>
+        <h1>ProofPR ${labels.report}</h1>
+        <p class="subtitle">${labels.subtitle}</p>
+      </div>
+      <span class="pill">${labels.generated}</span>
+    </section>
+    <section class="grid">
+      <article class="card metric">
+        <div class="metric-label">${labels.risk}</div>
+        <div class="metric-value">${escapeHtml(risk)}</div>
+        <span class="pill ${riskTone}">${escapeHtml(result.risk)}</span>
+      </article>
+      <article class="card metric">
+        <div class="metric-label">${labels.evidenceScore}</div>
+        <div class="metric-value">${escapeHtml(String(result.evidenceScore.value))}/100</div>
+        <div class="scorebar" aria-label="${labels.evidenceScore}">
+          <div class="scorefill"></div>
+        </div>
+        <div class="muted">${escapeHtml(scoreGrade)}</div>
+      </article>
+      <article class="card metric">
+        <div class="metric-label">${labels.reviewGate}</div>
+        <div class="metric-value" style="font-size: 20px;">${escapeHtml(decision)}</div>
+      </article>
+      <article class="card metric">
+        <div class="metric-label">${labels.findings}</div>
+        <div class="metric-value">${result.findings.length}</div>
+        <div class="muted">${labels.findingsHint}</div>
+      </article>
+      <article class="card wide">
+        <h2>${labels.changeSummary}</h2>
+        <div class="summary-grid">
+          ${summaryItem(labels.filesChanged, result.summary.filesChanged)}
+          ${summaryItem(labels.additions, result.summary.additions)}
+          ${summaryItem(labels.deletions, result.summary.deletions)}
+          ${summaryItem(labels.sensitiveFiles, result.summary.sensitiveFilesChanged)}
+          ${summaryItem(labels.testFiles, result.summary.testFilesChanged)}
+          ${summaryItem(labels.highFindings, findingsBySeverity.high)}
+          ${summaryItem(labels.mediumFindings, findingsBySeverity.medium)}
+          ${summaryItem(labels.lowFindings, findingsBySeverity.low)}
+        </div>
+      </article>
+      <article class="card side">
+        <h2>${labels.evidenceSignals}</h2>
+        <div class="signal-list">
+          ${evidenceSignals.map(([name, state, ok]) => signalItem(name, state, ok)).join("\n")}
+        </div>
+      </article>
+      <article class="card wide">
+        <h2>${labels.reviewPlan}</h2>
+        <div class="action-list">
+          ${result.reviewPlan.actionItems.length > 0
+        ? result.reviewPlan.actionItems.map((action) => `
+          <div class="action">
+            <span class="box"></span>
+            <div>
+              <div class="action-title">${escapeHtml(localizeActionTitle(action.actionId, action.title, locale))}<span class="priority">${escapeHtml(formatPriority(action.priority, locale))}</span></div>
+              <div class="muted">${escapeHtml(localizeActionDetail(action.actionId, action.detail, locale))}</div>
+            </div>
+          </div>`).join("\n")
+        : `<div class="muted">${labels.noActions}</div>`}
+        </div>
+      </article>
+      <article class="card side">
+        <h2>${labels.findingDistribution}</h2>
+        <div class="severity-grid">
+          ${severityItem("high", findingsBySeverity.high, labels.high)}
+          ${severityItem("medium", findingsBySeverity.medium, labels.medium)}
+          ${severityItem("low", findingsBySeverity.low, labels.low)}
+          ${severityItem("info", findingsBySeverity.info, labels.info)}
+        </div>
+      </article>
+      <article class="card side">
+        <h2>${labels.focusFiles}</h2>
+        <div class="focus-list">
+          ${result.reviewPlan.focusFiles.length > 0
+        ? result.reviewPlan.focusFiles.map((file) => `
+          <div class="focus">
+            <div><code>${escapeHtml(file.path)}</code></div>
+            <div class="muted">${escapeHtml(localizeFocusReason(file.reasonId, file.reason, locale))}</div>
+          </div>`).join("\n")
+        : `<div class="muted">${labels.noFocusFiles}</div>`}
+        </div>
+      </article>
+      <article class="card side">
+        <h2>${labels.scoreDetails}</h2>
+        <div class="deduction-list">
+          ${result.evidenceScore.deductions.length > 0
+        ? result.evidenceScore.deductions.map((deduction) => `
+          <div class="deduction">
+            <strong>-${escapeHtml(String(deduction.points))}</strong>
+            <div class="muted">${escapeHtml(localizeDeduction(deduction.reasonId, deduction.message, locale))}</div>
+          </div>`).join("\n")
+        : `<div class="muted">${labels.noDeductions}</div>`}
+        </div>
+      </article>
+      <article class="card full">
+        <h2>${labels.rulesCovered}</h2>
+        <div class="rule-list">
+          ${ruleCounts.length > 0
+        ? ruleCounts.map((item) => `<div class="rule-row"><code>${escapeHtml(item.ruleId)}</code> <span class="muted">${item.count}</span></div>`).join("\n")
+        : `<div class="muted">${labels.noRules}</div>`}
+        </div>
+      </article>
+      <article class="card full">
+        <h2>${labels.findings}</h2>
+        <div class="finding-list">
+          ${result.findings.length > 0
+        ? result.findings.map((finding) => htmlFinding(finding, locale)).join("\n")
+        : `<div class="muted">${labels.noFindings}</div>`}
+        </div>
+      </article>
+    </section>
+    <p class="footer">${labels.footer}</p>
+  </main>
+</body>
+</html>
+`;
+}
 function getReportMarker() { // accessor for the module-level REPORT_MARKER constant (defined outside this excerpt)
     return REPORT_MARKER;
 }
@@ -23375,6 +23861,9 @@ function renderEnglishMarkdownReport(result) {
         `- PR description: ${result.summary.pullRequestDescription}`,
         `- Verification evidence: ${formatBoolean(result.summary.verificationEvidence)}`,
         `- Reproduction context: ${formatBoolean(result.summary.reproductionEvidence)}`,
+        `- Screenshot evidence: ${formatBoolean(result.summary.screenshotEvidence)}`,
+        `- Changelog evidence: ${formatBoolean(result.summary.changelogEvidence)}`,
+        `- Permission rationale: ${formatBoolean(result.summary.permissionRationaleEvidence)}`,
         ""
     ];
     appendEvidenceScoreSection(lines, result, "en");
@@ -23409,6 +23898,9 @@ function renderChineseMarkdownReport(result) {
         `- PR 描述质量：${translateDescriptionState(result.summary.pullRequestDescription)}`,
         `- 验证证据：${formatChineseBoolean(result.summary.verificationEvidence)}`,
         `- 复现上下文：${formatChineseBoolean(result.summary.reproductionEvidence)}`,
+        `- 截图或视觉证据：${formatChineseBoolean(result.summary.screenshotEvidence)}`,
+        `- Changelog 或迁移证据：${formatChineseBoolean(result.summary.changelogEvidence)}`,
+        `- 权限理由证据：${formatChineseBoolean(result.summary.permissionRationaleEvidence)}`,
         ""
     ];
     appendEvidenceScoreSection(lines, result, "zh-CN");
@@ -23519,6 +24011,11 @@ function maintainerFocus(findings, locale) {
                 ? "轮换任何可能暴露的凭证，并在移除 secret 前阻止合并。"
                 : "Rotate any exposed credential and block the PR until secrets are removed.");
         }
+        else if (finding.ruleId.startsWith("evidence-contract:")) {
+            focus.add(locale === "zh-CN"
+                ? "先要求贡献者补齐仓库定义的证据契约，再投入深度 review。"
+                : "Ask the contributor to satisfy the repository-defined evidence contract before deep review.");
+        }
         else if (finding.ruleId === "workflow-permission-change") {
             focus.add(locale === "zh-CN"
                 ? "合并前重点审查 GitHub Actions 权限。"
@@ -23559,6 +24056,11 @@ function maintainerFocus(findings, locale) {
                 ? "重点审查 pull_request_target 是否会用高权限 token 执行不可信 PR 代码。"
                 : "Review whether pull_request_target can execute untrusted PR code with privileged tokens.");
         }
+        else if (finding.ruleId === "workflow-untrusted-checkout") {
+            focus.add(locale === "zh-CN"
+                ? "重点审查 workflow 是否 checkout 并执行了不可信 PR head 代码。"
+                : "Review whether the workflow checks out and executes untrusted PR head code.");
+        }
         else if (finding.ruleId === "mcp-credential-risk") {
             focus.add(locale === "zh-CN"
                 ? "重点审查 MCP command、args 和凭证处理方式。"
@@ -23573,6 +24075,13 @@ function maintainerFocus(findings, locale) {
     return [...focus];
 }
 function translateFinding(finding) {
+    if (finding.ruleId.startsWith("evidence-contract:")) {
+        return {
+            title: "证据契约未满足",
+            message: "该 PR 命中了仓库自定义证据契约，但 PR 描述中缺少必需证据。",
+            recommendation: "建议要求贡献者补齐缺失证据后再深入 review。"
+        };
+    }
     if (finding.ruleId === "change-size") {
         const files = finding.evidence?.find((item) => item.startsWith("files: "))?.replace("files: ", "");
         const lines = finding.evidence?.find((item) => item.startsWith("changed lines: "))?.replace("changed lines: ", "");
@@ -23648,6 +24157,15 @@ function translateFinding(finding) {
             recommendation: "请确认该 workflow 不会用高权限 token、secret 或写权限执行不可信 PR 代码。"
         };
     }
+    if (finding.ruleId === "workflow-untrusted-checkout") {
+        return {
+            title: "Workflow checkout 了 PR head",
+            message: finding.path
+                ? `${finding.path} 引用了 PR head 代码来源，需要审查它是否会在高权限上下文中执行。`
+                : finding.message,
+            recommendation: "避免在 pull_request_target、写权限 token 或可读取 secret 的上下文中运行不可信 PR 代码。"
+        };
+    }
     if (finding.ruleId === "mcp-credential-risk") {
         return {
             title: "MCP 配置需要重点审查",
@@ -23666,8 +24184,15 @@ function translateFinding(finding) {
 }
 function translateEvidence(item) {
+    // Translates one English evidence line into Chinese for zh-CN reports.
+    // Takes the evidence string and returns the translated string.
+    //
+    // Evidence-contract lines carry a known prefix and are handled on their own, so the
+    // generic "line N: ..." rewriting and the requirement-name translation never touch
+    // file paths (previously "matched files: docs/changelog.md" became "docs/变更日志.md").
+    if (item.startsWith("matched files: ")) {
+        return item.replace("matched files: ", "命中文件：");
+    }
+    if (item.startsWith("missing evidence: ")) {
+        // Only this line lists evidence requirement names, so only here are they translated.
+        return item
+            .replace("missing evidence: ", "缺失证据：")
+            .replace(/\bverification\b/g, "验证")
+            .replace(/\breproduction\b/g, "复现")
+            .replace(/\bscreenshot\b/g, "截图")
+            .replace(/\bchangelog\b/g, "变更日志")
+            .replace(/\bpermission-rationale\b/g, "权限理由");
+    }
+    // Generic evidence: "files: N", "changed lines: N" and "line N: text".
+    return item
+        .replace("files: ", "文件数：")
+        .replace("changed lines: ", "变更行数：")
+        .replace("line ", "第 ")
+        .replace(": ", " 行：");
 }
@@ -23719,12 +24244,14 @@ function translateReviewActionTitle(actionId, fallback) {
         "ask-for-evidence-before-review": "深入 review 前先要求补充证据",
         "review-with-focus": "带着重点清单进行 review",
         "normal-review": "进入常规 review",
+        "satisfy-evidence-contract": "要求补齐证据契约",
         "improve-pr-description": "要求补充更清楚的 PR 描述",
         "add-verification-evidence": "要求补充测试或手动验证证据",
         "add-reproduction-context": "要求补充复现或 before/after 上下文",
         "rotate-secret": "轮换并移除暴露的凭证",
         "justify-workflow-permissions": "要求说明 workflow 权限最小化理由",
         "review-privileged-pr-trigger": "审查 pull_request_target 高权限触发器",
+        "review-untrusted-checkout": "审查 PR head checkout 的权限边界",
         "review-package-lifecycle-script": "审查包生命周期脚本",
         "review-mcp-execution-surface": "审查 MCP 命令、参数和凭证处理",
         "request-review-map-or-split": "要求拆分 PR 或提供逐文件 review map",
@@ -23739,12 +24266,14 @@ function translateReviewActionDetail(actionId, fallback) {
         "ask-for-evidence-before-review": "要求测试、截图、复现步骤或更清楚的 PR 描述，再投入详细 review。",
         "review-with-focus": "优先使用下面的风险发现和重点文件作为第一轮 review map。",
         "normal-review": "当前证据足够支撑维护者进行常规 review。",
+        "satisfy-evidence-contract": "该 PR 命中了仓库自定义证据契约，但 PR 描述里缺少必需证据。",
         "improve-pr-description": "贡献者应说明为什么改、改了什么、如何验证，以及是否有发布或兼容性风险。",
         "add-verification-evidence": "要求测试输出、CI 链接、截图，或简短的手动验证说明。",
         "add-reproduction-context": "PR 应包含复现步骤、预期/实际行为，或相关 before/after 截图。",
         "rotate-secret": "在 secret 从 PR 中移除并完成轮换前，不要合并。",
         "justify-workflow-permissions": "确认写权限或 OIDC 是否必要，并检查不可信 PR 是否能触发该 workflow。",
         "review-privileged-pr-trigger": "确认 workflow 不会用写权限 token、secret 或仓库权限执行不可信 PR 代码。",
+        "review-untrusted-checkout": "确认 job 不会在写权限 token、仓库 secret 或 pull_request_target 高权限上下文中运行不可信 PR 代码。",
         "review-package-lifecycle-script": "检查 install、postinstall、prepare 或 publish 脚本是否会执行非预期代码。",
         "review-mcp-execution-surface": "检查 MCP 配置是否提交凭证，或意外扩大本地执行面。",
         "request-review-map-or-split": "要求贡献者拆分无关改动，或标出最需要重点 review 的文件。",
@@ -23754,6 +24283,9 @@ function translateReviewActionDetail(actionId, fallback) {
     }[actionId] ?? fallback;
 }
 function translateFocusReason(reasonId, fallback) {
+    if (reasonId.startsWith("evidence-contract:")) {
+        return "仓库自定义证据契约未满足";
+    }
     return {
         "change-size": "review 面积相关 finding",
         "sensitive-path": "敏感路径发生变更",
@@ -23762,6 +24294,7 @@ function translateFocusReason(reasonId, fallback) {
         "dependency-lifecycle-script": "包生命周期脚本发生变更",
         "workflow-permission-change": "workflow 权限发生变更",
         "workflow-dangerous-trigger": "workflow 使用了高风险触发器",
+        "workflow-untrusted-checkout": "workflow checkout 了不可信 PR head",
         "mcp-credential-risk": "MCP 配置存在执行面或凭证风险",
         "missing-tests": "代码改动缺少测试或验证证据"
     }[reasonId] ?? fallback;
@@ -23771,6 +24304,9 @@ function translateScoreMessage(message) {
         "PR description provides review context.": "PR 描述提供了 review 上下文。",
         "Verification evidence was found.": "检测到测试或手动验证证据。",
         "Reproduction or before/after context was found.": "检测到复现步骤或 before/after 上下文。",
+        "Screenshot or visual evidence was found.": "检测到截图或视觉证据。",
+        "Changelog or migration evidence was found.": "检测到 changelog 或迁移证据。",
+        "Permission rationale evidence was found.": "检测到权限理由证据。",
         "Test files changed with the PR.": "PR 同时修改了测试文件。",
         "No configured sensitive files changed.": "没有改动已配置的敏感文件。"
     }[message] ?? message;
@@ -23793,9 +24329,196 @@ function translateDeduction(reasonId, fallback) {
         "dependency-major-upgrade": "依赖发生大版本升级。",
         "dependency-lifecycle-script": "包生命周期脚本可能在安装或发布阶段执行代码。",
         "workflow-dangerous-trigger": "pull_request_target workflow 需要重点审查高权限触发路径。",
+        "workflow-untrusted-checkout": "Workflow checkout PR head 代码，需要审查权限边界。",
+        "evidence-contract-missing": "仓库自定义证据契约未满足。",
         "missing-tests": "代码发生变更，但缺少测试变更或验证说明。"
     }[reasonId] ?? fallback;
 }
+/**
+ * Returns the UI strings used by the HTML report for the given locale.
+ * "zh-CN" yields the Chinese label set; every other locale yields English.
+ * A fresh object is returned on each call.
+ */
+function htmlLabels(locale) {
+    const useChinese = locale === "zh-CN";
+    if (!useChinese) {
+        const english = {
+            report: "Visual Report",
+            subtitle: "A shareable static view of PR risk, evidence quality, review gate, and maintainer actions.",
+            generated: "Generated by ProofPR",
+            risk: "Risk",
+            evidenceScore: "Evidence score",
+            reviewGate: "Review gate",
+            findings: "Findings",
+            findingsHint: "Signals that deserve maintainer attention",
+            changeSummary: "Change summary",
+            filesChanged: "Files changed",
+            additions: "Additions",
+            deletions: "Deletions",
+            sensitiveFiles: "Sensitive files",
+            testFiles: "Test files",
+            highFindings: "High findings",
+            mediumFindings: "Medium findings",
+            lowFindings: "Low findings",
+            evidenceSignals: "Evidence signals",
+            prDescription: "PR description",
+            verification: "Verification",
+            reproduction: "Reproduction",
+            screenshot: "Screenshot",
+            changelog: "Changelog",
+            permissionRationale: "Permission rationale",
+            reviewPlan: "Review plan",
+            noActions: "No additional action items.",
+            findingDistribution: "Finding distribution",
+            high: "High",
+            medium: "Medium",
+            low: "Low",
+            info: "Info",
+            focusFiles: "Focus files",
+            noFocusFiles: "No focus files.",
+            scoreDetails: "Evidence deductions",
+            noDeductions: "No deductions.",
+            rulesCovered: "Rules covered",
+            noRules: "No rule hits.",
+            noFindings: "No review-risk findings detected by the enabled rules.",
+            rule: "Rule",
+            severity: "Severity",
+            path: "Path",
+            detail: "Detail",
+            evidence: "Evidence",
+            recommendation: "Recommendation",
+            footer: "ProofPR does not replace human review. It helps maintainers decide whether evidence is enough and risk boundaries are clear."
+        };
+        return english;
+    }
+    const chinese = {
+        report: "可视化报告",
+        subtitle: "把 PR 风险、证据质量、Review 门禁和维护者行动清单整理成一个可分享的静态页面。",
+        generated: "Generated by ProofPR",
+        risk: "风险等级",
+        evidenceScore: "证据评分",
+        reviewGate: "Review 门禁",
+        findings: "风险发现",
+        findingsHint: "需要维护者优先关注的信号",
+        changeSummary: "改动概览",
+        filesChanged: "改动文件",
+        additions: "新增行",
+        deletions: "删除行",
+        sensitiveFiles: "敏感文件",
+        testFiles: "测试文件",
+        highFindings: "高风险",
+        mediumFindings: "中风险",
+        lowFindings: "低风险",
+        evidenceSignals: "证据信号",
+        prDescription: "PR 描述",
+        verification: "验证证据",
+        reproduction: "复现上下文",
+        screenshot: "截图证据",
+        changelog: "Changelog",
+        permissionRationale: "权限理由",
+        reviewPlan: "Review 行动清单",
+        noActions: "没有额外行动项。",
+        findingDistribution: "Finding 分布",
+        high: "高",
+        medium: "中",
+        low: "低",
+        info: "信息",
+        focusFiles: "重点文件",
+        noFocusFiles: "没有重点文件。",
+        scoreDetails: "证据扣分",
+        noDeductions: "没有扣分项。",
+        rulesCovered: "命中规则",
+        noRules: "没有规则命中。",
+        noFindings: "启用的规则没有发现需要优先关注的 review 风险。",
+        rule: "规则",
+        severity: "严重程度",
+        path: "路径",
+        detail: "详情",
+        evidence: "证据",
+        recommendation: "建议",
+        footer: "ProofPR 不替代人工 review，它帮助维护者先判断证据是否足够、风险边界是否清楚。"
+    };
+    return chinese;
+}
+/**
+ * Renders one tile of the change-summary grid.
+ *
+ * @param {string} label - Caption shown under the value.
+ * @param {number|string} value - Figure to display; stringified and HTML-escaped.
+ * @returns {string} HTML fragment for the tile.
+ */
+function summaryItem(label, value) {
+    // Escape the value as well as the label so this helper never emits caller data as raw markup.
+    return `<div class="summary-item"><strong>${escapeHtml(String(value))}</strong><span>${escapeHtml(label)}</span></div>`;
+}
+/**
+ * Renders one evidence-signal row: the signal name plus a state badge.
+ * A truthy `ok` gives the badge the "tone-low" class, otherwise "tone-medium".
+ */
+function signalItem(name, state, ok) {
+    const tone = ok ? "tone-low" : "tone-medium";
+    const nameHtml = `<span class="signal-name">${escapeHtml(name)}</span>`;
+    const stateHtml = `<span class="signal-state ${tone}">${escapeHtml(state)}</span>`;
+    return `<div class="signal">${nameHtml}${stateHtml}</div>`;
+}
+/**
+ * Renders one tile of the finding-distribution grid.
+ *
+ * @param {string} severity - "high", "medium" or "low" select a tone class; anything else
+ *   (including "info") renders without a tone.
+ * @param {number|string} value - Count to display; stringified and HTML-escaped.
+ * @param {string} label - Caption shown under the count.
+ * @returns {string} HTML fragment for the tile.
+ */
+function severityItem(severity, value, label) {
+    let tone = "";
+    if (severity === "high") {
+        tone = "tone-high";
+    }
+    else if (severity === "medium") {
+        tone = "tone-medium";
+    }
+    else if (severity === "low") {
+        tone = "tone-low";
+    }
+    // Escape the count as well as the label so this helper never emits caller data as raw markup.
+    return `<div class="severity ${tone}"><strong>${escapeHtml(String(value))}</strong><span>${escapeHtml(label)}</span></div>`;
+}
+/**
+ * Renders a single finding as an HTML card.
+ *
+ * @param {object} finding - Reads `ruleId`, `severity`, `title`, `message`, and the optional
+ *   `path`, `evidence` (array of strings) and `recommendation`.
+ * @param {string} locale - "zh-CN" translates title, message, recommendation, evidence and
+ *   severity; any other value renders the finding's own text.
+ * @returns {string} HTML fragment for the card.
+ */
+function htmlFinding(finding, locale) {
+    const labels = htmlLabels(locale);
+    const isChinese = locale === "zh-CN";
+    const translated = isChinese ? translateFinding(finding) : finding;
+    // "info" (and any unknown severity) stays neutral, matching severityItem();
+    // it previously fell through to the green "low" tone.
+    const toneClass = finding.severity === "high"
+        ? "tone-high"
+        : finding.severity === "medium"
+            ? "tone-medium"
+            : finding.severity === "low"
+                ? "tone-low"
+                : "";
+    const severityText = isChinese ? translateSeverity(finding.severity) : finding.severity;
+    const evidence = finding.evidence && finding.evidence.length > 0
+        ? `<ul class="evidence-list">${finding.evidence
+            .map((item) => `<li><code>${escapeHtml(isChinese ? translateEvidence(item) : item)}</code></li>`)
+            .join("")}</ul>`
+        : "";
+    const path = finding.path
+        ? `<div class="muted">${labels.path}: <code>${escapeHtml(finding.path)}</code></div>`
+        : "";
+    const recommendation = translated.recommendation
+        ? `<div class="muted">${labels.recommendation}: ${escapeHtml(translated.recommendation)}</div>`
+        : "";
+    return `<div class="finding">
+    <div class="finding-head">
+      <div>
+        <div class="finding-title">${escapeHtml(translated.title)}</div>
+        <div class="muted">${labels.rule}: <code>${escapeHtml(finding.ruleId)}</code></div>
+      </div>
+      <span class="pill ${toneClass}">${escapeHtml(severityText)}</span>
+    </div>
+    ${path}
+    <div class="muted">${labels.detail}: ${escapeHtml(translated.message)}</div>
+    ${evidence}
+    ${recommendation}
+  </div>`;
+}
+/**
+ * Tallies findings per severity.
+ *
+ * @param {Array<{severity: string}>} findings - Findings to count.
+ * @returns {{info: number, low: number, medium: number, high: number}} Counts per known
+ *   severity. Findings with any other severity are ignored.
+ */
+function countFindingsBySeverity(findings) {
+    const counts = { info: 0, low: 0, medium: 0, high: 0 };
+    for (const finding of findings) {
+        // Guard on own keys: an unknown severity would otherwise do `undefined += 1`
+        // and store NaN under a stray property.
+        if (Object.prototype.hasOwnProperty.call(counts, finding.severity)) {
+            counts[finding.severity] += 1;
+        }
+    }
+    return counts;
+}
+/**
+ * Counts findings per rule id.
+ * Returns `{ ruleId, count }` rows ordered by count (largest first); ties are ordered by
+ * `localeCompare` on the rule id.
+ */
+function countFindingsByRule(findings) {
+    const tally = new Map();
+    findings.forEach(({ ruleId }) => {
+        const seen = tally.has(ruleId) ? tally.get(ruleId) : 0;
+        tally.set(ruleId, seen + 1);
+    });
+    const rows = Array.from(tally, ([ruleId, count]) => ({ ruleId, count }));
+    rows.sort((a, b) => {
+        if (a.count !== b.count) {
+            return b.count - a.count;
+        }
+        return a.ruleId.localeCompare(b.ruleId);
+    });
+    return rows;
+}
+/**
+ * Maps an evidence score to the CSS colour variable used for the score bar:
+ * 85 and above is green, 70 and above blue, 50 and above amber, anything else red.
+ */
+function scoreColor(value) {
+    const bands = [
+        [85, "var(--green)"],
+        [70, "var(--blue)"],
+        [50, "var(--amber)"]
+    ];
+    const band = bands.find(([floor]) => value >= floor);
+    return band ? band[1] : "var(--red)";
+}
+/** Formats a flag through the Chinese formatter for "zh-CN", the English formatter otherwise. */
+function yesNo(value, locale) {
+    if (locale === "zh-CN") {
+        return formatChineseBoolean(value);
+    }
+    return formatBoolean(value);
+}
+/** Returns the translated action title for "zh-CN"; otherwise returns `fallback` unchanged. */
+function localizeActionTitle(actionId, fallback, locale) {
+    if (locale !== "zh-CN") {
+        return fallback;
+    }
+    return translateReviewActionTitle(actionId, fallback);
+}
+/** Returns the translated action detail for "zh-CN"; otherwise returns `fallback` unchanged. */
+function localizeActionDetail(actionId, fallback, locale) {
+    if (locale !== "zh-CN") {
+        return fallback;
+    }
+    return translateReviewActionDetail(actionId, fallback);
+}
+/** Returns the translated focus-file reason for "zh-CN"; otherwise returns `fallback` unchanged. */
+function localizeFocusReason(reasonId, fallback, locale) {
+    if (locale !== "zh-CN") {
+        return fallback;
+    }
+    return translateFocusReason(reasonId, fallback);
+}
+/** Returns the translated deduction message for "zh-CN"; otherwise returns `fallback` unchanged. */
+function localizeDeduction(reasonId, fallback, locale) {
+    if (locale !== "zh-CN") {
+        return fallback;
+    }
+    return translateDeduction(reasonId, fallback);
+}
+/**
+ * Escapes text for safe inclusion in HTML element content or a quoted attribute value.
+ *
+ * @param {*} value - Value to escape. Non-strings are stringified; `null` and `undefined`
+ *   become the empty string instead of throwing.
+ * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
+ */
+function escapeHtml(value) {
+    const entities = {
+        "&": "&amp;",
+        "<": "&lt;",
+        ">": "&gt;",
+        "\"": "&quot;",
+        "'": "&#39;"
+    };
+    // `== null` covers both null and undefined, e.g. a finding without a title.
+    const text = value == null ? "" : String(value);
+    // One pass, so "&" inside an already-produced entity is never escaped twice.
+    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
+}
 function formatBoolean(value) {
+    // Any truthy value renders as "yes", everything else as "no".
+    if (value) {
+        return "yes";
+    }
+    return "no";
 }
@@ -23908,12 +24631,27 @@ const REPRODUCTION_PATTERNS = [
     /\b(?:before|after|expected|actual)\b/i,
     /复现|重现|复现步骤|期望|实际/
 ];
+// Patterns that count as screenshot/visual evidence when any of them matches
+// the pull request title + body text (see analyzeEvidence). The first entry
+// covers English keywords (case-insensitive, word-bounded); the second covers
+// Chinese phrases.
+const SCREENSHOT_PATTERNS = [
+    /\b(?:screenshot|screen shot|screen recording|recording|gif|image|before\/after)\b/i,
+    /截图|录屏|效果图|前后对比|对比图/
+];
+// Patterns that count as changelog/migration evidence when any of them matches
+// the pull request title + body text (see analyzeEvidence). The first entry
+// covers English keywords (case-insensitive, word-bounded); the second covers
+// Chinese phrases.
+const CHANGELOG_PATTERNS = [
+    /\b(?:changelog|release notes?|migration guide|breaking changes?|upgrade guide)\b/i,
+    /变更日志|发布说明|迁移指南|升级说明|破坏性变更|兼容性/
+];
+// Patterns that count as a permission rationale when any of them matches the
+// pull request title + body text (see analyzeEvidence). The first entry covers
+// English keywords (case-insensitive, word-bounded); the second covers Chinese
+// phrases plus case-sensitive "OIDC" / "id-token" without word boundaries.
+const PERMISSION_RATIONALE_PATTERNS = [
+    /\b(?:least privilege|permission rationale|write permission|oidc|id-token|trusted workflow|untrusted pr|token scope)\b/i,
+    /权限理由|最小权限|写权限|OIDC|id-token|不可信 PR|高权限|token 权限|凭证权限/
+];
 function analyzeEvidence(context) {
     if (!context) {
         return {
             descriptionState: "unavailable",
             verificationEvidence: false,
-            reproductionEvidence: false
+            reproductionEvidence: false,
+            screenshotEvidence: false,
+            changelogEvidence: false,
+            permissionRationaleEvidence: false
         };
     }
     const text = [context.title ?? "", context.body ?? ""].join("\n").trim();
@@ -23921,7 +24659,10 @@ function analyzeEvidence(context) {
     return {
         descriptionState: descriptionState(body),
         verificationEvidence: matchesAnyPattern(text, VERIFICATION_PATTERNS),
-        reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS)
+        reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS),
+        screenshotEvidence: matchesAnyPattern(text, SCREENSHOT_PATTERNS),
+        changelogEvidence: matchesAnyPattern(text, CHANGELOG_PATTERNS),
+        permissionRationaleEvidence: matchesAnyPattern(text, PERMISSION_RATIONALE_PATTERNS)
     };
 }
 function descriptionState(body) {
@@ -24102,9 +24843,11 @@ function analyzeDiffFiles(files, config, pullRequest) {
     findings.push(...analyzeSensitivePaths(activeFiles, config));
     findings.push(...analyzeMissingTests(activeFiles, config, pullRequest));
     findings.push(...analyzePullRequestEvidence(activeFiles, pullRequest));
+    findings.push(...analyzeEvidenceContracts(activeFiles, config, pullRequest));
     findings.push(...analyzeDependencyChanges(activeFiles, config));
     findings.push(...analyzeWorkflowPermissions(activeFiles));
     findings.push(...analyzeWorkflowDangerousTriggers(activeFiles));
+    findings.push(...analyzeWorkflowUntrustedCheckout(activeFiles));
     findings.push(...analyzeMcpConfigs(activeFiles));
     if (config.secrets.enabled) {
         for (const file of activeFiles) {
@@ -24124,7 +24867,10 @@ function summarizeDiffFiles(files, config, pullRequest) {
         sensitiveFilesChanged: activeFiles.filter((file) => matchesAny(file.path, config.sensitivePaths)).length,
         pullRequestDescription: evidence.descriptionState,
         verificationEvidence: evidence.verificationEvidence,
-        reproductionEvidence: evidence.reproductionEvidence
+        reproductionEvidence: evidence.reproductionEvidence,
+        screenshotEvidence: evidence.screenshotEvidence,
+        changelogEvidence: evidence.changelogEvidence,
+        permissionRationaleEvidence: evidence.permissionRationaleEvidence
     };
 }
 function analyzeChangeSize(files) {
@@ -24229,6 +24975,55 @@ function analyzePullRequestEvidence(files, pullRequest) {
     }
     return findings;
 }
+/**
+ * Checks every configured evidence contract against the changed files and
+ * reports the contracts whose required evidence is missing.
+ *
+ * A contract applies when at least one changed file path matches its `paths`.
+ * For each applicable contract with unmet requirements, one finding is
+ * produced with rule id `evidence-contract:<contract id>`.
+ *
+ * @param {Array<{path: string}>} files - Changed files under analysis.
+ * @param {object} config - Configuration; reads `config.evidence.contracts`.
+ * @param {object} [pullRequest] - Pull request context handed to analyzeEvidence.
+ * @returns {Array<object>} Findings, in contract order (empty when nothing is missing).
+ */
+function analyzeEvidenceContracts(files, config, pullRequest) {
+    const { contracts } = config.evidence;
+    if (contracts.length === 0) {
+        return [];
+    }
+    const evidence = analyzeEvidence(pullRequest);
+    const hasTestChanges = files.some((file) => isTestPath(file.path));
+    return contracts.flatMap((contract) => {
+        const matchedPaths = files
+            .filter((file) => matchesAny(file.path, contract.paths))
+            .map((file) => file.path);
+        if (matchedPaths.length === 0) {
+            return [];
+        }
+        const missing = contract.requires.filter((requirement) => !hasEvidenceRequirement(requirement, evidence, hasTestChanges));
+        if (missing.length === 0) {
+            return [];
+        }
+        // The same human-readable list is used in the message and the evidence.
+        const missingList = missing.map(formatEvidenceRequirement).join(", ");
+        return [
+            {
+                ruleId: `evidence-contract:${contract.id}`,
+                title: contract.title ?? "Evidence contract missing",
+                message: `Changed files match evidence contract "${contract.id}", but missing required evidence: ${missingList}.`,
+                severity: contract.severity,
+                path: matchedPaths[0],
+                evidence: [
+                    // Only the first five matched paths are listed.
+                    `matched files: ${matchedPaths.slice(0, 5).join(", ")}`,
+                    `missing evidence: ${missingList}`
+                ],
+                recommendation: contract.recommendation ??
+                    "Ask the contributor to add the missing evidence before spending deep review time."
+            }
+        ];
+    });
+}
+/**
+ * Tells whether a single evidence requirement is satisfied.
+ *
+ * @param {string} requirement - Requirement name ("verification", "reproduction",
+ *   "screenshot", "changelog"); any other value is checked against the
+ *   permission-rationale flag.
+ * @param {object} evidence - Evidence flags as produced by analyzeEvidence.
+ * @param {boolean} hasTestChanges - Whether the change set touches test files;
+ *   this alone satisfies "verification".
+ * @returns {boolean} Whether the requirement is met.
+ */
+function hasEvidenceRequirement(requirement, evidence, hasTestChanges) {
+    switch (requirement) {
+        case "verification":
+            return evidence.verificationEvidence || hasTestChanges;
+        case "reproduction":
+            return evidence.reproductionEvidence;
+        case "screenshot":
+            return evidence.screenshotEvidence;
+        case "changelog":
+            return evidence.changelogEvidence;
+        default:
+            return evidence.permissionRationaleEvidence;
+    }
+}
 function analyzeDependencyChanges(files, config) {
     const findings = [];
     for (const file of files.filter((candidate) => isDependencyManifest(candidate.path))) {
@@ -24360,7 +25155,7 @@ function extractMajorVersion(version) {
 function analyzeWorkflowPermissions(files) {
     const findings = [];
     for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
-        const permissionLines = file.addedLines.filter((line) => /permissions:|contents:\s*write|packages:\s*write|id-token:\s*write|pull-requests:\s*write/.test(line.value.trim()));
+        const permissionLines = file.addedLines.filter((line) => isRiskyWorkflowPermissionLine(line.value));
         if (permissionLines.length === 0) {
             continue;
         }
@@ -24376,6 +25171,13 @@ function analyzeWorkflowPermissions(files) {
     }
     return findings;
 }
+/**
+ * Tells whether an added workflow line grants a write-level token permission.
+ *
+ * Recognized forms (case-insensitive, leading/trailing whitespace ignored):
+ *   - `permissions: write-all`
+ *   - block style `<scope>: write`
+ *   - flow style `permissions: { <scope>: write, ... }`
+ * Keys and values may be wrapped in single or double quotes, which YAML treats
+ * as the same string; without this a quoted `contents: "write"` would slip
+ * past the check.
+ *
+ * @param {string} value - Raw line text from the diff.
+ * @returns {boolean} True when the line grants a write permission.
+ */
+function isRiskyWorkflowPermissionLine(value) {
+    const line = value.trim();
+    if (/^permissions:\s*["']?write-all\b/i.test(line)) {
+        return true;
+    }
+    const scope = "(?:actions|attestations|checks|contents|deployments|discussions|id-token|issues|models|packages|pages|pull-requests|repository-projects|security-events|statuses)";
+    // `<scope>: write` with optional quotes around the key and the value.
+    const grant = `["']?${scope}["']?\\s*:\\s*["']?write\\b`;
+    if (new RegExp(`^${grant}`, "i").test(line)) {
+        return true;
+    }
+    // Flow mapping on the `permissions:` line itself; the grant must start
+    // right after "{" or after a separator so partial key names do not match.
+    return new RegExp(`^permissions:\\s*\\{(?:.*[\\s,])?${grant}`, "i").test(line);
+}
 function analyzeWorkflowDangerousTriggers(files) {
     const findings = [];
     for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
@@ -24395,6 +25197,33 @@ function analyzeWorkflowDangerousTriggers(files) {
     }
     return findings;
 }
+/**
+ * Flags workflow files whose added lines reference the pull request head
+ * (head ref, head SHA or head repository).
+ *
+ * One finding is produced per workflow file. Severity is "high" when the same
+ * file's added lines also mention `pull_request_target`, otherwise "medium".
+ * Only added lines are inspected, so a trigger that already existed in the
+ * file is not seen here.
+ *
+ * @param {Array<{path: string, addedLines: Array<{value: string}>}>} files - Changed files.
+ * @returns {Array<object>} Findings with rule id "workflow-untrusted-checkout".
+ */
+function analyzeWorkflowUntrustedCheckout(files) {
+    return files
+        .filter((candidate) => isWorkflowPath(candidate.path))
+        .flatMap((file) => {
+        const headRefLines = file.addedLines.filter(({ value }) => isPullRequestHeadCheckoutLine(value));
+        if (headRefLines.length === 0) {
+            return [];
+        }
+        const usesPullRequestTarget = file.addedLines.some(({ value }) => /\bpull_request_target\b/.test(value.trim()));
+        const message = usesPullRequestTarget
+            ? `${file.path} combines pull_request_target with pull request head checkout references.`
+            : `${file.path} checks out pull request head references; review the job privilege boundary before merging.`;
+        return [
+            {
+                ruleId: "workflow-untrusted-checkout",
+                title: "Workflow checks out pull request head",
+                message,
+                severity: usesPullRequestTarget ? "high" : "medium",
+                path: file.path,
+                // At most five offending lines are quoted.
+                evidence: headRefLines.slice(0, 5).map(formatEvidenceLine),
+                recommendation: "Avoid running untrusted PR code with write tokens, repository secrets, or privileged pull_request_target context."
+            }
+        ];
+    });
+}
+/**
+ * Tells whether a line references the pull request head through the GitHub
+ * Actions context: `github.head_ref`, or `github.event.pull_request.head`
+ * optionally followed by `.sha`, `.ref` or `.repo.full_name`.
+ *
+ * @param {string} value - Raw line text.
+ * @returns {boolean} True when one of the head references is present.
+ */
+function isPullRequestHeadCheckoutLine(value) {
+    const headReferencePatterns = [
+        /\bgithub\.head_ref\b/,
+        /\bgithub\.event\.pull_request\.head(?:\.sha|\.ref|\.repo\.full_name)?\b/
+    ];
+    const trimmed = value.trim();
+    return headReferencePatterns.some((pattern) => pattern.test(trimmed));
+}
 function analyzeMcpConfigs(files) {
     const findings = [];
     for (const file of files.filter((candidate) => isMcpConfigPath(candidate.path))) {
@@ -24418,6 +25247,9 @@ function formatEvidenceLine(line) {
     const value = line.value.trim();
     return line.lineNumber ? `line ${line.lineNumber}: ${value}` : value;
 }
+/**
+ * Formats an evidence requirement name for use in finding messages.
+ * Currently the name is returned unchanged.
+ *
+ * @param {string} requirement - Requirement name.
+ * @returns {string} The same name.
+ */
+function formatEvidenceRequirement(requirement) {
+    return requirement;
+}
 function sensitivePathSeverity(path) {
     if (matchesAny(path, [
         "**/.env*",
@@ -24492,8 +25324,9 @@ function calculateEvidenceScore(summary, findings) {
         "dependency-lifecycle-script",
         "workflow-permission-change",
         "workflow-dangerous-trigger",
+        "workflow-untrusted-checkout",
         "mcp-credential-risk"
-    ].includes(finding.ruleId));
+    ].includes(finding.ruleId) || finding.ruleId.startsWith("evidence-contract:"));
     if (needsVerificationEvidence && !summary.verificationEvidence) {
         addDeduction("missing-verification", 20, "No test or manual verification evidence was found.");
     }
@@ -24510,6 +25343,9 @@ function calculateEvidenceScore(summary, findings) {
         else if (finding.ruleId === "workflow-dangerous-trigger") {
             addDeduction("workflow-dangerous-trigger", 30, "pull_request_target workflows need privileged trigger review.");
         }
+        else if (finding.ruleId === "workflow-untrusted-checkout") {
+            addDeduction("workflow-untrusted-checkout", finding.severity === "high" ? 30 : 18, "Workflow checkout of pull request head needs privilege-boundary review.");
+        }
         else if (finding.ruleId === "mcp-credential-risk") {
             addDeduction("mcp-credential-risk", 25, "MCP configuration expands local execution or credential risk.");
         }
@@ -24530,6 +25366,9 @@ function calculateEvidenceScore(summary, findings) {
         else if (finding.ruleId === "dependency-lifecycle-script") {
             addDeduction("dependency-lifecycle-script", 25, "Package lifecycle scripts can run during install or publish.");
         }
+        else if (finding.ruleId.startsWith("evidence-contract:")) {
+            addDeduction("evidence-contract-missing", finding.severity === "high" ? 25 : 15, "Configured evidence contract was not satisfied.");
+        }
         else if (finding.ruleId === "missing-tests") {
             addDeduction("missing-tests", finding.severity === "medium" ? 20 : 12, "Code changed without test changes or verification notes.");
         }
@@ -24558,6 +25397,15 @@ function collectEvidenceStrengths(summary) {
     if (summary.reproductionEvidence) {
         strengths.push("Reproduction or before/after context was found.");
     }
+    if (summary.screenshotEvidence) {
+        strengths.push("Screenshot or visual evidence was found.");
+    }
+    if (summary.changelogEvidence) {
+        strengths.push("Changelog or migration evidence was found.");
+    }
+    if (summary.permissionRationaleEvidence) {
+        strengths.push("Permission rationale evidence was found.");
+    }
     if (summary.testFilesChanged > 0) {
         strengths.push("Test files changed with the PR.");
     }
@@ -24582,6 +25430,7 @@ function calculateReviewDecision(risk, evidenceScore, findings) {
     const hasBlockingSecurityFinding = findings.some((finding) => finding.ruleId.startsWith("secret-detected") ||
         finding.ruleId === "workflow-permission-change" ||
         finding.ruleId === "workflow-dangerous-trigger" ||
+        (finding.ruleId === "workflow-untrusted-checkout" && finding.severity === "high") ||
         finding.ruleId === "dependency-lifecycle-script" ||
         finding.ruleId === "mcp-credential-risk");
     if (hasBlockingSecurityFinding || evidenceScore.value < 50 || risk === "high") {
@@ -24714,6 +25563,17 @@ function reviewActionsForFinding(finding) {
             }
         ];
     }
+    if (finding.ruleId.startsWith("evidence-contract:")) {
+        return [
+            {
+                actionId: "satisfy-evidence-contract",
+                title: "Ask for the configured evidence contract to be satisfied.",
+                detail: "The PR matches a repository-defined evidence contract but is missing required proof in the PR description.",
+                priority: finding.severity === "high" ? "high" : "medium",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
     if (finding.ruleId === "workflow-permission-change") {
         return [
             {
@@ -24736,6 +25596,17 @@ function reviewActionsForFinding(finding) {
             }
         ];
     }
+    if (finding.ruleId === "workflow-untrusted-checkout") {
+        return [
+            {
+                actionId: "review-untrusted-checkout",
+                title: "Review pull request head checkout privileges.",
+                detail: "Confirm the job does not run untrusted PR code with write tokens, repository secrets, or pull_request_target privileges.",
+                priority: finding.severity === "high" ? "high" : "medium",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
     if (finding.ruleId === "dependency-lifecycle-script") {
         return [
             {
@@ -24864,7 +25735,7 @@ const build_program = new Command();
 build_program
     .name("proof-pr")
     .description("Review pull request evidence, scope, and safety before maintainers spend time on it.")
-    .version("0.1.6");
+    .version("0.1.7");
 build_program
     .command("scan", { isDefault: true })
     .description("Scan a git diff and print a ProofPR report.")
@@ -24875,7 +25746,7 @@ build_program
     .option("--pr-body <body>", "Pull request body used for evidence checks.")
     .option("--pr-body-file <path>", "Read a pull request body from a Markdown file.")
     .option("--config <path>", "Path to .proofpr.yml.", ".proofpr.yml")
-    .option("--format <format>", "Output format: markdown, json, or sarif.", parseFormat, "markdown")
+    .option("--format <format>", "Output format: markdown, json, sarif, or html.", parseFormat, "markdown")
     .option("--locale <locale>", "Report language: en or zh-CN.")
     .option("--fail-on <level>", "Exit with code 1 on risk level: low, medium, high, or never.", parseFailLevel, "never")
     .action(async (options) => {
@@ -24908,6 +25779,35 @@ build_program
     await writeIfMissing(options.workflowPath, renderWorkflowTemplate(options.failOn), options.force);
     process.stdout.write(`ProofPR initialized:\n- ${options.configPath}\n- ${options.workflowPath}\n`);
 });
+// Registers the `benchmark` subcommand: runs every benchmark case, renders the
+// report as text, markdown or JSON, writes it to --output or stdout, and sets
+// exit code 1 when any case did not pass.
+build_program
+    .command("benchmark")
+    .description("Run ProofPR benchmark cases and compare expected risk/finding output.")
+    .option("--cases <dir>", "Directory containing benchmark case JSON files.", "benchmarks/cases")
+    .option("--format <format>", "Output format: text, markdown, or json.", parseBenchmarkFormat, "text")
+    .option("--output <path>", "Write benchmark output to a file instead of stdout.")
+    .action(async ({ cases, format, output: outputPath }) => {
+    const report = await runBenchmarks(cases);
+    // Anything other than "json" or "markdown" falls back to plain text.
+    const rendered = format === "json"
+        ? `${JSON.stringify(report, null, 2)}\n`
+        : format === "markdown"
+            ? renderBenchmarkMarkdown(report)
+            : renderBenchmarkText(report);
+    if (outputPath) {
+        await writeOutput(outputPath, rendered);
+        process.stdout.write(`ProofPR benchmark report written to ${outputPath}\n`);
+    }
+    else {
+        process.stdout.write(rendered);
+    }
+    const allPassed = report.results.every((result) => result.passed);
+    if (!allPassed) {
+        process.exitCode = 1;
+    }
+});
 build_program.parseAsync(process.argv).catch((error) => {
     const message = error instanceof Error ? error.message : String(error);
     process.stderr.write(`ProofPR failed: ${message}\n`);
@@ -24934,6 +25834,10 @@ async function writeIfMissing(path, contents, force) {
     await (0,promises_namespaceObject.mkdir)((0,external_node_path_.dirname)(path), { recursive: true });
     await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
 }
+/**
+ * Writes text to a file as UTF-8, creating the parent directory (recursively)
+ * first. Unlike writeIfMissing there is no existence check here.
+ *
+ * @param {string} path - Destination file path.
+ * @param {string} contents - Text to write.
+ * @returns {Promise<void>} Resolves once the file has been written.
+ */
+async function writeOutput(path, contents) {
+    const parentDirectory = (0,external_node_path_.dirname)(path);
+    await (0,promises_namespaceObject.mkdir)(parentDirectory, { recursive: true });
+    await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
+}
 async function pathExists(path) {
     try {
         await (0,promises_namespaceObject.access)(path);
@@ -24993,6 +25897,18 @@ comment:
 #   flagNewPackages: true
 #   flagMajorUpgrades: true
 #   flagLifecycleScripts: true
+#
+# evidence:
+#   contracts:
+#     - id: ui-screenshot
+#       title: UI changes need screenshots
+#       paths:
+#         - "src/components/**"
+#         - "app/**"
+#       requires:
+#         - screenshot
+#         - verification
+#       severity: medium
 `;
 }
 function renderWorkflowTemplate(failOn) {
@@ -25011,7 +25927,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: linsk27/proof-pr@v0.1.6
+      - uses: linsk27/proof-pr@v0.1.7
         with:
           fail-on: ${failOn}
           comment: "true"
@@ -25025,13 +25941,172 @@ function renderOutput(result, format, locale) {
     if (format === "sarif") {
         return renderSarifReport(result);
     }
+    if (format === "html") {
+        return renderHtmlReport(result, locale);
+    }
     return renderMarkdownReport(result, locale);
 }
+/**
+ * Runs every benchmark case found directly inside a directory.
+ *
+ * Each `*.json` file is one case: it names a diff file (resolved relative to
+ * the case file), optional scan config and pull request context, and an
+ * `expect` object with optional `risk`, `reviewDecision`, `findings` and
+ * `absentFindings`. Cases run one after another in sorted path order.
+ *
+ * @param {string} casesDir - Directory containing benchmark case JSON files.
+ * @returns {Promise<{summary: object, results: Array<object>}>} Per-case
+ *   results plus the aggregate from summarizeBenchmarkResults.
+ */
+async function runBenchmarks(casesDir) {
+    const root = (0,external_node_path_.resolve)(casesDir);
+    const entries = await (0,promises_namespaceObject.readdir)(root, { withFileTypes: true });
+    const caseFiles = entries
+        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
+        .map((entry) => (0,external_node_path_.resolve)(root, entry.name))
+        .sort();
+    // Loads one case, scans its diff and compares the outcome with `expect`.
+    const evaluateCase = async (caseFile) => {
+        const testCase = JSON.parse(await (0,promises_namespaceObject.readFile)(caseFile, "utf8"));
+        const diffPath = (0,external_node_path_.resolve)((0,external_node_path_.dirname)(caseFile), testCase.diffFile);
+        const diffText = await (0,promises_namespaceObject.readFile)(diffPath, "utf8");
+        const result = scanDiff(diffText, {
+            config: testCase.config,
+            pullRequest: testCase.pullRequest
+        });
+        const actualFindings = result.findings.map((finding) => finding.ruleId);
+        const expectation = testCase.expect;
+        const failures = [];
+        if (expectation.risk && result.risk !== expectation.risk) {
+            failures.push(`expected risk ${expectation.risk}, got ${result.risk}`);
+        }
+        if (expectation.reviewDecision && result.reviewDecision !== expectation.reviewDecision) {
+            failures.push(`expected review decision ${expectation.reviewDecision}, got ${result.reviewDecision}`);
+        }
+        const missingFindings = (expectation.findings ?? [])
+            .filter((expected) => !matchesFindingExpectation(actualFindings, expected));
+        failures.push(...missingFindings.map((expected) => `expected finding ${expected}`));
+        const unwantedFindings = (expectation.absentFindings ?? [])
+            .filter((absent) => matchesFindingExpectation(actualFindings, absent));
+        failures.push(...unwantedFindings.map((absent) => `unexpected finding ${absent}`));
+        return {
+            id: testCase.id,
+            title: testCase.title,
+            category: testCase.category ?? "uncategorized",
+            passed: failures.length === 0,
+            failures,
+            actual: {
+                risk: result.risk,
+                reviewDecision: result.reviewDecision,
+                findings: actualFindings
+            }
+        };
+    };
+    const results = [];
+    for (const caseFile of caseFiles) {
+        // Sequential on purpose: keeps result order equal to the sorted file order.
+        results.push(await evaluateCase(caseFile));
+    }
+    return {
+        summary: summarizeBenchmarkResults(results),
+        results
+    };
+}
+/**
+ * Aggregates per-case benchmark results.
+ *
+ * @param {Array<{category: string, passed: boolean, actual: {findings: string[]}}>} results
+ *   Per-case results as built by runBenchmarks.
+ * @returns {object} Totals (`total`, `passed`, `failed`, `passRate`), per-category
+ *   totals sorted by category name, and `findingCounts`: for each rule id the
+ *   number of cases that produced it at least once, sorted by descending count
+ *   and then by rule id.
+ */
+function summarizeBenchmarkResults(results) {
+    const byCategory = new Map();
+    const casesPerRule = new Map();
+    let passedTotal = 0;
+    for (const result of results) {
+        if (result.passed) {
+            passedTotal += 1;
+        }
+        if (!byCategory.has(result.category)) {
+            byCategory.set(result.category, []);
+        }
+        byCategory.get(result.category).push(result);
+        // A Set so that a rule firing several times in one case counts once.
+        for (const ruleId of new Set(result.actual.findings)) {
+            casesPerRule.set(ruleId, (casesPerRule.get(ruleId) ?? 0) + 1);
+        }
+    }
+    const categories = [...byCategory.entries()]
+        .sort(([left], [right]) => left.localeCompare(right))
+        .map(([category, items]) => {
+        const categoryPassed = items.filter((item) => item.passed).length;
+        return {
+            category,
+            total: items.length,
+            passed: categoryPassed,
+            failed: items.length - categoryPassed,
+            passRate: ratio(categoryPassed, items.length)
+        };
+    });
+    const findingCounts = [...casesPerRule.entries()]
+        .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
+        .map(([ruleId, count]) => ({ ruleId, count }));
+    return {
+        total: results.length,
+        passed: passedTotal,
+        failed: results.length - passedTotal,
+        passRate: ratio(passedTotal, results.length),
+        categories,
+        findingCounts
+    };
+}
+/**
+ * Renders a benchmark report as plain text: summary line, per-category pass
+ * rates, finding coverage (omitted when empty), then one PASS/FAIL line per
+ * case followed by its failure messages.
+ *
+ * @param {{summary: object, results: Array<object>}} report - Report from runBenchmarks.
+ * @returns {string} Newline-joined text ending with a trailing newline.
+ */
+function renderBenchmarkText(report) {
+    const { summary, results } = report;
+    const categoryLines = summary.categories.map((category) => `- ${category.category}: ${category.passed}/${category.total} passed (${formatPercent(category.passRate)})`);
+    const coverageLines = summary.findingCounts.length > 0
+        ? ["", "Finding coverage:", ...summary.findingCounts.map((item) => `- ${item.ruleId}: ${item.count}`)]
+        : [];
+    const caseLines = results.flatMap((result) => [
+        `${result.passed ? "PASS" : "FAIL"} ${result.id}${result.title ? ` - ${result.title}` : ""}`,
+        ...result.failures.map((failure) => `  - ${failure}`)
+    ]);
+    return [
+        "ProofPR benchmark",
+        "",
+        `Summary: ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`,
+        "",
+        "Categories:",
+        ...categoryLines,
+        ...coverageLines,
+        "",
+        ...caseLines,
+        ""
+    ].join("\n");
+}
+/**
+ * Renders a benchmark report as Markdown: a summary line followed by three
+ * tables (categories, finding coverage, cases). Per-case failure messages are
+ * not included in this format.
+ *
+ * @param {{summary: object, results: Array<object>}} report - Report from runBenchmarks.
+ * @returns {string} Newline-joined Markdown ending with a trailing newline.
+ */
+function renderBenchmarkMarkdown(report) {
+    const { summary, results } = report;
+    const categoryRows = summary.categories.map((category) => `| ${category.category} | ${category.passed} | ${category.total} | ${formatPercent(category.passRate)} |`);
+    const coverageRows = summary.findingCounts.map((item) => `| \`${item.ruleId}\` | ${item.count} |`);
+    const caseRows = results.map((result) => `| ${result.passed ? "PASS" : "FAIL"} | \`${result.id}\` | ${result.category} | ${result.actual.risk} | ${result.actual.reviewDecision} |`);
+    return [
+        "# ProofPR Benchmark",
+        "",
+        `**Summary:** ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`,
+        "",
+        "## Categories",
+        "",
+        "| Category | Passed | Total | Pass rate |",
+        "| --- | ---: | ---: | ---: |",
+        ...categoryRows,
+        "",
+        "## Finding Coverage",
+        "",
+        "| Rule | Cases |",
+        "| --- | ---: |",
+        ...coverageRows,
+        "",
+        "## Cases",
+        "",
+        "| Result | Case | Category | Actual risk | Gate |",
+        "| --- | --- | --- | --- | --- |",
+        ...caseRows,
+        ""
+    ].join("\n");
+}
+/**
+ * Divides value by total, treating an empty total as 0 instead of dividing by zero.
+ *
+ * @param {number} value - Numerator.
+ * @param {number} total - Denominator.
+ * @returns {number} value / total, or 0 when total is 0.
+ */
+function ratio(value, total) {
+    if (total === 0) {
+        return 0;
+    }
+    return value / total;
+}
+/**
+ * Formats a fraction as a whole-number percentage.
+ *
+ * @param {number} value - Fraction, e.g. 0.5 for 50%.
+ * @returns {string} Rounded percentage followed by "%".
+ */
+function formatPercent(value) {
+    const wholePercent = Math.round(value * 100);
+    return `${wholePercent}%`;
+}
+/**
+ * Tells whether an expected rule id is present among the actual rule ids.
+ *
+ * An expectation ending in "*" is a prefix match on everything before the
+ * asterisk; any other expectation must match a rule id exactly.
+ *
+ * @param {string[]} actualFindings - Rule ids produced by the scan.
+ * @param {string} expected - Exact rule id or a prefix pattern ending in "*".
+ * @returns {boolean} True when at least one actual rule id matches.
+ */
+function matchesFindingExpectation(actualFindings, expected) {
+    if (!expected.endsWith("*")) {
+        return actualFindings.includes(expected);
+    }
+    const prefix = expected.slice(0, expected.length - 1);
+    return actualFindings.some((ruleId) => ruleId.startsWith(prefix));
+}
 function parseFormat(value) {
-    if (value === "json" || value === "markdown" || value === "sarif") {
+    if (value === "json" || value === "markdown" || value === "sarif" || value === "html") {
+        return value;
+    }
+    throw new InvalidArgumentError("format must be one of: markdown, json, sarif, html");
+}
+function parseBenchmarkFormat(value) {
+    if (value === "text" || value === "json" || value === "markdown") {
         return value;
     }
-    throw new InvalidArgumentError("format must be one of: markdown, json, sarif");
+    throw new InvalidArgumentError("benchmark format must be one of: text, markdown, json");
 }
 function parseFailLevel(value) {
     if (value === "low" || value === "medium" || value === "high" || value === "never") {