npm - proof-pr - Versions diffs - 0.1.5 → 0.1.7 - Mend

proof-pr 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -9,7 +9,7 @@ ProofPR 帮助维护者在投入深入 review 之前，先检查 PR 的证据、
 作为 GitHub Action 使用时，ProofPR 默认在 PR 打开、PR 分支更新、PR 重新打开时运行。普通分支 push 不会单独生成报告。
 报告会出现在 PR 评论区、GitHub Actions job summary 和 PR checks 状态里。
-`v0.1.5` 起还可以输出 GitHub annotations，并通过 `sarif-output` 写出 SARIF 文件。
+`v0.1.5` 起还可以输出 GitHub annotations，并通过 `sarif-output` 写出 SARIF 文件。当前版本还会识别依赖大版本升级、包生命周期脚本和 `pull_request_target` workflow 触发器。
 ## 使用
@@ -21,6 +21,7 @@ npx proof-pr@latest init --preset security-strict
 npx proof-pr@latest scan --base origin/main --head HEAD
 npx proof-pr@latest scan --base origin/main --head HEAD --locale zh-CN
 npx proof-pr@latest scan --base origin/main --pr-body-file pr-body.md --format json
+npx proof-pr@latest benchmark --cases benchmarks/cases
 ```
 可用预设：`balanced`、`open-source-maintainer`、`security-strict`、`ai-generated-pr`、`mcp-security`、`dependency-careful`。

package/dist/index.js CHANGED Viewed

@@ -23111,7 +23111,15 @@ function preprocess(fn, schema) {
 const riskLevelSchema = schemas_enum(["low", "medium", "high"]);
+const findingSeveritySchema = schemas_enum(["info", "low", "medium", "high"]);
 const localeSchema = schemas_enum(["en", "zh-CN"]);
+const evidenceRequirementSchema = schemas_enum([
+    "verification",
+    "reproduction",
+    "screenshot",
+    "changelog",
+    "permission-rationale"
+]);
 const configPresetSchema = schemas_enum([
     "balanced",
     "open-source-maintainer",
@@ -23149,6 +23157,38 @@ const DEFAULT_SENSITIVE_PATHS = [
     "go.sum"
 ];
 const DEFAULT_TEST_PATHS = ["src/**", "packages/**/src/**", "app/**", "lib/**"];
+const WORKFLOW_EVIDENCE_CONTRACTS = [
+    {
+        id: "workflow-permission-rationale",
+        title: "Workflow changes need a permission rationale",
+        paths: [".github/workflows/**", ".github/actions/**"],
+        requires: ["verification", "permission-rationale"],
+        severity: "high",
+        recommendation: "Explain why the workflow needs this trigger or permission, and include verification that untrusted PR code cannot reach privileged tokens."
+    }
+];
+const DEPENDENCY_EVIDENCE_CONTRACTS = [
+    {
+        id: "dependency-upgrade-evidence",
+        title: "Dependency changes need upgrade evidence",
+        paths: [
+            "package.json",
+            "**/package.json",
+            "pnpm-lock.yaml",
+            "package-lock.json",
+            "yarn.lock",
+            "requirements.txt",
+            "**/requirements.txt",
+            "pyproject.toml",
+            "**/pyproject.toml",
+            "go.mod",
+            "**/go.mod"
+        ],
+        requires: ["verification", "changelog"],
+        severity: "medium",
+        recommendation: "Link changelog or migration notes and include the test command or CI evidence used to validate the dependency change."
+    }
+];
 const PRESET_DEFAULTS = {
     balanced: {},
     "open-source-maintainer": {
@@ -23179,6 +23219,9 @@ const PRESET_DEFAULTS = {
         requireTests: {
             enabled: true,
             paths: ["src/**", "packages/**/src/**", "app/**", "lib/**", "server/**", "api/**"]
+        },
+        evidence: {
+            contracts: WORKFLOW_EVIDENCE_CONTRACTS
         }
     },
     "ai-generated-pr": {
@@ -23217,9 +23260,20 @@ const PRESET_DEFAULTS = {
         requireTests: {
             enabled: true,
             paths: DEFAULT_TEST_PATHS
+        },
+        evidence: {
+            contracts: DEPENDENCY_EVIDENCE_CONTRACTS
         }
     }
 };
+const evidenceContractSchema = object({
+    id: schemas_string().min(1),
+    title: schemas_string().min(1).optional(),
+    paths: array(schemas_string().min(1)).min(1),
+    requires: array(evidenceRequirementSchema).min(1),
+    severity: findingSeveritySchema.default("medium"),
+    recommendation: schemas_string().min(1).optional()
+});
 const configSchema = object({
     preset: configPresetSchema.default("balanced"),
     locale: localeSchema.default("en"),
@@ -23234,9 +23288,14 @@ const configSchema = object({
     secrets: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true }),
     dependencies: object({
         flagNewPackages: schemas_boolean().default(true),
-        flagMajorUpgrades: schemas_boolean().default(true)
+        flagMajorUpgrades: schemas_boolean().default(true),
+        flagLifecycleScripts: schemas_boolean().default(true)
+    })
+        .default({ flagNewPackages: true, flagMajorUpgrades: true, flagLifecycleScripts: true }),
+    evidence: object({
+        contracts: array(evidenceContractSchema).default([])
     })
-        .default({ flagNewPackages: true, flagMajorUpgrades: true }),
+        .default({ contracts: [] }),
     comment: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true })
 });
 function parseConfig(input) {
@@ -23374,6 +23433,9 @@ function renderEnglishMarkdownReport(result) {
         `- PR description: ${result.summary.pullRequestDescription}`,
         `- Verification evidence: ${formatBoolean(result.summary.verificationEvidence)}`,
         `- Reproduction context: ${formatBoolean(result.summary.reproductionEvidence)}`,
+        `- Screenshot evidence: ${formatBoolean(result.summary.screenshotEvidence)}`,
+        `- Changelog evidence: ${formatBoolean(result.summary.changelogEvidence)}`,
+        `- Permission rationale: ${formatBoolean(result.summary.permissionRationaleEvidence)}`,
         ""
     ];
     appendEvidenceScoreSection(lines, result, "en");
@@ -23408,6 +23470,9 @@ function renderChineseMarkdownReport(result) {
         `- PR 描述质量：${translateDescriptionState(result.summary.pullRequestDescription)}`,
         `- 验证证据：${formatChineseBoolean(result.summary.verificationEvidence)}`,
         `- 复现上下文：${formatChineseBoolean(result.summary.reproductionEvidence)}`,
+        `- 截图或视觉证据：${formatChineseBoolean(result.summary.screenshotEvidence)}`,
+        `- Changelog 或迁移证据：${formatChineseBoolean(result.summary.changelogEvidence)}`,
+        `- 权限理由证据：${formatChineseBoolean(result.summary.permissionRationaleEvidence)}`,
         ""
     ];
     appendEvidenceScoreSection(lines, result, "zh-CN");
@@ -23518,6 +23583,11 @@ function maintainerFocus(findings, locale) {
                 ? "轮换任何可能暴露的凭证，并在移除 secret 前阻止合并。"
                 : "Rotate any exposed credential and block the PR until secrets are removed.");
         }
+        else if (finding.ruleId.startsWith("evidence-contract:")) {
+            focus.add(locale === "zh-CN"
+                ? "先要求贡献者补齐仓库定义的证据契约，再投入深度 review。"
+                : "Ask the contributor to satisfy the repository-defined evidence contract before deep review.");
+        }
         else if (finding.ruleId === "workflow-permission-change") {
             focus.add(locale === "zh-CN"
                 ? "合并前重点审查 GitHub Actions 权限。"
@@ -23543,6 +23613,26 @@ function maintainerFocus(findings, locale) {
                 ? "要求拆分 PR，或提供逐文件 review 指南。"
                 : "Request a smaller PR or a file-by-file review guide.");
         }
+        else if (finding.ruleId === "dependency-major-upgrade") {
+            focus.add(locale === "zh-CN"
+                ? "重点核查依赖大版本升级的迁移说明、兼容性和测试覆盖。"
+                : "Review dependency major upgrade migration notes, compatibility, and test coverage.");
+        }
+        else if (finding.ruleId === "dependency-lifecycle-script") {
+            focus.add(locale === "zh-CN"
+                ? "合并前审查包生命周期脚本是否会在安装或发布时执行非预期代码。"
+                : "Review package lifecycle scripts for unexpected install or publish-time execution.");
+        }
+        else if (finding.ruleId === "workflow-dangerous-trigger") {
+            focus.add(locale === "zh-CN"
+                ? "重点审查 pull_request_target 是否会用高权限 token 执行不可信 PR 代码。"
+                : "Review whether pull_request_target can execute untrusted PR code with privileged tokens.");
+        }
+        else if (finding.ruleId === "workflow-untrusted-checkout") {
+            focus.add(locale === "zh-CN"
+                ? "重点审查 workflow 是否 checkout 并执行了不可信 PR head 代码。"
+                : "Review whether the workflow checks out and executes untrusted PR head code.");
+        }
         else if (finding.ruleId === "mcp-credential-risk") {
             focus.add(locale === "zh-CN"
                 ? "重点审查 MCP command、args 和凭证处理方式。"
@@ -23557,6 +23647,13 @@ function maintainerFocus(findings, locale) {
     return [...focus];
 }
 function translateFinding(finding) {
+    if (finding.ruleId.startsWith("evidence-contract:")) {
+        return {
+            title: "证据契约未满足",
+            message: "该 PR 命中了仓库自定义证据契约，但 PR 描述中缺少必需证据。",
+            recommendation: "建议要求贡献者补齐缺失证据后再深入 review。"
+        };
+    }
     if (finding.ruleId === "change-size") {
         const files = finding.evidence?.find((item) => item.startsWith("files: "))?.replace("files: ", "");
         const lines = finding.evidence?.find((item) => item.startsWith("changed lines: "))?.replace("changed lines: ", "");
@@ -23604,6 +23701,20 @@ function translateFinding(finding) {
             recommendation: "请确认包名、许可证、来源可信度，以及 lockfile 是否匹配预期依赖变化。"
         };
     }
+    if (finding.ruleId === "dependency-major-upgrade") {
+        return {
+            title: "依赖发生大版本升级",
+            message: finding.path ? `${finding.path} 中有依赖跨越了大版本边界。` : finding.message,
+            recommendation: "请核查 changelog、迁移说明、peer dependencies 影响，以及测试是否覆盖升级后的关键路径。"
+        };
+    }
+    if (finding.ruleId === "dependency-lifecycle-script") {
+        return {
+            title: "包生命周期脚本发生变更",
+            message: finding.path ? `${finding.path} 新增或修改了安装/发布阶段可能自动执行的脚本。` : finding.message,
+            recommendation: "请确认该脚本是否必要，是否下载或执行远程代码，以及是否会影响安装该包的用户。"
+        };
+    }
     if (finding.ruleId === "workflow-permission-change") {
         return {
             title: "Workflow 权限发生变更",
@@ -23611,6 +23722,22 @@ function translateFinding(finding) {
             recommendation: "请确认 workflow 是否真的需要写权限或 token 权限，并检查不可信 PR 是否能触达该 workflow。"
         };
     }
+    if (finding.ruleId === "workflow-dangerous-trigger") {
+        return {
+            title: "Workflow 使用了 pull_request_target",
+            message: finding.path ? `${finding.path} 新增了 pull_request_target 触发器。` : finding.message,
+            recommendation: "请确认该 workflow 不会用高权限 token、secret 或写权限执行不可信 PR 代码。"
+        };
+    }
+    if (finding.ruleId === "workflow-untrusted-checkout") {
+        return {
+            title: "Workflow checkout 了 PR head",
+            message: finding.path
+                ? `${finding.path} 引用了 PR head 代码来源，需要审查它是否会在高权限上下文中执行。`
+                : finding.message,
+            recommendation: "避免在 pull_request_target、写权限 token 或可读取 secret 的上下文中运行不可信 PR 代码。"
+        };
+    }
     if (finding.ruleId === "mcp-credential-risk") {
         return {
             title: "MCP 配置需要重点审查",
@@ -23629,8 +23756,15 @@ function translateFinding(finding) {
 }
 function translateEvidence(item) {
     return item
+        .replace("matched files: ", "命中文件：")
+        .replace("missing evidence: ", "缺失证据：")
         .replace("files: ", "文件数：")
         .replace("changed lines: ", "变更行数：")
+        .replace(/\bverification\b/g, "验证")
+        .replace(/\breproduction\b/g, "复现")
+        .replace(/\bscreenshot\b/g, "截图")
+        .replace(/\bchangelog\b/g, "变更日志")
+        .replace(/\bpermission-rationale\b/g, "权限理由")
         .replace("line ", "第 ")
         .replace(": ", " 行：");
 }
@@ -23682,14 +23816,19 @@ function translateReviewActionTitle(actionId, fallback) {
         "ask-for-evidence-before-review": "深入 review 前先要求补充证据",
         "review-with-focus": "带着重点清单进行 review",
         "normal-review": "进入常规 review",
+        "satisfy-evidence-contract": "要求补齐证据契约",
         "improve-pr-description": "要求补充更清楚的 PR 描述",
         "add-verification-evidence": "要求补充测试或手动验证证据",
         "add-reproduction-context": "要求补充复现或 before/after 上下文",
         "rotate-secret": "轮换并移除暴露的凭证",
         "justify-workflow-permissions": "要求说明 workflow 权限最小化理由",
+        "review-privileged-pr-trigger": "审查 pull_request_target 高权限触发器",
+        "review-untrusted-checkout": "审查 PR head checkout 的权限边界",
+        "review-package-lifecycle-script": "审查包生命周期脚本",
         "review-mcp-execution-surface": "审查 MCP 命令、参数和凭证处理",
         "request-review-map-or-split": "要求拆分 PR 或提供逐文件 review map",
         "verify-dependency-change": "核查依赖来源和 lockfile 影响",
+        "review-major-dependency-upgrade": "核查依赖大版本升级影响",
         "assign-sensitive-file-review": "安排敏感文件重点 review"
     }[actionId] ?? fallback;
 }
@@ -23699,23 +23838,35 @@ function translateReviewActionDetail(actionId, fallback) {
         "ask-for-evidence-before-review": "要求测试、截图、复现步骤或更清楚的 PR 描述，再投入详细 review。",
         "review-with-focus": "优先使用下面的风险发现和重点文件作为第一轮 review map。",
         "normal-review": "当前证据足够支撑维护者进行常规 review。",
+        "satisfy-evidence-contract": "该 PR 命中了仓库自定义证据契约，但 PR 描述里缺少必需证据。",
         "improve-pr-description": "贡献者应说明为什么改、改了什么、如何验证，以及是否有发布或兼容性风险。",
         "add-verification-evidence": "要求测试输出、CI 链接、截图，或简短的手动验证说明。",
         "add-reproduction-context": "PR 应包含复现步骤、预期/实际行为，或相关 before/after 截图。",
         "rotate-secret": "在 secret 从 PR 中移除并完成轮换前，不要合并。",
         "justify-workflow-permissions": "确认写权限或 OIDC 是否必要，并检查不可信 PR 是否能触发该 workflow。",
+        "review-privileged-pr-trigger": "确认 workflow 不会用写权限 token、secret 或仓库权限执行不可信 PR 代码。",
+        "review-untrusted-checkout": "确认 job 不会在写权限 token、仓库 secret 或 pull_request_target 高权限上下文中运行不可信 PR 代码。",
+        "review-package-lifecycle-script": "检查 install、postinstall、prepare 或 publish 脚本是否会执行非预期代码。",
         "review-mcp-execution-surface": "检查 MCP 配置是否提交凭证，或意外扩大本地执行面。",
         "request-review-map-or-split": "要求贡献者拆分无关改动，或标出最需要重点 review 的文件。",
         "verify-dependency-change": "检查包名、维护者、许可证、安装脚本，以及 lockfile 是否符合预期依赖变化。",
+        "review-major-dependency-upgrade": "检查 changelog、迁移说明、peer dependencies，以及测试是否覆盖升级后的关键路径。",
         "assign-sensitive-file-review": "合并前由维护者有意识地检查敏感文件改动。"
     }[actionId] ?? fallback;
 }
 function translateFocusReason(reasonId, fallback) {
+    if (reasonId.startsWith("evidence-contract:")) {
+        return "仓库自定义证据契约未满足";
+    }
     return {
         "change-size": "review 面积相关 finding",
         "sensitive-path": "敏感路径发生变更",
         "dependency-added": "依赖清单发生变更",
+        "dependency-major-upgrade": "依赖发生大版本升级",
+        "dependency-lifecycle-script": "包生命周期脚本发生变更",
         "workflow-permission-change": "workflow 权限发生变更",
+        "workflow-dangerous-trigger": "workflow 使用了高风险触发器",
+        "workflow-untrusted-checkout": "workflow checkout 了不可信 PR head",
         "mcp-credential-risk": "MCP 配置存在执行面或凭证风险",
         "missing-tests": "代码改动缺少测试或验证证据"
     }[reasonId] ?? fallback;
@@ -23725,6 +23876,9 @@ function translateScoreMessage(message) {
         "PR description provides review context.": "PR 描述提供了 review 上下文。",
         "Verification evidence was found.": "检测到测试或手动验证证据。",
         "Reproduction or before/after context was found.": "检测到复现步骤或 before/after 上下文。",
+        "Screenshot or visual evidence was found.": "检测到截图或视觉证据。",
+        "Changelog or migration evidence was found.": "检测到 changelog 或迁移证据。",
+        "Permission rationale evidence was found.": "检测到权限理由证据。",
         "Test files changed with the PR.": "PR 同时修改了测试文件。",
         "No configured sensitive files changed.": "没有改动已配置的敏感文件。"
     }[message] ?? message;
@@ -23744,6 +23898,11 @@ function translateDeduction(reasonId, fallback) {
         "sensitive-path-high": "高敏感文件发生变更，需要重点 review。",
         "sensitive-path-medium": "敏感文件发生变更，需要重点 review。",
         "dependency-change": "依赖清单发生变更。",
+        "dependency-major-upgrade": "依赖发生大版本升级。",
+        "dependency-lifecycle-script": "包生命周期脚本可能在安装或发布阶段执行代码。",
+        "workflow-dangerous-trigger": "pull_request_target workflow 需要重点审查高权限触发路径。",
+        "workflow-untrusted-checkout": "Workflow checkout PR head 代码，需要审查权限边界。",
+        "evidence-contract-missing": "仓库自定义证据契约未满足。",
         "missing-tests": "代码发生变更，但缺少测试变更或验证说明。"
     }[reasonId] ?? fallback;
 }
@@ -23859,12 +24018,27 @@ const REPRODUCTION_PATTERNS = [
     /\b(?:before|after|expected|actual)\b/i,
     /复现|重现|复现步骤|期望|实际/
 ];
+const SCREENSHOT_PATTERNS = [
+    /\b(?:screenshot|screen shot|screen recording|recording|gif|image|before\/after)\b/i,
+    /截图|录屏|效果图|前后对比|对比图/
+];
+const CHANGELOG_PATTERNS = [
+    /\b(?:changelog|release notes?|migration guide|breaking changes?|upgrade guide)\b/i,
+    /变更日志|发布说明|迁移指南|升级说明|破坏性变更|兼容性/
+];
+const PERMISSION_RATIONALE_PATTERNS = [
+    /\b(?:least privilege|permission rationale|write permission|oidc|id-token|trusted workflow|untrusted pr|token scope)\b/i,
+    /权限理由|最小权限|写权限|OIDC|id-token|不可信 PR|高权限|token 权限|凭证权限/
+];
 function analyzeEvidence(context) {
     if (!context) {
         return {
             descriptionState: "unavailable",
             verificationEvidence: false,
-            reproductionEvidence: false
+            reproductionEvidence: false,
+            screenshotEvidence: false,
+            changelogEvidence: false,
+            permissionRationaleEvidence: false
         };
     }
     const text = [context.title ?? "", context.body ?? ""].join("\n").trim();
@@ -23872,7 +24046,10 @@ function analyzeEvidence(context) {
     return {
         descriptionState: descriptionState(body),
         verificationEvidence: matchesAnyPattern(text, VERIFICATION_PATTERNS),
-        reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS)
+        reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS),
+        screenshotEvidence: matchesAnyPattern(text, SCREENSHOT_PATTERNS),
+        changelogEvidence: matchesAnyPattern(text, CHANGELOG_PATTERNS),
+        permissionRationaleEvidence: matchesAnyPattern(text, PERMISSION_RATIONALE_PATTERNS)
     };
 }
 function descriptionState(body) {
@@ -24053,8 +24230,11 @@ function analyzeDiffFiles(files, config, pullRequest) {
     findings.push(...analyzeSensitivePaths(activeFiles, config));
     findings.push(...analyzeMissingTests(activeFiles, config, pullRequest));
     findings.push(...analyzePullRequestEvidence(activeFiles, pullRequest));
+    findings.push(...analyzeEvidenceContracts(activeFiles, config, pullRequest));
     findings.push(...analyzeDependencyChanges(activeFiles, config));
     findings.push(...analyzeWorkflowPermissions(activeFiles));
+    findings.push(...analyzeWorkflowDangerousTriggers(activeFiles));
+    findings.push(...analyzeWorkflowUntrustedCheckout(activeFiles));
     findings.push(...analyzeMcpConfigs(activeFiles));
     if (config.secrets.enabled) {
         for (const file of activeFiles) {
@@ -24074,7 +24254,10 @@ function summarizeDiffFiles(files, config, pullRequest) {
         sensitiveFilesChanged: activeFiles.filter((file) => matchesAny(file.path, config.sensitivePaths)).length,
         pullRequestDescription: evidence.descriptionState,
         verificationEvidence: evidence.verificationEvidence,
-        reproductionEvidence: evidence.reproductionEvidence
+        reproductionEvidence: evidence.reproductionEvidence,
+        screenshotEvidence: evidence.screenshotEvidence,
+        changelogEvidence: evidence.changelogEvidence,
+        permissionRationaleEvidence: evidence.permissionRationaleEvidence
     };
 }
 function analyzeChangeSize(files) {
@@ -24179,55 +24362,187 @@ function analyzePullRequestEvidence(files, pullRequest) {
     }
     return findings;
 }
-function analyzeDependencyChanges(files, config) {
-    if (!config.dependencies.flagNewPackages) {
/**
 * Produce one finding per configured evidence contract whose path patterns
 * match at least one changed file and whose required evidence is not all
 * present.
 *
 * @param {Array<{path: string}>} files - Changed files under analysis.
 * @param {object} config - Parsed config; reads `config.evidence.contracts`.
 * @param {object} pullRequest - Pull request context handed to `analyzeEvidence`.
 * @returns {Array<object>} Findings with rule id `evidence-contract:<contract id>`.
 */
function analyzeEvidenceContracts(files, config, pullRequest) {
    const { contracts } = config.evidence;
    if (contracts.length === 0) {
        return [];
    }
    const evidence = analyzeEvidence(pullRequest);
    const hasTestChanges = files.some((file) => isTestPath(file.path));
    return contracts.flatMap((contract) => {
        const matchedFiles = files.filter((file) => matchesAny(file.path, contract.paths));
        if (matchedFiles.length === 0) {
            // Contract does not apply to this change set.
            return [];
        }
        const missingRequirements = contract.requires.filter((requirement) => !hasEvidenceRequirement(requirement, evidence, hasTestChanges));
        if (missingRequirements.length === 0) {
            // Contract applies and is satisfied.
            return [];
        }
        const missingList = missingRequirements.map(formatEvidenceRequirement).join(", ");
        const matchedList = matchedFiles.slice(0, 5).map((file) => file.path).join(", ");
        return [
            {
                ruleId: `evidence-contract:${contract.id}`,
                title: contract.title ?? "Evidence contract missing",
                message: `Changed files match evidence contract "${contract.id}", but missing required evidence: ${missingList}.`,
                severity: contract.severity,
                path: matchedFiles[0]?.path,
                evidence: [
                    `matched files: ${matchedList}`,
                    `missing evidence: ${missingList}`
                ],
                recommendation: contract.recommendation ??
                    "Ask the contributor to add the missing evidence before spending deep review time."
            }
        ];
    });
}
/**
 * Report whether a single evidence requirement is satisfied.
 *
 * "verification" is also satisfied when the change set touches test files.
 * Any requirement name other than the four handled explicitly falls through
 * to the permission-rationale flag.
 *
 * @param {string} requirement - Requirement name from an evidence contract.
 * @param {object} evidence - Evidence flags for the pull request.
 * @param {boolean} hasTestChanges - Whether any changed file is a test file.
 * @returns {boolean} Whether the requirement is met.
 */
function hasEvidenceRequirement(requirement, evidence, hasTestChanges) {
    switch (requirement) {
        case "verification":
            return evidence.verificationEvidence || hasTestChanges;
        case "reproduction":
            return evidence.reproductionEvidence;
        case "screenshot":
            return evidence.screenshotEvidence;
        case "changelog":
            return evidence.changelogEvidence;
        default:
            return evidence.permissionRationaleEvidence;
    }
}
/**
 * Collect dependency findings for every changed dependency manifest.
 *
 * For each manifest, in order: a "dependency-added" finding when enabled and
 * dependency-like lines were added, then major-upgrade findings, then
 * lifecycle-script findings. Each check is gated by its own flag under
 * `config.dependencies`.
 *
 * @param {Array<object>} files - Changed files with `path` and `addedLines`.
 * @param {object} config - Parsed config; reads `config.dependencies.*` flags.
 * @returns {Array<object>} Findings in per-file order.
 */
function analyzeDependencyChanges(files, config) {
    const manifests = files.filter((candidate) => isDependencyManifest(candidate.path));
    return manifests.flatMap((file) => {
        const perFile = [];
        if (config.dependencies.flagNewPackages) {
            const addedDependencyLines = file.addedLines.filter((line) => isDependencyLikeAddition(file.path, line.value.trim()));
            if (addedDependencyLines.length > 0) {
                perFile.push({
                    ruleId: "dependency-added",
                    title: "Dependency manifest changed",
                    message: `${file.path} adds or changes dependency-like entries.`,
                    severity: "medium",
                    path: file.path,
                    evidence: addedDependencyLines.slice(0, 5).map(formatEvidenceLine),
                    recommendation: "Verify package names, licenses, provenance, and whether the lockfile matches the intended dependency change."
                });
            }
        }
        if (config.dependencies.flagMajorUpgrades) {
            perFile.push(...analyzeMajorDependencyUpgrades(file));
        }
        if (config.dependencies.flagLifecycleScripts) {
            perFile.push(...analyzeLifecycleScripts(file));
        }
        return perFile;
    });
}
 function isDependencyLikeAddition(path, line) {
+    return parseDependencyLine(path, { value: line }) !== undefined;
+}
/**
 * Detect dependencies in one manifest whose version moved up across a major
 * boundary, by pairing each added dependency line with the removed line of
 * the same name.
 *
 * When a name appears on several removed lines, the last one is used.
 *
 * @param {object} file - Changed file with `path`, `addedLines`, `removedLines`.
 * @returns {Array<object>} Empty, or a single "dependency-major-upgrade" finding
 *   listing up to five upgrades as evidence.
 */
function analyzeMajorDependencyUpgrades(file) {
    const previousByName = new Map();
    for (const removedLine of file.removedLines) {
        const previous = parseDependencyLine(file.path, removedLine);
        if (previous) {
            previousByName.set(previous.name, previous);
        }
    }
    const majorBumps = [];
    for (const addedLine of file.addedLines) {
        const added = parseDependencyLine(file.path, addedLine);
        if (!added) {
            continue;
        }
        const removed = previousByName.get(added.name);
        if (removed !== undefined && isMajorUpgrade(removed.version, added.version)) {
            majorBumps.push({ added, removed });
        }
    }
    if (majorBumps.length === 0) {
        return [];
    }
    const describeBump = ({ added, removed }) => {
        const location = added.line.lineNumber ? `line ${added.line.lineNumber}: ` : "";
        return `${location}${added.name} ${removed.version} -> ${added.version}`;
    };
    return [
        {
            ruleId: "dependency-major-upgrade",
            title: "Dependency major version upgrade",
            message: `${file.path} upgrades one or more dependencies across a major version boundary.`,
            severity: "medium",
            path: file.path,
            evidence: majorBumps.slice(0, 5).map(describeBump),
            recommendation: "Check changelogs, migration notes, peer dependency impact, and whether tests cover the upgraded package surface."
        }
    ];
}
/**
 * Flag added package.json lines that define npm lifecycle scripts, i.e.
 * scripts npm runs on its own during install, pack, or publish.
 *
 * Covered hooks: preinstall, install, postinstall, prepublish,
 * prepublishOnly, preprepare, prepare, postprepare, prepack, postpack and
 * postpublish. The `dependencies` hook is deliberately not matched because
 * `"dependencies":` is also the ordinary manifest section key; `publish` is
 * left out as well, on the assumption that the bare key is too likely to
 * appear outside `scripts` (NOTE(review): confirm before adding it).
 *
 * @param {object} file - Changed file with `path` and `addedLines`.
 * @returns {Array<object>} Empty, or a single high-severity
 *   "dependency-lifecycle-script" finding with up to five evidence lines.
 */
function analyzeLifecycleScripts(file) {
    if (!file.path.endsWith("package.json")) {
        return [];
    }
    const lifecycleKey = /^"(?:preinstall|install|postinstall|prepublish|prepublishOnly|preprepare|prepare|postprepare|prepack|postpack|postpublish)"\s*:/;
    const lifecycleLines = file.addedLines.filter((line) => lifecycleKey.test(line.value.trim()));
    if (lifecycleLines.length === 0) {
        return [];
    }
    return [
        {
            ruleId: "dependency-lifecycle-script",
            title: "Package lifecycle script changed",
            message: `${file.path} adds or changes npm lifecycle scripts that may run during install or publish.`,
            severity: "high",
            path: file.path,
            evidence: lifecycleLines.slice(0, 5).map(formatEvidenceLine),
            recommendation: "Review whether the lifecycle script is necessary, whether it downloads or executes remote code, and whether it can affect consumers during install."
        }
    ];
}
/**
 * Parse one manifest line into a dependency entry, using rules chosen by the
 * manifest file name (package.json, requirements.txt, pyproject.toml /
 * Cargo.toml, go.mod).
 *
 * package.json keys may contain `/` so that scoped packages such as
 * `@types/node` are recognised, matching what the go.mod rule already allows
 * for module paths.
 *
 * @param {string} path - Path of the manifest the line belongs to.
 * @param {{value: string}} line - Diff line; only `value` is read here, the
 *   object itself is returned unchanged as `line`.
 * @returns {{name: string, version: string, line: object} | undefined}
 *   The parsed entry, or undefined when the line is not a dependency entry.
 */
function parseDependencyLine(path, line) {
    const value = line.value.trim();
    if (path.endsWith("package.json")) {
        const match = /^"(?<key>[@A-Za-z0-9_.\-/]+)"\s*:\s*"(?<version>[^"]*)"/.exec(value);
        if (!match?.groups) {
            return undefined;
        }
        const { key, version } = match.groups;
        if (!key || !version || PACKAGE_JSON_NON_DEPENDENCY_KEYS.has(key)) {
            return undefined;
        }
        // Only values that look like a version range or a package source
        // count; this filters ordinary string fields and path maps.
        if (!/^(?:\^|~|>=?|<=?|\d|workspace:|npm:|file:|link:|portal:|git\+|https?:|github:)/.test(version)) {
            return undefined;
        }
        return { name: key, version, line };
    }
    if (path.endsWith("requirements.txt")) {
        const match = /^(?<name>[A-Za-z0-9_.-]+)(?:\[.*\])?\s*(?:==|>=|<=|~=|>|<)\s*(?<version>[^#\s]+)/.exec(value);
        return match?.groups?.name && match.groups.version
            ? { name: match.groups.name, version: match.groups.version, line }
            : undefined;
    }
    if (path.endsWith("pyproject.toml") || path.endsWith("Cargo.toml")) {
        const match = /^(?<name>[A-Za-z0-9_.-]+)\s*=\s*"(?<version>(?:\^|~|>=?|<=?|\d|workspace:|path\s*=|git\s*=)[^"]*)"/.exec(value);
        return match?.groups?.name && match.groups.version
            ? { name: match.groups.name, version: match.groups.version, line }
            : undefined;
    }
    if (path.endsWith("go.mod")) {
        const match = /^(?:require\s+)?(?<name>[A-Za-z0-9_.\-/]+)\s+(?<version>v\d+\.\d+\.\d+)/.exec(value);
        return match?.groups?.name && match.groups.version
            ? { name: match.groups.name, version: match.groups.version, line }
            : undefined;
    }
    return undefined;
}
/**
 * Report whether `nextVersion` has a higher major version than
 * `previousVersion`. Returns false when either major cannot be extracted.
 *
 * @param {string} previousVersion - Version text from the removed line.
 * @param {string} nextVersion - Version text from the added line.
 * @returns {boolean} True only for a strictly larger major version.
 */
function isMajorUpgrade(previousVersion, nextVersion) {
    const before = extractMajorVersion(previousVersion);
    const after = extractMajorVersion(nextVersion);
    if (before === undefined || after === undefined) {
        return false;
    }
    return after > before;
}
/**
 * Extract the major version number from a dependency version specifier.
 *
 * Leading `workspace:`, an `npm:<alias>@` prefix (scoped or unscoped), range
 * operators and a `v` prefix are stripped first. A full `X.Y.Z` triple is
 * then searched for anywhere in the remainder; if none is found, a shorter
 * form anchored at the start is accepted (`2`, `4.2`, `2.x`, `1.*`), so
 * specifiers like `^2` or `django==4.2` still yield a major version.
 *
 * @param {string} version - Version specifier text.
 * @returns {number | undefined} The major version, or undefined when the
 *   specifier has no recognisable version number.
 */
function extractMajorVersion(version) {
    const normalized = version
        .replace(/^workspace:/, "")
        .replace(/^npm:(?:@[^/@]+\/)?[^@]+@/, "")
        .replace(/^[~^<>=\s]+/, "")
        .replace(/^v/, "");
    // The short form is anchored and must end at a separator, so strings that
    // merely start with a digit (for example a hex hash) are not mistaken
    // for versions.
    const match = /(?<major>\d+)\.\d+\.\d+/.exec(normalized) ??
        /^(?<major>\d+)(?:\.(?:\d+|[xX*]))*(?![\w.])/.exec(normalized);
    const major = match?.groups?.major ? Number(match.groups.major) : undefined;
    return major !== undefined && Number.isInteger(major) ? major : undefined;
}
 function analyzeWorkflowPermissions(files) {
     const findings = [];
     for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
-        const permissionLines = file.addedLines.filter((line) => /permissions:|contents:\s*write|packages:\s*write|id-token:\s*write|pull-requests:\s*write/.test(line.value.trim()));
+        const permissionLines = file.addedLines.filter((line) => isRiskyWorkflowPermissionLine(line.value));
         if (permissionLines.length === 0) {
             continue;
         }
@@ -24243,6 +24558,59 @@ function analyzeWorkflowPermissions(files) {
     }
     return findings;
 }
/**
 * Report whether a workflow line grants write-level token permissions.
 *
 * Recognised forms (case-insensitive, value optionally quoted):
 *  - `permissions: write-all`
 *  - `<scope>: write` on its own line
 *  - an inline mapping such as `permissions: { contents: write }`
 *
 * @param {string} value - Raw text of the workflow line.
 * @returns {boolean} True when the line grants a write permission.
 */
function isRiskyWorkflowPermissionLine(value) {
    const line = value.trim();
    if (/^permissions:\s*["']?write-all\b/i.test(line)) {
        return true;
    }
    const scopes = "actions|attestations|checks|contents|deployments|discussions|id-token|issues|models|packages|pages|pull-requests|repository-projects|security-events|statuses";
    if (new RegExp(`^(?:${scopes}):\\s*["']?write\\b`, "i").test(line)) {
        return true;
    }
    // Flow-style mapping on the `permissions:` line itself; each entry
    // follows `{` or `,`.
    return /^permissions:\s*\{/i.test(line) &&
        new RegExp(`[{,]\\s*(?:${scopes}):\\s*["']?write\\b`, "i").test(line);
}
/**
 * Emit one high-severity finding per workflow file whose added lines mention
 * `pull_request_target`.
 *
 * @param {Array<object>} files - Changed files with `path` and `addedLines`.
 * @returns {Array<object>} "workflow-dangerous-trigger" findings, each with up
 *   to five evidence lines.
 */
function analyzeWorkflowDangerousTriggers(files) {
    const mentionsTrigger = (line) => /\bpull_request_target\b/.test(line.value.trim());
    return files
        .filter((candidate) => isWorkflowPath(candidate.path))
        .flatMap((file) => {
            const triggerLines = file.addedLines.filter((line) => mentionsTrigger(line));
            if (triggerLines.length === 0) {
                return [];
            }
            return [
                {
                    ruleId: "workflow-dangerous-trigger",
                    title: "Workflow uses pull_request_target",
                    message: `${file.path} adds pull_request_target, which runs with base repository context and can be risky for untrusted PRs.`,
                    severity: "high",
                    path: file.path,
                    evidence: triggerLines.slice(0, 5).map(formatEvidenceLine),
                    recommendation: "Confirm the workflow does not check out or execute untrusted PR code with privileged tokens or write permissions."
                }
            ];
        });
}
/**
 * Emit one finding per workflow file whose added lines reference the pull
 * request head (see `isPullRequestHeadCheckoutLine`).
 *
 * Severity is "high" when the added lines of the same file also mention
 * `pull_request_target`, otherwise "medium". Only added lines are inspected
 * here, so a trigger already present in unchanged lines does not raise the
 * severity.
 *
 * @param {Array<object>} files - Changed files with `path` and `addedLines`.
 * @returns {Array<object>} "workflow-untrusted-checkout" findings, each with
 *   up to five evidence lines.
 */
function analyzeWorkflowUntrustedCheckout(files) {
    const findings = [];
    for (const file of files) {
        if (!isWorkflowPath(file.path)) {
            continue;
        }
        const headCheckoutLines = file.addedLines.filter((line) => isPullRequestHeadCheckoutLine(line.value));
        if (headCheckoutLines.length === 0) {
            continue;
        }
        const hasPullRequestTarget = file.addedLines.some((line) => /\bpull_request_target\b/.test(line.value.trim()));
        let message;
        let severity;
        if (hasPullRequestTarget) {
            message = `${file.path} combines pull_request_target with pull request head checkout references.`;
            severity = "high";
        }
        else {
            message = `${file.path} checks out pull request head references; review the job privilege boundary before merging.`;
            severity = "medium";
        }
        findings.push({
            ruleId: "workflow-untrusted-checkout",
            title: "Workflow checks out pull request head",
            message,
            severity,
            path: file.path,
            evidence: headCheckoutLines.slice(0, 5).map(formatEvidenceLine),
            recommendation: "Avoid running untrusted PR code with write tokens, repository secrets, or privileged pull_request_target context."
        });
    }
    return findings;
}
/**
 * Report whether a workflow line references the pull request head, either
 * through `github.head_ref` or through `github.event.pull_request.head`
 * (optionally followed by `.sha`, `.ref` or `.repo.full_name`).
 *
 * @param {string} value - Raw text of the workflow line.
 * @returns {boolean} True when either reference appears in the line.
 */
function isPullRequestHeadCheckoutLine(value) {
    const trimmed = value.trim();
    const headReferencePatterns = [
        /\bgithub\.head_ref\b/,
        /\bgithub\.event\.pull_request\.head(?:\.sha|\.ref|\.repo\.full_name)?\b/
    ];
    return headReferencePatterns.some((pattern) => pattern.test(trimmed));
}
 function analyzeMcpConfigs(files) {
     const findings = [];
     for (const file of files.filter((candidate) => isMcpConfigPath(candidate.path))) {
@@ -24266,6 +24634,9 @@ function formatEvidenceLine(line) {
     const value = line.value.trim();
     return line.lineNumber ? `line ${line.lineNumber}: ${value}` : value;
 }
+/**
+ * Formats an evidence requirement for display.
+ * Currently an identity mapping: the requirement value is returned unchanged.
+ *
+ * @param {string} requirement Evidence requirement identifier.
+ * @returns {string} The same value that was passed in.
+ */
+function formatEvidenceRequirement(requirement) {
+    return requirement;
+}
 function sensitivePathSeverity(path) {
     if (matchesAny(path, [
         "**/.env*",
@@ -24336,9 +24707,13 @@ function calculateEvidenceScore(summary, findings) {
         "sensitive-path",
         "missing-tests",
         "dependency-added",
+        "dependency-major-upgrade",
+        "dependency-lifecycle-script",
         "workflow-permission-change",
+        "workflow-dangerous-trigger",
+        "workflow-untrusted-checkout",
         "mcp-credential-risk"
-    ].includes(finding.ruleId));
+    ].includes(finding.ruleId) || finding.ruleId.startsWith("evidence-contract:"));
     if (needsVerificationEvidence && !summary.verificationEvidence) {
         addDeduction("missing-verification", 20, "No test or manual verification evidence was found.");
     }
@@ -24352,6 +24727,12 @@ function calculateEvidenceScore(summary, findings) {
         else if (finding.ruleId === "workflow-permission-change") {
             addDeduction("workflow-permission-change", 25, "Workflow permission changes need deliberate review.");
         }
+        else if (finding.ruleId === "workflow-dangerous-trigger") {
+            addDeduction("workflow-dangerous-trigger", 30, "pull_request_target workflows need privileged trigger review.");
+        }
+        else if (finding.ruleId === "workflow-untrusted-checkout") {
+            addDeduction("workflow-untrusted-checkout", finding.severity === "high" ? 30 : 18, "Workflow checkout of pull request head needs privilege-boundary review.");
+        }
         else if (finding.ruleId === "mcp-credential-risk") {
             addDeduction("mcp-credential-risk", 25, "MCP configuration expands local execution or credential risk.");
         }
@@ -24366,6 +24747,15 @@ function calculateEvidenceScore(summary, findings) {
         else if (finding.ruleId === "dependency-added") {
             addDeduction("dependency-change", 10, "Dependency manifest changed.");
         }
+        else if (finding.ruleId === "dependency-major-upgrade") {
+            addDeduction("dependency-major-upgrade", 15, "Dependency major version changed.");
+        }
+        else if (finding.ruleId === "dependency-lifecycle-script") {
+            addDeduction("dependency-lifecycle-script", 25, "Package lifecycle scripts can run during install or publish.");
+        }
+        else if (finding.ruleId.startsWith("evidence-contract:")) {
+            addDeduction("evidence-contract-missing", finding.severity === "high" ? 25 : 15, "Configured evidence contract was not satisfied.");
+        }
         else if (finding.ruleId === "missing-tests") {
             addDeduction("missing-tests", finding.severity === "medium" ? 20 : 12, "Code changed without test changes or verification notes.");
         }
@@ -24394,6 +24784,15 @@ function collectEvidenceStrengths(summary) {
     if (summary.reproductionEvidence) {
         strengths.push("Reproduction or before/after context was found.");
     }
+    if (summary.screenshotEvidence) {
+        strengths.push("Screenshot or visual evidence was found.");
+    }
+    if (summary.changelogEvidence) {
+        strengths.push("Changelog or migration evidence was found.");
+    }
+    if (summary.permissionRationaleEvidence) {
+        strengths.push("Permission rationale evidence was found.");
+    }
     if (summary.testFilesChanged > 0) {
         strengths.push("Test files changed with the PR.");
     }
@@ -24417,11 +24816,15 @@ function gradeEvidenceScore(value) {
 function calculateReviewDecision(risk, evidenceScore, findings) {
     const hasBlockingSecurityFinding = findings.some((finding) => finding.ruleId.startsWith("secret-detected") ||
         finding.ruleId === "workflow-permission-change" ||
+        finding.ruleId === "workflow-dangerous-trigger" ||
+        (finding.ruleId === "workflow-untrusted-checkout" && finding.severity === "high") ||
+        finding.ruleId === "dependency-lifecycle-script" ||
         finding.ruleId === "mcp-credential-risk");
     if (hasBlockingSecurityFinding || evidenceScore.value < 50 || risk === "high") {
         return "block-merge";
     }
-    if (evidenceScore.value < 70 || findings.some((finding) => finding.ruleId === "missing-tests" || finding.ruleId === "thin-pr-description")) {
+    if (evidenceScore.value < 70 ||
+        findings.some((finding) => finding.ruleId === "missing-tests" || finding.ruleId === "thin-pr-description")) {
         return "needs-evidence";
     }
     if (risk === "medium") {
@@ -24547,6 +24950,17 @@ function reviewActionsForFinding(finding) {
             }
         ];
     }
+    if (finding.ruleId.startsWith("evidence-contract:")) {
+        return [
+            {
+                actionId: "satisfy-evidence-contract",
+                title: "Ask for the configured evidence contract to be satisfied.",
+                detail: "The PR matches a repository-defined evidence contract but is missing required proof in the PR description.",
+                priority: finding.severity === "high" ? "high" : "medium",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
     if (finding.ruleId === "workflow-permission-change") {
         return [
             {
@@ -24558,6 +24972,39 @@ function reviewActionsForFinding(finding) {
             }
         ];
     }
+    if (finding.ruleId === "workflow-dangerous-trigger") {
+        return [
+            {
+                actionId: "review-privileged-pr-trigger",
+                title: "Review privileged pull_request_target usage.",
+                detail: "Confirm the workflow does not execute untrusted PR code with write tokens, secrets, or repository permissions.",
+                priority: "high",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
+    if (finding.ruleId === "workflow-untrusted-checkout") {
+        return [
+            {
+                actionId: "review-untrusted-checkout",
+                title: "Review pull request head checkout privileges.",
+                detail: "Confirm the job does not run untrusted PR code with write tokens, repository secrets, or pull_request_target privileges.",
+                priority: finding.severity === "high" ? "high" : "medium",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
+    if (finding.ruleId === "dependency-lifecycle-script") {
+        return [
+            {
+                actionId: "review-package-lifecycle-script",
+                title: "Review package lifecycle scripts before merge.",
+                detail: "Check whether install, postinstall, prepare, or publish scripts can execute unexpected code for contributors or consumers.",
+                priority: "high",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
     if (finding.ruleId === "mcp-credential-risk") {
         return [
             {
@@ -24591,6 +25038,17 @@ function reviewActionsForFinding(finding) {
             }
         ];
     }
+    if (finding.ruleId === "dependency-major-upgrade") {
+        return [
+            {
+                actionId: "review-major-dependency-upgrade",
+                title: "Review major dependency upgrade impact.",
+                detail: "Check changelogs, migration notes, peer dependencies, and whether tests cover the upgraded surface.",
+                priority: "medium",
+                relatedRuleIds: [finding.ruleId]
+            }
+        ];
+    }
     if (finding.ruleId === "sensitive-path") {
         return [
             {
@@ -24664,7 +25122,7 @@ const build_program = new Command();
 build_program
     .name("proof-pr")
     .description("Review pull request evidence, scope, and safety before maintainers spend time on it.")
-    .version("0.1.5");
+    .version("0.1.7");
 build_program
     .command("scan", { isDefault: true })
     .description("Scan a git diff and print a ProofPR report.")
@@ -24708,6 +25166,35 @@ build_program
     await writeIfMissing(options.workflowPath, renderWorkflowTemplate(options.failOn), options.force);
     process.stdout.write(`ProofPR initialized:\n- ${options.configPath}\n- ${options.workflowPath}\n`);
 });
+// `benchmark` subcommand: runs the cases under --cases, renders the report in
+// the requested format (text by default), writes it to --output or stdout, and
+// sets a failing exit code when any case did not pass.
+build_program
+    .command("benchmark")
+    .description("Run ProofPR benchmark cases and compare expected risk/finding output.")
+    .option("--cases <dir>", "Directory containing benchmark case JSON files.", "benchmarks/cases")
+    .option("--format <format>", "Output format: text, markdown, or json.", parseBenchmarkFormat, "text")
+    .option("--output <path>", "Write benchmark output to a file instead of stdout.")
+    .action(async ({ cases, format, output: outputPath }) => {
+    const report = await runBenchmarks(cases);
+    const renderReport = () => {
+        switch (format) {
+            case "json":
+                return `${JSON.stringify(report, null, 2)}\n`;
+            case "markdown":
+                return renderBenchmarkMarkdown(report);
+            default:
+                return renderBenchmarkText(report);
+        }
+    };
+    const rendered = renderReport();
+    if (outputPath) {
+        await writeOutput(outputPath, rendered);
+        process.stdout.write(`ProofPR benchmark report written to ${outputPath}\n`);
+    }
+    else {
+        process.stdout.write(rendered);
+    }
+    // Use exitCode rather than exiting so pending output can still flush.
+    const hasFailedCase = report.results.some(({ passed }) => !passed);
+    if (hasFailedCase) {
+        process.exitCode = 1;
+    }
+});
 build_program.parseAsync(process.argv).catch((error) => {
     const message = error instanceof Error ? error.message : String(error);
     process.stderr.write(`ProofPR failed: ${message}\n`);
@@ -24734,6 +25221,10 @@ async function writeIfMissing(path, contents, force) {
     await (0,promises_namespaceObject.mkdir)((0,external_node_path_.dirname)(path), { recursive: true });
     await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
 }
+/**
+ * Writes `contents` to `path` as UTF-8, creating missing parent directories
+ * first. Unlike writeIfMissing, no existence check is made before writing.
+ *
+ * @param {string} path Destination file path.
+ * @param {string} contents Text to write.
+ * @returns {Promise<void>}
+ */
+async function writeOutput(path, contents) {
+    const parentDirectory = (0,external_node_path_.dirname)(path);
+    await (0,promises_namespaceObject.mkdir)(parentDirectory, { recursive: true });
+    await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
+}
 async function pathExists(path) {
     try {
         await (0,promises_namespaceObject.access)(path);
@@ -24792,6 +25283,19 @@ comment:
 # dependencies:
 #   flagNewPackages: true
 #   flagMajorUpgrades: true
+#   flagLifecycleScripts: true
+#
+# evidence:
+#   contracts:
+#     - id: ui-screenshot
+#       title: UI changes need screenshots
+#       paths:
+#         - "src/components/**"
+#         - "app/**"
+#       requires:
+#         - screenshot
+#         - verification
+#       severity: medium
 `;
 }
 function renderWorkflowTemplate(failOn) {
@@ -24810,7 +25314,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: linsk27/proof-pr@v0.1.5
+      - uses: linsk27/proof-pr@v0.1.7
         with:
           fail-on: ${failOn}
           comment: "true"
@@ -24826,12 +25330,168 @@ function renderOutput(result, format, locale) {
     }
     return renderMarkdownReport(result, locale);
 }
+/**
+ * Runs every benchmark case stored as a `.json` file directly inside
+ * `casesDir` (no recursion), in sorted path order.
+ *
+ * Each case names a diff file (resolved relative to the case file), optional
+ * `config` and `pullRequest` inputs for scanDiff, and an `expect` object with
+ * optional `risk`, `reviewDecision`, `findings` and `absentFindings`. Finding
+ * expectations may end in "*" to match by prefix.
+ *
+ * @param {string} casesDir Directory containing benchmark case JSON files.
+ * @returns {Promise<{summary: object, results: Array<object>}>} Per-case results plus aggregate summary.
+ * @throws {Error} When a case file is not valid JSON, or lacks a string
+ *   `diffFile` or an `expect` object; the message names the offending file.
+ */
+async function runBenchmarks(casesDir) {
+    const root = (0,external_node_path_.resolve)(casesDir);
+    const entries = await (0,promises_namespaceObject.readdir)(root, { withFileTypes: true });
+    const caseFiles = entries
+        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
+        .map((entry) => (0,external_node_path_.resolve)(root, entry.name))
+        .sort();
+    const results = [];
+    for (const caseFile of caseFiles) {
+        const rawCase = await (0,promises_namespaceObject.readFile)(caseFile, "utf8");
+        let testCase;
+        try {
+            testCase = JSON.parse(rawCase);
+        }
+        catch (error) {
+            // A bare SyntaxError does not say which case file is broken.
+            const reason = error instanceof Error ? error.message : String(error);
+            throw new Error(`Benchmark case ${caseFile} is not valid JSON: ${reason}`, { cause: error });
+        }
+        // Validate the shape up front so a malformed case fails with the file
+        // name instead of an opaque TypeError from a property access below.
+        if (testCase === null || typeof testCase !== "object" || typeof testCase.diffFile !== "string") {
+            throw new Error(`Benchmark case ${caseFile} must be an object with a string "diffFile".`);
+        }
+        if (testCase.expect === null || typeof testCase.expect !== "object") {
+            throw new Error(`Benchmark case ${caseFile} must define an "expect" object.`);
+        }
+        const expectation = testCase.expect;
+        const diffText = await (0,promises_namespaceObject.readFile)((0,external_node_path_.resolve)((0,external_node_path_.dirname)(caseFile), testCase.diffFile), "utf8");
+        const result = scanDiff(diffText, {
+            config: testCase.config,
+            pullRequest: testCase.pullRequest
+        });
+        const actualFindings = result.findings.map((finding) => finding.ruleId);
+        const failures = [];
+        if (expectation.risk && result.risk !== expectation.risk) {
+            failures.push(`expected risk ${expectation.risk}, got ${result.risk}`);
+        }
+        if (expectation.reviewDecision && result.reviewDecision !== expectation.reviewDecision) {
+            failures.push(`expected review decision ${expectation.reviewDecision}, got ${result.reviewDecision}`);
+        }
+        for (const expectedFinding of expectation.findings ?? []) {
+            if (!matchesFindingExpectation(actualFindings, expectedFinding)) {
+                failures.push(`expected finding ${expectedFinding}`);
+            }
+        }
+        for (const absentFinding of expectation.absentFindings ?? []) {
+            if (matchesFindingExpectation(actualFindings, absentFinding)) {
+                failures.push(`unexpected finding ${absentFinding}`);
+            }
+        }
+        results.push({
+            id: testCase.id,
+            title: testCase.title,
+            category: testCase.category ?? "uncategorized",
+            passed: failures.length === 0,
+            failures,
+            actual: {
+                risk: result.risk,
+                reviewDecision: result.reviewDecision,
+                findings: actualFindings
+            }
+        });
+    }
+    return {
+        summary: summarizeBenchmarkResults(results),
+        results
+    };
+}
+/**
+ * Aggregates per-case benchmark results into overall, per-category and
+ * per-rule statistics.
+ *
+ * @param {Array<object>} results Case results carrying `passed`, `category` and `actual.findings`.
+ * @returns {object} Totals and pass rate, `categories` sorted by name, and
+ *   `findingCounts` sorted by descending case count and then by rule id.
+ */
+function summarizeBenchmarkResults(results) {
+    const resultsByCategory = new Map();
+    const casesPerRule = new Map();
+    let passedCount = 0;
+    for (const result of results) {
+        if (result.passed) {
+            passedCount += 1;
+        }
+        if (!resultsByCategory.has(result.category)) {
+            resultsByCategory.set(result.category, []);
+        }
+        resultsByCategory.get(result.category).push(result);
+        // De-duplicate so a rule is counted once per case, not once per finding.
+        for (const ruleId of new Set(result.actual.findings)) {
+            casesPerRule.set(ruleId, (casesPerRule.get(ruleId) ?? 0) + 1);
+        }
+    }
+    const categories = [...resultsByCategory.keys()]
+        .sort((left, right) => left.localeCompare(right))
+        .map((category) => {
+        const items = resultsByCategory.get(category);
+        const categoryPassed = items.filter((item) => item.passed).length;
+        return {
+            category,
+            total: items.length,
+            passed: categoryPassed,
+            failed: items.length - categoryPassed,
+            passRate: ratio(categoryPassed, items.length)
+        };
+    });
+    const findingCounts = Array.from(casesPerRule, ([ruleId, count]) => ({ ruleId, count }))
+        .sort((left, right) => right.count - left.count || left.ruleId.localeCompare(right.ruleId));
+    return {
+        total: results.length,
+        passed: passedCount,
+        failed: results.length - passedCount,
+        passRate: ratio(passedCount, results.length),
+        categories,
+        findingCounts
+    };
+}
+/**
+ * Renders a benchmark report as plain text: a summary line, per-category pass
+ * rates, an optional "Finding coverage" section (omitted when there are no
+ * finding counts), and one PASS/FAIL line per case followed by its failures.
+ *
+ * @param {{summary: object, results: Array<object>}} report Report from runBenchmarks.
+ * @returns {string} Newline-joined text ending with a trailing newline.
+ */
+function renderBenchmarkText(report) {
+    const { summary, results } = report;
+    const summaryLine = `Summary: ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`;
+    const categoryLines = summary.categories.map((category) => `- ${category.category}: ${category.passed}/${category.total} passed (${formatPercent(category.passRate)})`);
+    const coverageLines = summary.findingCounts.length > 0
+        ? ["", "Finding coverage:", ...summary.findingCounts.map((item) => `- ${item.ruleId}: ${item.count}`)]
+        : [];
+    const caseLines = results.flatMap((result) => {
+        const status = result.passed ? "PASS" : "FAIL";
+        const titleSuffix = result.title ? ` - ${result.title}` : "";
+        return [
+            `${status} ${result.id}${titleSuffix}`,
+            ...result.failures.map((failure) => `  - ${failure}`)
+        ];
+    });
+    return [
+        "ProofPR benchmark",
+        "",
+        summaryLine,
+        "",
+        "Categories:",
+        ...categoryLines,
+        ...coverageLines,
+        "",
+        ...caseLines,
+        ""
+    ].join("\n");
+}
+/**
+ * Renders a benchmark report as Markdown: a summary line followed by three
+ * tables (categories, finding coverage, and individual cases).
+ *
+ * Case ids, categories and rule ids are escaped before being placed in table
+ * cells, because they come from case files and repository configuration and
+ * may contain characters that would break the table layout.
+ *
+ * @param {{summary: object, results: Array<object>}} report Report from runBenchmarks.
+ * @returns {string} Markdown text ending with a trailing newline.
+ */
+function renderBenchmarkMarkdown(report) {
+    // A raw "|" starts a new cell (even inside a code span) and a line break
+    // ends the row, so neutralize both before interpolating into a table.
+    const cell = (text) => String(text).replace(/\r?\n/g, " ").replace(/\|/g, "\\|");
+    const lines = [
+        "# ProofPR Benchmark",
+        "",
+        `**Summary:** ${report.summary.passed}/${report.summary.total} passed (${formatPercent(report.summary.passRate)})`,
+        "",
+        "## Categories",
+        "",
+        "| Category | Passed | Total | Pass rate |",
+        "| --- | ---: | ---: | ---: |"
+    ];
+    for (const category of report.summary.categories) {
+        lines.push(`| ${cell(category.category)} | ${category.passed} | ${category.total} | ${formatPercent(category.passRate)} |`);
+    }
+    lines.push("", "## Finding Coverage", "", "| Rule | Cases |", "| --- | ---: |");
+    for (const item of report.summary.findingCounts) {
+        lines.push(`| \`${cell(item.ruleId)}\` | ${item.count} |`);
+    }
+    lines.push("", "## Cases", "", "| Result | Case | Category | Actual risk | Gate |", "| --- | --- | --- | --- | --- |");
+    for (const result of report.results) {
+        lines.push(`| ${result.passed ? "PASS" : "FAIL"} | \`${cell(result.id)}\` | ${cell(result.category)} | ${result.actual.risk} | ${result.actual.reviewDecision} |`);
+    }
+    lines.push("");
+    return lines.join("\n");
+}
+/**
+ * Divides `value` by `total`, returning 0 instead of NaN/Infinity when
+ * `total` is 0 (for example, an empty benchmark run).
+ *
+ * @param {number} value Numerator.
+ * @param {number} total Denominator.
+ * @returns {number} `value / total`, or 0 when `total` is 0.
+ */
+function ratio(value, total) {
+    if (total === 0) {
+        return 0;
+    }
+    return value / total;
+}
+/**
+ * Formats a fraction as a whole-number percentage string.
+ *
+ * @param {number} value Fraction, e.g. 0.5 for one half.
+ * @returns {string} Rounded percentage followed by "%", e.g. "50%".
+ */
+function formatPercent(value) {
+    const wholePercent = Math.round(value * 100);
+    return `${wholePercent}%`;
+}
+/**
+ * Checks whether an expected rule id is present among the actual rule ids.
+ * An expectation ending in "*" matches any rule id that starts with the text
+ * before the "*"; any other expectation must match a rule id exactly.
+ *
+ * @param {string[]} actualFindings Rule ids produced by the scan.
+ * @param {string} expected Exact rule id, or a prefix followed by "*".
+ * @returns {boolean} True when at least one actual rule id matches.
+ */
+function matchesFindingExpectation(actualFindings, expected) {
+    if (!expected.endsWith("*")) {
+        return actualFindings.includes(expected);
+    }
+    const prefix = expected.slice(0, -1);
+    return actualFindings.some((finding) => finding.startsWith(prefix));
+}
 /**
  * Validates an output format value and returns it unchanged.
  *
  * @param {string} value Candidate format.
  * @returns {"json"|"markdown"|"sarif"} The accepted format.
  * @throws {InvalidArgumentError} When the value is not a supported format.
  */
 function parseFormat(value) {
     switch (value) {
         case "json":
         case "markdown":
         case "sarif":
             return value;
         default:
             throw new InvalidArgumentError("format must be one of: markdown, json, sarif");
     }
 }
+/**
+ * Validates the `--format` value of the benchmark command and returns it
+ * unchanged.
+ *
+ * @param {string} value Candidate format.
+ * @returns {"text"|"json"|"markdown"} The accepted format.
+ * @throws {InvalidArgumentError} When the value is not a supported format.
+ */
+function parseBenchmarkFormat(value) {
+    switch (value) {
+        case "text":
+        case "json":
+        case "markdown":
+            return value;
+        default:
+            throw new InvalidArgumentError("benchmark format must be one of: text, markdown, json");
+    }
+}
 function parseFailLevel(value) {
     if (value === "low" || value === "medium" || value === "high" || value === "never") {
         return value;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "proof-pr",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "description": "CLI for ProofPR, a maintainer-focused pull request evidence scanner.",
   "license": "MIT",
   "type": "module",