proof-pr 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +2 -1
  2. package/dist/index.js +692 -32
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -9,7 +9,7 @@ ProofPR 帮助维护者在投入深入 review 之前,先检查 PR 的证据、
9
9
  作为 GitHub Action 使用时,ProofPR 默认在 PR 打开、PR 分支更新、PR 重新打开时运行。普通分支 push 不会单独生成报告。
10
10
 
11
11
  报告会出现在 PR 评论区、GitHub Actions job summary 和 PR checks 状态里。
12
- `v0.1.5` 起还可以输出 GitHub annotations,并通过 `sarif-output` 写出 SARIF 文件。
12
+ `v0.1.5` 起还可以输出 GitHub annotations,并通过 `sarif-output` 写出 SARIF 文件。当前版本还会识别依赖大版本升级、包生命周期脚本和 `pull_request_target` workflow 触发器。
13
13
 
14
14
  ## 使用
15
15
 
@@ -21,6 +21,7 @@ npx proof-pr@latest init --preset security-strict
21
21
  npx proof-pr@latest scan --base origin/main --head HEAD
22
22
  npx proof-pr@latest scan --base origin/main --head HEAD --locale zh-CN
23
23
  npx proof-pr@latest scan --base origin/main --pr-body-file pr-body.md --format json
24
+ npx proof-pr@latest benchmark --cases benchmarks/cases
24
25
  ```
25
26
 
26
27
  可用预设:`balanced`、`open-source-maintainer`、`security-strict`、`ai-generated-pr`、`mcp-security`、`dependency-careful`。
package/dist/index.js CHANGED
@@ -23111,7 +23111,15 @@ function preprocess(fn, schema) {
23111
23111
 
23112
23112
 
23113
23113
  const riskLevelSchema = schemas_enum(["low", "medium", "high"]);
23114
+ const findingSeveritySchema = schemas_enum(["info", "low", "medium", "high"]);
23114
23115
  const localeSchema = schemas_enum(["en", "zh-CN"]);
23116
+ const evidenceRequirementSchema = schemas_enum([
23117
+ "verification",
23118
+ "reproduction",
23119
+ "screenshot",
23120
+ "changelog",
23121
+ "permission-rationale"
23122
+ ]);
23115
23123
  const configPresetSchema = schemas_enum([
23116
23124
  "balanced",
23117
23125
  "open-source-maintainer",
@@ -23149,6 +23157,38 @@ const DEFAULT_SENSITIVE_PATHS = [
23149
23157
  "go.sum"
23150
23158
  ];
23151
23159
  const DEFAULT_TEST_PATHS = ["src/**", "packages/**/src/**", "app/**", "lib/**"];
23160
+ const WORKFLOW_EVIDENCE_CONTRACTS = [
23161
+ {
23162
+ id: "workflow-permission-rationale",
23163
+ title: "Workflow changes need a permission rationale",
23164
+ paths: [".github/workflows/**", ".github/actions/**"],
23165
+ requires: ["verification", "permission-rationale"],
23166
+ severity: "high",
23167
+ recommendation: "Explain why the workflow needs this trigger or permission, and include verification that untrusted PR code cannot reach privileged tokens."
23168
+ }
23169
+ ];
23170
+ const DEPENDENCY_EVIDENCE_CONTRACTS = [
23171
+ {
23172
+ id: "dependency-upgrade-evidence",
23173
+ title: "Dependency changes need upgrade evidence",
23174
+ paths: [
23175
+ "package.json",
23176
+ "**/package.json",
23177
+ "pnpm-lock.yaml",
23178
+ "package-lock.json",
23179
+ "yarn.lock",
23180
+ "requirements.txt",
23181
+ "**/requirements.txt",
23182
+ "pyproject.toml",
23183
+ "**/pyproject.toml",
23184
+ "go.mod",
23185
+ "**/go.mod"
23186
+ ],
23187
+ requires: ["verification", "changelog"],
23188
+ severity: "medium",
23189
+ recommendation: "Link changelog or migration notes and include the test command or CI evidence used to validate the dependency change."
23190
+ }
23191
+ ];
23152
23192
  const PRESET_DEFAULTS = {
23153
23193
  balanced: {},
23154
23194
  "open-source-maintainer": {
@@ -23179,6 +23219,9 @@ const PRESET_DEFAULTS = {
23179
23219
  requireTests: {
23180
23220
  enabled: true,
23181
23221
  paths: ["src/**", "packages/**/src/**", "app/**", "lib/**", "server/**", "api/**"]
23222
+ },
23223
+ evidence: {
23224
+ contracts: WORKFLOW_EVIDENCE_CONTRACTS
23182
23225
  }
23183
23226
  },
23184
23227
  "ai-generated-pr": {
@@ -23217,9 +23260,20 @@ const PRESET_DEFAULTS = {
23217
23260
  requireTests: {
23218
23261
  enabled: true,
23219
23262
  paths: DEFAULT_TEST_PATHS
23263
+ },
23264
+ evidence: {
23265
+ contracts: DEPENDENCY_EVIDENCE_CONTRACTS
23220
23266
  }
23221
23267
  }
23222
23268
  };
23269
+ const evidenceContractSchema = object({
23270
+ id: schemas_string().min(1),
23271
+ title: schemas_string().min(1).optional(),
23272
+ paths: array(schemas_string().min(1)).min(1),
23273
+ requires: array(evidenceRequirementSchema).min(1),
23274
+ severity: findingSeveritySchema.default("medium"),
23275
+ recommendation: schemas_string().min(1).optional()
23276
+ });
23223
23277
  const configSchema = object({
23224
23278
  preset: configPresetSchema.default("balanced"),
23225
23279
  locale: localeSchema.default("en"),
@@ -23234,9 +23288,14 @@ const configSchema = object({
23234
23288
  secrets: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true }),
23235
23289
  dependencies: object({
23236
23290
  flagNewPackages: schemas_boolean().default(true),
23237
- flagMajorUpgrades: schemas_boolean().default(true)
23291
+ flagMajorUpgrades: schemas_boolean().default(true),
23292
+ flagLifecycleScripts: schemas_boolean().default(true)
23293
+ })
23294
+ .default({ flagNewPackages: true, flagMajorUpgrades: true, flagLifecycleScripts: true }),
23295
+ evidence: object({
23296
+ contracts: array(evidenceContractSchema).default([])
23238
23297
  })
23239
- .default({ flagNewPackages: true, flagMajorUpgrades: true }),
23298
+ .default({ contracts: [] }),
23240
23299
  comment: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true })
23241
23300
  });
23242
23301
  function parseConfig(input) {
@@ -23374,6 +23433,9 @@ function renderEnglishMarkdownReport(result) {
23374
23433
  `- PR description: ${result.summary.pullRequestDescription}`,
23375
23434
  `- Verification evidence: ${formatBoolean(result.summary.verificationEvidence)}`,
23376
23435
  `- Reproduction context: ${formatBoolean(result.summary.reproductionEvidence)}`,
23436
+ `- Screenshot evidence: ${formatBoolean(result.summary.screenshotEvidence)}`,
23437
+ `- Changelog evidence: ${formatBoolean(result.summary.changelogEvidence)}`,
23438
+ `- Permission rationale: ${formatBoolean(result.summary.permissionRationaleEvidence)}`,
23377
23439
  ""
23378
23440
  ];
23379
23441
  appendEvidenceScoreSection(lines, result, "en");
@@ -23408,6 +23470,9 @@ function renderChineseMarkdownReport(result) {
23408
23470
  `- PR 描述质量:${translateDescriptionState(result.summary.pullRequestDescription)}`,
23409
23471
  `- 验证证据:${formatChineseBoolean(result.summary.verificationEvidence)}`,
23410
23472
  `- 复现上下文:${formatChineseBoolean(result.summary.reproductionEvidence)}`,
23473
+ `- 截图或视觉证据:${formatChineseBoolean(result.summary.screenshotEvidence)}`,
23474
+ `- Changelog 或迁移证据:${formatChineseBoolean(result.summary.changelogEvidence)}`,
23475
+ `- 权限理由证据:${formatChineseBoolean(result.summary.permissionRationaleEvidence)}`,
23411
23476
  ""
23412
23477
  ];
23413
23478
  appendEvidenceScoreSection(lines, result, "zh-CN");
@@ -23518,6 +23583,11 @@ function maintainerFocus(findings, locale) {
23518
23583
  ? "轮换任何可能暴露的凭证,并在移除 secret 前阻止合并。"
23519
23584
  : "Rotate any exposed credential and block the PR until secrets are removed.");
23520
23585
  }
23586
+ else if (finding.ruleId.startsWith("evidence-contract:")) {
23587
+ focus.add(locale === "zh-CN"
23588
+ ? "先要求贡献者补齐仓库定义的证据契约,再投入深度 review。"
23589
+ : "Ask the contributor to satisfy the repository-defined evidence contract before deep review.");
23590
+ }
23521
23591
  else if (finding.ruleId === "workflow-permission-change") {
23522
23592
  focus.add(locale === "zh-CN"
23523
23593
  ? "合并前重点审查 GitHub Actions 权限。"
@@ -23543,6 +23613,26 @@ function maintainerFocus(findings, locale) {
23543
23613
  ? "要求拆分 PR,或提供逐文件 review 指南。"
23544
23614
  : "Request a smaller PR or a file-by-file review guide.");
23545
23615
  }
23616
+ else if (finding.ruleId === "dependency-major-upgrade") {
23617
+ focus.add(locale === "zh-CN"
23618
+ ? "重点核查依赖大版本升级的迁移说明、兼容性和测试覆盖。"
23619
+ : "Review dependency major upgrade migration notes, compatibility, and test coverage.");
23620
+ }
23621
+ else if (finding.ruleId === "dependency-lifecycle-script") {
23622
+ focus.add(locale === "zh-CN"
23623
+ ? "合并前审查包生命周期脚本是否会在安装或发布时执行非预期代码。"
23624
+ : "Review package lifecycle scripts for unexpected install or publish-time execution.");
23625
+ }
23626
+ else if (finding.ruleId === "workflow-dangerous-trigger") {
23627
+ focus.add(locale === "zh-CN"
23628
+ ? "重点审查 pull_request_target 是否会用高权限 token 执行不可信 PR 代码。"
23629
+ : "Review whether pull_request_target can execute untrusted PR code with privileged tokens.");
23630
+ }
23631
+ else if (finding.ruleId === "workflow-untrusted-checkout") {
23632
+ focus.add(locale === "zh-CN"
23633
+ ? "重点审查 workflow 是否 checkout 并执行了不可信 PR head 代码。"
23634
+ : "Review whether the workflow checks out and executes untrusted PR head code.");
23635
+ }
23546
23636
  else if (finding.ruleId === "mcp-credential-risk") {
23547
23637
  focus.add(locale === "zh-CN"
23548
23638
  ? "重点审查 MCP command、args 和凭证处理方式。"
@@ -23557,6 +23647,13 @@ function maintainerFocus(findings, locale) {
23557
23647
  return [...focus];
23558
23648
  }
23559
23649
  function translateFinding(finding) {
23650
+ if (finding.ruleId.startsWith("evidence-contract:")) {
23651
+ return {
23652
+ title: "证据契约未满足",
23653
+ message: "该 PR 命中了仓库自定义证据契约,但 PR 描述中缺少必需证据。",
23654
+ recommendation: "建议要求贡献者补齐缺失证据后再深入 review。"
23655
+ };
23656
+ }
23560
23657
  if (finding.ruleId === "change-size") {
23561
23658
  const files = finding.evidence?.find((item) => item.startsWith("files: "))?.replace("files: ", "");
23562
23659
  const lines = finding.evidence?.find((item) => item.startsWith("changed lines: "))?.replace("changed lines: ", "");
@@ -23604,6 +23701,20 @@ function translateFinding(finding) {
23604
23701
  recommendation: "请确认包名、许可证、来源可信度,以及 lockfile 是否匹配预期依赖变化。"
23605
23702
  };
23606
23703
  }
23704
+ if (finding.ruleId === "dependency-major-upgrade") {
23705
+ return {
23706
+ title: "依赖发生大版本升级",
23707
+ message: finding.path ? `${finding.path} 中有依赖跨越了大版本边界。` : finding.message,
23708
+ recommendation: "请核查 changelog、迁移说明、peer dependencies 影响,以及测试是否覆盖升级后的关键路径。"
23709
+ };
23710
+ }
23711
+ if (finding.ruleId === "dependency-lifecycle-script") {
23712
+ return {
23713
+ title: "包生命周期脚本发生变更",
23714
+ message: finding.path ? `${finding.path} 新增或修改了安装/发布阶段可能自动执行的脚本。` : finding.message,
23715
+ recommendation: "请确认该脚本是否必要,是否下载或执行远程代码,以及是否会影响安装该包的用户。"
23716
+ };
23717
+ }
23607
23718
  if (finding.ruleId === "workflow-permission-change") {
23608
23719
  return {
23609
23720
  title: "Workflow 权限发生变更",
@@ -23611,6 +23722,22 @@ function translateFinding(finding) {
23611
23722
  recommendation: "请确认 workflow 是否真的需要写权限或 token 权限,并检查不可信 PR 是否能触达该 workflow。"
23612
23723
  };
23613
23724
  }
23725
+ if (finding.ruleId === "workflow-dangerous-trigger") {
23726
+ return {
23727
+ title: "Workflow 使用了 pull_request_target",
23728
+ message: finding.path ? `${finding.path} 新增了 pull_request_target 触发器。` : finding.message,
23729
+ recommendation: "请确认该 workflow 不会用高权限 token、secret 或写权限执行不可信 PR 代码。"
23730
+ };
23731
+ }
23732
+ if (finding.ruleId === "workflow-untrusted-checkout") {
23733
+ return {
23734
+ title: "Workflow checkout 了 PR head",
23735
+ message: finding.path
23736
+ ? `${finding.path} 引用了 PR head 代码来源,需要审查它是否会在高权限上下文中执行。`
23737
+ : finding.message,
23738
+ recommendation: "避免在 pull_request_target、写权限 token 或可读取 secret 的上下文中运行不可信 PR 代码。"
23739
+ };
23740
+ }
23614
23741
  if (finding.ruleId === "mcp-credential-risk") {
23615
23742
  return {
23616
23743
  title: "MCP 配置需要重点审查",
@@ -23629,8 +23756,15 @@ function translateFinding(finding) {
23629
23756
  }
23630
23757
  function translateEvidence(item) {
23631
23758
  return item
23759
+ .replace("matched files: ", "命中文件:")
23760
+ .replace("missing evidence: ", "缺失证据:")
23632
23761
  .replace("files: ", "文件数:")
23633
23762
  .replace("changed lines: ", "变更行数:")
23763
+ .replace(/\bverification\b/g, "验证")
23764
+ .replace(/\breproduction\b/g, "复现")
23765
+ .replace(/\bscreenshot\b/g, "截图")
23766
+ .replace(/\bchangelog\b/g, "变更日志")
23767
+ .replace(/\bpermission-rationale\b/g, "权限理由")
23634
23768
  .replace("line ", "第 ")
23635
23769
  .replace(": ", " 行:");
23636
23770
  }
@@ -23682,14 +23816,19 @@ function translateReviewActionTitle(actionId, fallback) {
23682
23816
  "ask-for-evidence-before-review": "深入 review 前先要求补充证据",
23683
23817
  "review-with-focus": "带着重点清单进行 review",
23684
23818
  "normal-review": "进入常规 review",
23819
+ "satisfy-evidence-contract": "要求补齐证据契约",
23685
23820
  "improve-pr-description": "要求补充更清楚的 PR 描述",
23686
23821
  "add-verification-evidence": "要求补充测试或手动验证证据",
23687
23822
  "add-reproduction-context": "要求补充复现或 before/after 上下文",
23688
23823
  "rotate-secret": "轮换并移除暴露的凭证",
23689
23824
  "justify-workflow-permissions": "要求说明 workflow 权限最小化理由",
23825
+ "review-privileged-pr-trigger": "审查 pull_request_target 高权限触发器",
23826
+ "review-untrusted-checkout": "审查 PR head checkout 的权限边界",
23827
+ "review-package-lifecycle-script": "审查包生命周期脚本",
23690
23828
  "review-mcp-execution-surface": "审查 MCP 命令、参数和凭证处理",
23691
23829
  "request-review-map-or-split": "要求拆分 PR 或提供逐文件 review map",
23692
23830
  "verify-dependency-change": "核查依赖来源和 lockfile 影响",
23831
+ "review-major-dependency-upgrade": "核查依赖大版本升级影响",
23693
23832
  "assign-sensitive-file-review": "安排敏感文件重点 review"
23694
23833
  }[actionId] ?? fallback;
23695
23834
  }
@@ -23699,23 +23838,35 @@ function translateReviewActionDetail(actionId, fallback) {
23699
23838
  "ask-for-evidence-before-review": "要求测试、截图、复现步骤或更清楚的 PR 描述,再投入详细 review。",
23700
23839
  "review-with-focus": "优先使用下面的风险发现和重点文件作为第一轮 review map。",
23701
23840
  "normal-review": "当前证据足够支撑维护者进行常规 review。",
23841
+ "satisfy-evidence-contract": "该 PR 命中了仓库自定义证据契约,但 PR 描述里缺少必需证据。",
23702
23842
  "improve-pr-description": "贡献者应说明为什么改、改了什么、如何验证,以及是否有发布或兼容性风险。",
23703
23843
  "add-verification-evidence": "要求测试输出、CI 链接、截图,或简短的手动验证说明。",
23704
23844
  "add-reproduction-context": "PR 应包含复现步骤、预期/实际行为,或相关 before/after 截图。",
23705
23845
  "rotate-secret": "在 secret 从 PR 中移除并完成轮换前,不要合并。",
23706
23846
  "justify-workflow-permissions": "确认写权限或 OIDC 是否必要,并检查不可信 PR 是否能触发该 workflow。",
23847
+ "review-privileged-pr-trigger": "确认 workflow 不会用写权限 token、secret 或仓库权限执行不可信 PR 代码。",
23848
+ "review-untrusted-checkout": "确认 job 不会在写权限 token、仓库 secret 或 pull_request_target 高权限上下文中运行不可信 PR 代码。",
23849
+ "review-package-lifecycle-script": "检查 install、postinstall、prepare 或 publish 脚本是否会执行非预期代码。",
23707
23850
  "review-mcp-execution-surface": "检查 MCP 配置是否提交凭证,或意外扩大本地执行面。",
23708
23851
  "request-review-map-or-split": "要求贡献者拆分无关改动,或标出最需要重点 review 的文件。",
23709
23852
  "verify-dependency-change": "检查包名、维护者、许可证、安装脚本,以及 lockfile 是否符合预期依赖变化。",
23853
+ "review-major-dependency-upgrade": "检查 changelog、迁移说明、peer dependencies,以及测试是否覆盖升级后的关键路径。",
23710
23854
  "assign-sensitive-file-review": "合并前由维护者有意识地检查敏感文件改动。"
23711
23855
  }[actionId] ?? fallback;
23712
23856
  }
23713
23857
  function translateFocusReason(reasonId, fallback) {
23858
+ if (reasonId.startsWith("evidence-contract:")) {
23859
+ return "仓库自定义证据契约未满足";
23860
+ }
23714
23861
  return {
23715
23862
  "change-size": "review 面积相关 finding",
23716
23863
  "sensitive-path": "敏感路径发生变更",
23717
23864
  "dependency-added": "依赖清单发生变更",
23865
+ "dependency-major-upgrade": "依赖发生大版本升级",
23866
+ "dependency-lifecycle-script": "包生命周期脚本发生变更",
23718
23867
  "workflow-permission-change": "workflow 权限发生变更",
23868
+ "workflow-dangerous-trigger": "workflow 使用了高风险触发器",
23869
+ "workflow-untrusted-checkout": "workflow checkout 了不可信 PR head",
23719
23870
  "mcp-credential-risk": "MCP 配置存在执行面或凭证风险",
23720
23871
  "missing-tests": "代码改动缺少测试或验证证据"
23721
23872
  }[reasonId] ?? fallback;
@@ -23725,6 +23876,9 @@ function translateScoreMessage(message) {
23725
23876
  "PR description provides review context.": "PR 描述提供了 review 上下文。",
23726
23877
  "Verification evidence was found.": "检测到测试或手动验证证据。",
23727
23878
  "Reproduction or before/after context was found.": "检测到复现步骤或 before/after 上下文。",
23879
+ "Screenshot or visual evidence was found.": "检测到截图或视觉证据。",
23880
+ "Changelog or migration evidence was found.": "检测到 changelog 或迁移证据。",
23881
+ "Permission rationale evidence was found.": "检测到权限理由证据。",
23728
23882
  "Test files changed with the PR.": "PR 同时修改了测试文件。",
23729
23883
  "No configured sensitive files changed.": "没有改动已配置的敏感文件。"
23730
23884
  }[message] ?? message;
@@ -23744,6 +23898,11 @@ function translateDeduction(reasonId, fallback) {
23744
23898
  "sensitive-path-high": "高敏感文件发生变更,需要重点 review。",
23745
23899
  "sensitive-path-medium": "敏感文件发生变更,需要重点 review。",
23746
23900
  "dependency-change": "依赖清单发生变更。",
23901
+ "dependency-major-upgrade": "依赖发生大版本升级。",
23902
+ "dependency-lifecycle-script": "包生命周期脚本可能在安装或发布阶段执行代码。",
23903
+ "workflow-dangerous-trigger": "pull_request_target workflow 需要重点审查高权限触发路径。",
23904
+ "workflow-untrusted-checkout": "Workflow checkout PR head 代码,需要审查权限边界。",
23905
+ "evidence-contract-missing": "仓库自定义证据契约未满足。",
23747
23906
  "missing-tests": "代码发生变更,但缺少测试变更或验证说明。"
23748
23907
  }[reasonId] ?? fallback;
23749
23908
  }
@@ -23859,12 +24018,27 @@ const REPRODUCTION_PATTERNS = [
23859
24018
  /\b(?:before|after|expected|actual)\b/i,
23860
24019
  /复现|重现|复现步骤|期望|实际/
23861
24020
  ];
24021
+ const SCREENSHOT_PATTERNS = [
24022
+ /\b(?:screenshot|screen shot|screen recording|recording|gif|image|before\/after)\b/i,
24023
+ /截图|录屏|效果图|前后对比|对比图/
24024
+ ];
24025
+ const CHANGELOG_PATTERNS = [
24026
+ /\b(?:changelog|release notes?|migration guide|breaking changes?|upgrade guide)\b/i,
24027
+ /变更日志|发布说明|迁移指南|升级说明|破坏性变更|兼容性/
24028
+ ];
24029
+ const PERMISSION_RATIONALE_PATTERNS = [
24030
+ /\b(?:least privilege|permission rationale|write permission|oidc|id-token|trusted workflow|untrusted pr|token scope)\b/i,
24031
+ /权限理由|最小权限|写权限|OIDC|id-token|不可信 PR|高权限|token 权限|凭证权限/
24032
+ ];
23862
24033
  function analyzeEvidence(context) {
23863
24034
  if (!context) {
23864
24035
  return {
23865
24036
  descriptionState: "unavailable",
23866
24037
  verificationEvidence: false,
23867
- reproductionEvidence: false
24038
+ reproductionEvidence: false,
24039
+ screenshotEvidence: false,
24040
+ changelogEvidence: false,
24041
+ permissionRationaleEvidence: false
23868
24042
  };
23869
24043
  }
23870
24044
  const text = [context.title ?? "", context.body ?? ""].join("\n").trim();
@@ -23872,7 +24046,10 @@ function analyzeEvidence(context) {
23872
24046
  return {
23873
24047
  descriptionState: descriptionState(body),
23874
24048
  verificationEvidence: matchesAnyPattern(text, VERIFICATION_PATTERNS),
23875
- reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS)
24049
+ reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS),
24050
+ screenshotEvidence: matchesAnyPattern(text, SCREENSHOT_PATTERNS),
24051
+ changelogEvidence: matchesAnyPattern(text, CHANGELOG_PATTERNS),
24052
+ permissionRationaleEvidence: matchesAnyPattern(text, PERMISSION_RATIONALE_PATTERNS)
23876
24053
  };
23877
24054
  }
23878
24055
  function descriptionState(body) {
@@ -24053,8 +24230,11 @@ function analyzeDiffFiles(files, config, pullRequest) {
24053
24230
  findings.push(...analyzeSensitivePaths(activeFiles, config));
24054
24231
  findings.push(...analyzeMissingTests(activeFiles, config, pullRequest));
24055
24232
  findings.push(...analyzePullRequestEvidence(activeFiles, pullRequest));
24233
+ findings.push(...analyzeEvidenceContracts(activeFiles, config, pullRequest));
24056
24234
  findings.push(...analyzeDependencyChanges(activeFiles, config));
24057
24235
  findings.push(...analyzeWorkflowPermissions(activeFiles));
24236
+ findings.push(...analyzeWorkflowDangerousTriggers(activeFiles));
24237
+ findings.push(...analyzeWorkflowUntrustedCheckout(activeFiles));
24058
24238
  findings.push(...analyzeMcpConfigs(activeFiles));
24059
24239
  if (config.secrets.enabled) {
24060
24240
  for (const file of activeFiles) {
@@ -24074,7 +24254,10 @@ function summarizeDiffFiles(files, config, pullRequest) {
24074
24254
  sensitiveFilesChanged: activeFiles.filter((file) => matchesAny(file.path, config.sensitivePaths)).length,
24075
24255
  pullRequestDescription: evidence.descriptionState,
24076
24256
  verificationEvidence: evidence.verificationEvidence,
24077
- reproductionEvidence: evidence.reproductionEvidence
24257
+ reproductionEvidence: evidence.reproductionEvidence,
24258
+ screenshotEvidence: evidence.screenshotEvidence,
24259
+ changelogEvidence: evidence.changelogEvidence,
24260
+ permissionRationaleEvidence: evidence.permissionRationaleEvidence
24078
24261
  };
24079
24262
  }
24080
24263
  function analyzeChangeSize(files) {
@@ -24179,55 +24362,187 @@ function analyzePullRequestEvidence(files, pullRequest) {
24179
24362
  }
24180
24363
  return findings;
24181
24364
  }
24182
- function analyzeDependencyChanges(files, config) {
24183
- if (!config.dependencies.flagNewPackages) {
24365
/**
 * Evaluates the configured evidence contracts against the changed files.
 *
 * A contract applies when at least one changed file matches its path globs.
 * For each applicable contract, a finding is produced listing the required
 * evidence kinds that are not satisfied.
 *
 * @param {Array<{path: string}>} files Changed files under analysis.
 * @param {object} config Parsed configuration; reads `config.evidence.contracts`.
 * @param {object|undefined} pullRequest Pull request context handed to `analyzeEvidence`.
 * @returns {Array<object>} One finding per contract with missing evidence.
 */
function analyzeEvidenceContracts(files, config, pullRequest) {
  const { contracts } = config.evidence;
  if (contracts.length === 0) {
    return [];
  }
  const evidence = analyzeEvidence(pullRequest);
  const hasTestChanges = files.some((file) => isTestPath(file.path));
  return contracts.flatMap((contract) => {
    const matchedFiles = files.filter((file) => matchesAny(file.path, contract.paths));
    if (matchedFiles.length === 0) {
      return [];
    }
    const missingRequirements = contract.requires.filter((requirement) => !hasEvidenceRequirement(requirement, evidence, hasTestChanges));
    if (missingRequirements.length === 0) {
      return [];
    }
    // The same label is used in the message and in the evidence list.
    const missingLabel = missingRequirements.map(formatEvidenceRequirement).join(", ");
    const matchedLabel = matchedFiles
      .slice(0, 5)
      .map((file) => file.path)
      .join(", ");
    const [firstMatch] = matchedFiles;
    return [
      {
        ruleId: `evidence-contract:${contract.id}`,
        title: contract.title ?? "Evidence contract missing",
        message: `Changed files match evidence contract "${contract.id}", but missing required evidence: ${missingLabel}.`,
        severity: contract.severity,
        path: firstMatch.path,
        evidence: [`matched files: ${matchedLabel}`, `missing evidence: ${missingLabel}`],
        recommendation: contract.recommendation ??
          "Ask the contributor to add the missing evidence before spending deep review time."
      }
    ];
  });
}
24399
/**
 * Tells whether a single evidence requirement is satisfied.
 *
 * "verification" is also satisfied by test-file changes. Any requirement
 * name other than the four listed cases is answered with the
 * permission-rationale flag.
 *
 * @param {string} requirement Requirement identifier from a contract.
 * @param {object} evidence Evidence flags (`verificationEvidence`, `reproductionEvidence`, ...).
 * @param {boolean} hasTestChanges Whether the change set touches test files.
 * @returns {boolean} Whether the requirement is met.
 */
function hasEvidenceRequirement(requirement, evidence, hasTestChanges) {
  switch (requirement) {
    case "verification":
      return evidence.verificationEvidence || hasTestChanges;
    case "reproduction":
      return evidence.reproductionEvidence;
    case "screenshot":
      return evidence.screenshotEvidence;
    case "changelog":
      return evidence.changelogEvidence;
    default:
      return evidence.permissionRationaleEvidence;
  }
}
24414
/**
 * Runs the dependency rules over every changed dependency manifest.
 *
 * Each of the three rules is gated by its own flag in `config.dependencies`:
 * new or changed dependency entries, major version upgrades, and npm
 * lifecycle scripts.
 *
 * @param {Array<object>} files Changed files (`path`, `addedLines`, ...).
 * @param {object} config Parsed configuration; reads `config.dependencies`.
 * @returns {Array<object>} Findings from all enabled dependency rules.
 */
function analyzeDependencyChanges(files, config) {
  const { flagNewPackages, flagMajorUpgrades, flagLifecycleScripts } = config.dependencies;
  const findings = [];
  for (const file of files) {
    if (!isDependencyManifest(file.path)) {
      continue;
    }
    if (flagNewPackages) {
      const dependencyAdditions = file.addedLines.filter((line) => isDependencyLikeAddition(file.path, line.value.trim()));
      if (dependencyAdditions.length > 0) {
        findings.push({
          ruleId: "dependency-added",
          title: "Dependency manifest changed",
          message: `${file.path} adds or changes dependency-like entries.`,
          severity: "medium",
          path: file.path,
          evidence: dependencyAdditions.slice(0, 5).map(formatEvidenceLine),
          recommendation: "Verify package names, licenses, provenance, and whether the lockfile matches the intended dependency change."
        });
      }
    }
    if (flagMajorUpgrades) {
      findings.push(...analyzeMajorDependencyUpgrades(file));
    }
    if (flagLifecycleScripts) {
      findings.push(...analyzeLifecycleScripts(file));
    }
  }
  return findings;
}
24204
24440
  function isDependencyLikeAddition(path, line) {
24441
+ return parseDependencyLine(path, { value: line }) !== undefined;
24442
+ }
24443
/**
 * Detects dependencies whose version moved across a major boundary within
 * one manifest file, by pairing removed and added lines on dependency name.
 *
 * @param {object} file Changed file (`path`, `addedLines`, `removedLines`).
 * @returns {Array<object>} Empty, or a single finding summarising up to five upgrades.
 */
function analyzeMajorDependencyUpgrades(file) {
  // Index the previous entries by name; a later removed line for the same
  // name overwrites an earlier one.
  const previousByName = new Map();
  for (const removedLine of file.removedLines) {
    const entry = parseDependencyLine(file.path, removedLine);
    if (entry) {
      previousByName.set(entry.name, entry);
    }
  }
  const upgrades = [];
  for (const addedLine of file.addedLines) {
    const added = parseDependencyLine(file.path, addedLine);
    if (!added) {
      continue;
    }
    const removed = previousByName.get(added.name);
    if (removed === undefined) {
      continue;
    }
    if (isMajorUpgrade(removed.version, added.version)) {
      upgrades.push({ added, removed });
    }
  }
  if (upgrades.length === 0) {
    return [];
  }
  const evidence = upgrades.slice(0, 5).map(({ added, removed }) => {
    const location = added.line.lineNumber ? `line ${added.line.lineNumber}: ` : "";
    return `${location}${added.name} ${removed.version} -> ${added.version}`;
  });
  return [
    {
      ruleId: "dependency-major-upgrade",
      title: "Dependency major version upgrade",
      message: `${file.path} upgrades one or more dependencies across a major version boundary.`,
      severity: "medium",
      path: file.path,
      evidence,
      recommendation: "Check changelogs, migration notes, peer dependency impact, and whether tests cover the upgraded package surface."
    }
  ];
}
24471
/**
 * Flags added package.json lines that define npm lifecycle scripts, which
 * npm runs automatically during install, pack or publish.
 *
 * Besides the install hooks, this also covers `preprepare`/`postprepare`,
 * `prepack`/`postpack` and `publish`/`postpublish`, which npm's scripts
 * documentation lists as running on install, pack or publish.
 *
 * Only the key at the start of the line is inspected, so a dependency that
 * happens to be named like a hook (e.g. "install") is reported as well.
 *
 * @param {object} file Changed file (`path`, `addedLines`).
 * @returns {Array<object>} Empty, or a single high-severity finding.
 */
function analyzeLifecycleScripts(file) {
  if (!file.path.endsWith("package.json")) {
    return [];
  }
  const lifecycleHooks = [
    "preinstall",
    "install",
    "postinstall",
    "preprepare",
    "prepare",
    "postprepare",
    "prepublish",
    "prepublishOnly",
    "prepack",
    "postpack",
    "publish",
    "postpublish"
  ];
  const lifecycleKeyPattern = new RegExp(`^"(?:${lifecycleHooks.join("|")})"\\s*:`);
  const lifecycleLines = file.addedLines.filter((line) => lifecycleKeyPattern.test(line.value.trim()));
  if (lifecycleLines.length === 0) {
    return [];
  }
  return [
    {
      ruleId: "dependency-lifecycle-script",
      title: "Package lifecycle script changed",
      message: `${file.path} adds or changes npm lifecycle scripts that may run during install or publish.`,
      severity: "high",
      path: file.path,
      evidence: lifecycleLines.slice(0, 5).map(formatEvidenceLine),
      recommendation: "Review whether the lifecycle script is necessary, whether it downloads or executes remote code, and whether it can affect consumers during install."
    }
  ];
}
24491
/**
 * Parses one diff line of a dependency manifest into a dependency record.
 *
 * The manifest type is chosen from the file name suffix: package.json,
 * requirements.txt, pyproject.toml / Cargo.toml, or go.mod.
 *
 * For package.json, keys listed in PACKAGE_JSON_NON_DEPENDENCY_KEYS (defined
 * elsewhere in this file) are ignored, and so are values that do not start
 * like a version range or a package source specifier.
 *
 * @param {string} path Path of the manifest file.
 * @param {{value: string}} line Diff line; only `value` is read, and the object is returned as-is.
 * @returns {{name: string, version: string, line: object}|undefined}
 *   The parsed dependency, or undefined when the line is not a dependency entry.
 */
function parseDependencyLine(path, line) {
  const value = line.value.trim();
  if (path.endsWith("package.json")) {
    // "/" is part of the key class so scoped names such as "@types/node" parse.
    const match = /^"(?<key>[@A-Za-z0-9_.\/-]+)"\s*:\s*"(?<version>[^"]*)"/.exec(value);
    if (!match?.groups) {
      return undefined;
    }
    const { key, version } = match.groups;
    if (!key || !version || PACKAGE_JSON_NON_DEPENDENCY_KEYS.has(key)) {
      return undefined;
    }
    if (!/^(?:\^|~|>=?|<=?|\d|workspace:|npm:|file:|link:|portal:|git\+|https?:|github:)/.test(version)) {
      return undefined;
    }
    return { name: key, version, line };
  }
  if (path.endsWith("requirements.txt")) {
    const match = /^(?<name>[A-Za-z0-9_.-]+)(?:\[.*\])?\s*(?:==|>=|<=|~=|>|<)\s*(?<version>[^#\s]+)/.exec(value);
    return match?.groups?.name && match.groups.version
      ? { name: match.groups.name, version: match.groups.version, line }
      : undefined;
  }
  if (path.endsWith("pyproject.toml") || path.endsWith("Cargo.toml")) {
    const match = /^(?<name>[A-Za-z0-9_.-]+)\s*=\s*"(?<version>(?:\^|~|>=?|<=?|\d|workspace:|path\s*=|git\s*=)[^"]*)"/.exec(value);
    return match?.groups?.name && match.groups.version
      ? { name: match.groups.name, version: match.groups.version, line }
      : undefined;
  }
  if (path.endsWith("go.mod")) {
    const match = /^(?:require\s+)?(?<name>[A-Za-z0-9_.\-/]+)\s+(?<version>v\d+\.\d+\.\d+)/.exec(value);
    return match?.groups?.name && match.groups.version
      ? { name: match.groups.name, version: match.groups.version, line }
      : undefined;
  }
  return undefined;
}
24527
/**
 * Tells whether moving from one version string to another raises the major
 * version number.
 *
 * @param {string} previousVersion Version string before the change.
 * @param {string} nextVersion Version string after the change.
 * @returns {boolean} True only when both majors can be extracted and the new one is larger.
 */
function isMajorUpgrade(previousVersion, nextVersion) {
  const before = extractMajorVersion(previousVersion);
  const after = extractMajorVersion(nextVersion);
  if (before === undefined || after === undefined) {
    return false;
  }
  return after > before;
}
24532
/**
 * Extracts the major version number from a dependency version specifier.
 *
 * Prefixes are stripped first: `workspace:`, an `npm:<alias>@` alias
 * (including scoped aliases such as `npm:@scope/pkg@`), range operators and
 * a leading `v`. The leading numeric component is then taken as the major,
 * so partial versions such as "5.0", "^18" and "2.x" are understood as well
 * as full "x.y.z" versions.
 *
 * When the specifier does not start with a number (for example a tarball
 * URL), the first "x.y.z" sequence found anywhere in it is used.
 *
 * @param {string} version Version specifier from a manifest.
 * @returns {number|undefined} The major version, or undefined when none can be found.
 */
function extractMajorVersion(version) {
  const normalized = version
    .replace(/^workspace:/, "")
    .replace(/^npm:@?[^@]+@/, "")
    .replace(/^[~^<>=\s]+/, "")
    .replace(/^v/, "");
  // The digits must be followed by a separator or the end of the string, so
  // identifiers that merely start with a digit (e.g. "1a2b3c") are rejected.
  const match = /^(?<major>\d+)(?=$|[.\s,|<>=+-])/.exec(normalized) ??
    /(?<major>\d+)\.\d+\.\d+/.exec(normalized);
  const major = match?.groups?.major ? Number(match.groups.major) : undefined;
  return major !== undefined && Number.isInteger(major) ? major : undefined;
}
24227
24542
  function analyzeWorkflowPermissions(files) {
24228
24543
  const findings = [];
24229
24544
  for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
24230
- const permissionLines = file.addedLines.filter((line) => /permissions:|contents:\s*write|packages:\s*write|id-token:\s*write|pull-requests:\s*write/.test(line.value.trim()));
24545
+ const permissionLines = file.addedLines.filter((line) => isRiskyWorkflowPermissionLine(line.value));
24231
24546
  if (permissionLines.length === 0) {
24232
24547
  continue;
24233
24548
  }
@@ -24243,6 +24558,59 @@ function analyzeWorkflowPermissions(files) {
24243
24558
  }
24244
24559
  return findings;
24245
24560
  }
24561
/**
 * Decides whether an added workflow line grants a write-level token permission.
 *
 * Recognised forms:
 *   - `permissions: write-all`
 *   - `<scope>: write` on its own line (block mapping)
 *   - `permissions: { <scope>: write, ... }` (flow mapping)
 * Scope names and values may be wrapped in single or double quotes, which is
 * valid YAML. Lines that are entirely a YAML comment are ignored.
 *
 * @param {string} value - Raw text of the added line.
 * @returns {boolean} True when the line grants `write` for a known scope or `write-all`.
 */
function isRiskyWorkflowPermissionLine(value) {
    const line = value.trim();
    if (line.startsWith("#")) {
        return false;
    }
    if (/^permissions:\s*["']?write-all\b/i.test(line)) {
        return true;
    }
    // A scope key may start the line or follow `{` / `,` inside a flow mapping.
    return /(?:^|[{,]\s*)["']?(?:actions|attestations|checks|contents|deployments|discussions|id-token|issues|models|packages|pages|pull-requests|repository-projects|security-events|statuses)["']?:\s*["']?write\b/i.test(line);
}
24568
/**
 * Produces one high-severity finding per workflow file whose added lines
 * mention `pull_request_target`.
 *
 * @param {Array<{path: string, addedLines: Array<{value: string}>}>} files - Changed files from the diff.
 * @returns {Array<object>} Findings with rule id `workflow-dangerous-trigger`; at most five evidence lines each.
 */
function analyzeWorkflowDangerousTriggers(files) {
    const triggerPattern = /\bpull_request_target\b/;
    return files
        .filter((candidate) => isWorkflowPath(candidate.path))
        .flatMap((file) => {
            const matchingLines = file.addedLines.filter((line) => triggerPattern.test(line.value.trim()));
            if (matchingLines.length === 0) {
                return [];
            }
            const evidence = matchingLines.slice(0, 5).map(formatEvidenceLine);
            return [
                {
                    ruleId: "workflow-dangerous-trigger",
                    title: "Workflow uses pull_request_target",
                    message: `${file.path} adds pull_request_target, which runs with base repository context and can be risky for untrusted PRs.`,
                    severity: "high",
                    path: file.path,
                    evidence,
                    recommendation: "Confirm the workflow does not check out or execute untrusted PR code with privileged tokens or write permissions."
                }
            ];
        });
}
24587
/**
 * Produces one finding per workflow file whose added lines reference the pull
 * request head (as judged by `isPullRequestHeadCheckoutLine`).
 *
 * Severity is "high" when the same file's added lines also mention
 * `pull_request_target`, otherwise "medium".
 *
 * @param {Array<{path: string, addedLines: Array<{value: string}>}>} files - Changed files from the diff.
 * @returns {Array<object>} Findings with rule id `workflow-untrusted-checkout`; at most five evidence lines each.
 */
function analyzeWorkflowUntrustedCheckout(files) {
    const workflowFiles = files.filter((candidate) => isWorkflowPath(candidate.path));
    return workflowFiles.flatMap((file) => {
        const checkoutLines = file.addedLines.filter((line) => isPullRequestHeadCheckoutLine(line.value));
        if (checkoutLines.length === 0) {
            return [];
        }
        const addsPrivilegedTrigger = file.addedLines.some((line) => /\bpull_request_target\b/.test(line.value.trim()));
        let message;
        let severity;
        if (addsPrivilegedTrigger) {
            message = `${file.path} combines pull_request_target with pull request head checkout references.`;
            severity = "high";
        }
        else {
            message = `${file.path} checks out pull request head references; review the job privilege boundary before merging.`;
            severity = "medium";
        }
        return [
            {
                ruleId: "workflow-untrusted-checkout",
                title: "Workflow checks out pull request head",
                message,
                severity,
                path: file.path,
                evidence: checkoutLines.slice(0, 5).map(formatEvidenceLine),
                recommendation: "Avoid running untrusted PR code with write tokens, repository secrets, or privileged pull_request_target context."
            }
        ];
    });
}
24609
/**
 * Tests whether a workflow line references the pull request head, either via
 * `github.head_ref` or via `github.event.pull_request.head` (optionally
 * followed by `.sha`, `.ref` or `.repo.full_name`).
 *
 * @param {string} value - Raw text of the added line.
 * @returns {boolean} True when either reference pattern occurs in the trimmed line.
 */
function isPullRequestHeadCheckoutLine(value) {
    const headReferencePatterns = [
        /\bgithub\.head_ref\b/,
        /\bgithub\.event\.pull_request\.head(?:\.sha|\.ref|\.repo\.full_name)?\b/
    ];
    const trimmed = value.trim();
    return headReferencePatterns.some((pattern) => pattern.test(trimmed));
}
24246
24614
  function analyzeMcpConfigs(files) {
24247
24615
  const findings = [];
24248
24616
  for (const file of files.filter((candidate) => isMcpConfigPath(candidate.path))) {
@@ -24266,6 +24634,9 @@ function formatEvidenceLine(line) {
24266
24634
  const value = line.value.trim();
24267
24635
  return line.lineNumber ? `line ${line.lineNumber}: ${value}` : value;
24268
24636
  }
24637
/**
 * Returns the display label for an evidence requirement.
 * The requirement identifier is currently used as the label unchanged.
 *
 * @param {string} requirement - Evidence requirement identifier.
 * @returns {string} The same identifier.
 */
function formatEvidenceRequirement(requirement) {
    const label = requirement;
    return label;
}
24269
24640
  function sensitivePathSeverity(path) {
24270
24641
  if (matchesAny(path, [
24271
24642
  "**/.env*",
@@ -24336,9 +24707,13 @@ function calculateEvidenceScore(summary, findings) {
24336
24707
  "sensitive-path",
24337
24708
  "missing-tests",
24338
24709
  "dependency-added",
24710
+ "dependency-major-upgrade",
24711
+ "dependency-lifecycle-script",
24339
24712
  "workflow-permission-change",
24713
+ "workflow-dangerous-trigger",
24714
+ "workflow-untrusted-checkout",
24340
24715
  "mcp-credential-risk"
24341
- ].includes(finding.ruleId));
24716
+ ].includes(finding.ruleId) || finding.ruleId.startsWith("evidence-contract:"));
24342
24717
  if (needsVerificationEvidence && !summary.verificationEvidence) {
24343
24718
  addDeduction("missing-verification", 20, "No test or manual verification evidence was found.");
24344
24719
  }
@@ -24352,6 +24727,12 @@ function calculateEvidenceScore(summary, findings) {
24352
24727
  else if (finding.ruleId === "workflow-permission-change") {
24353
24728
  addDeduction("workflow-permission-change", 25, "Workflow permission changes need deliberate review.");
24354
24729
  }
24730
+ else if (finding.ruleId === "workflow-dangerous-trigger") {
24731
+ addDeduction("workflow-dangerous-trigger", 30, "pull_request_target workflows need privileged trigger review.");
24732
+ }
24733
+ else if (finding.ruleId === "workflow-untrusted-checkout") {
24734
+ addDeduction("workflow-untrusted-checkout", finding.severity === "high" ? 30 : 18, "Workflow checkout of pull request head needs privilege-boundary review.");
24735
+ }
24355
24736
  else if (finding.ruleId === "mcp-credential-risk") {
24356
24737
  addDeduction("mcp-credential-risk", 25, "MCP configuration expands local execution or credential risk.");
24357
24738
  }
@@ -24366,6 +24747,15 @@ function calculateEvidenceScore(summary, findings) {
24366
24747
  else if (finding.ruleId === "dependency-added") {
24367
24748
  addDeduction("dependency-change", 10, "Dependency manifest changed.");
24368
24749
  }
24750
+ else if (finding.ruleId === "dependency-major-upgrade") {
24751
+ addDeduction("dependency-major-upgrade", 15, "Dependency major version changed.");
24752
+ }
24753
+ else if (finding.ruleId === "dependency-lifecycle-script") {
24754
+ addDeduction("dependency-lifecycle-script", 25, "Package lifecycle scripts can run during install or publish.");
24755
+ }
24756
+ else if (finding.ruleId.startsWith("evidence-contract:")) {
24757
+ addDeduction("evidence-contract-missing", finding.severity === "high" ? 25 : 15, "Configured evidence contract was not satisfied.");
24758
+ }
24369
24759
  else if (finding.ruleId === "missing-tests") {
24370
24760
  addDeduction("missing-tests", finding.severity === "medium" ? 20 : 12, "Code changed without test changes or verification notes.");
24371
24761
  }
@@ -24394,6 +24784,15 @@ function collectEvidenceStrengths(summary) {
24394
24784
  if (summary.reproductionEvidence) {
24395
24785
  strengths.push("Reproduction or before/after context was found.");
24396
24786
  }
24787
+ if (summary.screenshotEvidence) {
24788
+ strengths.push("Screenshot or visual evidence was found.");
24789
+ }
24790
+ if (summary.changelogEvidence) {
24791
+ strengths.push("Changelog or migration evidence was found.");
24792
+ }
24793
+ if (summary.permissionRationaleEvidence) {
24794
+ strengths.push("Permission rationale evidence was found.");
24795
+ }
24397
24796
  if (summary.testFilesChanged > 0) {
24398
24797
  strengths.push("Test files changed with the PR.");
24399
24798
  }
@@ -24417,11 +24816,15 @@ function gradeEvidenceScore(value) {
24417
24816
  function calculateReviewDecision(risk, evidenceScore, findings) {
24418
24817
  const hasBlockingSecurityFinding = findings.some((finding) => finding.ruleId.startsWith("secret-detected") ||
24419
24818
  finding.ruleId === "workflow-permission-change" ||
24819
+ finding.ruleId === "workflow-dangerous-trigger" ||
24820
+ (finding.ruleId === "workflow-untrusted-checkout" && finding.severity === "high") ||
24821
+ finding.ruleId === "dependency-lifecycle-script" ||
24420
24822
  finding.ruleId === "mcp-credential-risk");
24421
24823
  if (hasBlockingSecurityFinding || evidenceScore.value < 50 || risk === "high") {
24422
24824
  return "block-merge";
24423
24825
  }
24424
- if (evidenceScore.value < 70 || findings.some((finding) => finding.ruleId === "missing-tests" || finding.ruleId === "thin-pr-description")) {
24826
+ if (evidenceScore.value < 70 ||
24827
+ findings.some((finding) => finding.ruleId === "missing-tests" || finding.ruleId === "thin-pr-description")) {
24425
24828
  return "needs-evidence";
24426
24829
  }
24427
24830
  if (risk === "medium") {
@@ -24547,6 +24950,17 @@ function reviewActionsForFinding(finding) {
24547
24950
  }
24548
24951
  ];
24549
24952
  }
24953
+ if (finding.ruleId.startsWith("evidence-contract:")) {
24954
+ return [
24955
+ {
24956
+ actionId: "satisfy-evidence-contract",
24957
+ title: "Ask for the configured evidence contract to be satisfied.",
24958
+ detail: "The PR matches a repository-defined evidence contract but is missing required proof in the PR description.",
24959
+ priority: finding.severity === "high" ? "high" : "medium",
24960
+ relatedRuleIds: [finding.ruleId]
24961
+ }
24962
+ ];
24963
+ }
24550
24964
  if (finding.ruleId === "workflow-permission-change") {
24551
24965
  return [
24552
24966
  {
@@ -24558,6 +24972,39 @@ function reviewActionsForFinding(finding) {
24558
24972
  }
24559
24973
  ];
24560
24974
  }
24975
+ if (finding.ruleId === "workflow-dangerous-trigger") {
24976
+ return [
24977
+ {
24978
+ actionId: "review-privileged-pr-trigger",
24979
+ title: "Review privileged pull_request_target usage.",
24980
+ detail: "Confirm the workflow does not execute untrusted PR code with write tokens, secrets, or repository permissions.",
24981
+ priority: "high",
24982
+ relatedRuleIds: [finding.ruleId]
24983
+ }
24984
+ ];
24985
+ }
24986
+ if (finding.ruleId === "workflow-untrusted-checkout") {
24987
+ return [
24988
+ {
24989
+ actionId: "review-untrusted-checkout",
24990
+ title: "Review pull request head checkout privileges.",
24991
+ detail: "Confirm the job does not run untrusted PR code with write tokens, repository secrets, or pull_request_target privileges.",
24992
+ priority: finding.severity === "high" ? "high" : "medium",
24993
+ relatedRuleIds: [finding.ruleId]
24994
+ }
24995
+ ];
24996
+ }
24997
+ if (finding.ruleId === "dependency-lifecycle-script") {
24998
+ return [
24999
+ {
25000
+ actionId: "review-package-lifecycle-script",
25001
+ title: "Review package lifecycle scripts before merge.",
25002
+ detail: "Check whether install, postinstall, prepare, or publish scripts can execute unexpected code for contributors or consumers.",
25003
+ priority: "high",
25004
+ relatedRuleIds: [finding.ruleId]
25005
+ }
25006
+ ];
25007
+ }
24561
25008
  if (finding.ruleId === "mcp-credential-risk") {
24562
25009
  return [
24563
25010
  {
@@ -24591,6 +25038,17 @@ function reviewActionsForFinding(finding) {
24591
25038
  }
24592
25039
  ];
24593
25040
  }
25041
+ if (finding.ruleId === "dependency-major-upgrade") {
25042
+ return [
25043
+ {
25044
+ actionId: "review-major-dependency-upgrade",
25045
+ title: "Review major dependency upgrade impact.",
25046
+ detail: "Check changelogs, migration notes, peer dependencies, and whether tests cover the upgraded surface.",
25047
+ priority: "medium",
25048
+ relatedRuleIds: [finding.ruleId]
25049
+ }
25050
+ ];
25051
+ }
24594
25052
  if (finding.ruleId === "sensitive-path") {
24595
25053
  return [
24596
25054
  {
@@ -24664,7 +25122,7 @@ const build_program = new Command();
24664
25122
  build_program
24665
25123
  .name("proof-pr")
24666
25124
  .description("Review pull request evidence, scope, and safety before maintainers spend time on it.")
24667
- .version("0.1.5");
25125
+ .version("0.1.7");
24668
25126
  build_program
24669
25127
  .command("scan", { isDefault: true })
24670
25128
  .description("Scan a git diff and print a ProofPR report.")
@@ -24708,6 +25166,35 @@ build_program
24708
25166
  await writeIfMissing(options.workflowPath, renderWorkflowTemplate(options.failOn), options.force);
24709
25167
  process.stdout.write(`ProofPR initialized:\n- ${options.configPath}\n- ${options.workflowPath}\n`);
24710
25168
  });
25169
// `benchmark` subcommand: runs every benchmark case, renders the report in the
// requested format, writes it to a file or stdout, and sets a non-zero exit
// code when any case failed.
build_program
    .command("benchmark")
    .description("Run ProofPR benchmark cases and compare expected risk/finding output.")
    .option("--cases <dir>", "Directory containing benchmark case JSON files.", "benchmarks/cases")
    .option("--format <format>", "Output format: text, markdown, or json.", parseBenchmarkFormat, "text")
    .option("--output <path>", "Write benchmark output to a file instead of stdout.")
    .action(async (options) => {
        const report = await runBenchmarks(options.cases);
        let rendered;
        switch (options.format) {
            case "json":
                rendered = `${JSON.stringify(report, null, 2)}\n`;
                break;
            case "markdown":
                rendered = renderBenchmarkMarkdown(report);
                break;
            default:
                // Any other format value falls back to the plain-text report.
                rendered = renderBenchmarkText(report);
        }
        if (options.output) {
            await writeOutput(options.output, rendered);
            process.stdout.write(`ProofPR benchmark report written to ${options.output}\n`);
        }
        else {
            process.stdout.write(rendered);
        }
        // Signal failure through the exit code without aborting pending output.
        const everyCasePassed = report.results.every((result) => result.passed);
        if (!everyCasePassed) {
            process.exitCode = 1;
        }
    });
24711
25198
  build_program.parseAsync(process.argv).catch((error) => {
24712
25199
  const message = error instanceof Error ? error.message : String(error);
24713
25200
  process.stderr.write(`ProofPR failed: ${message}\n`);
@@ -24734,6 +25221,10 @@ async function writeIfMissing(path, contents, force) {
24734
25221
  await (0,promises_namespaceObject.mkdir)((0,external_node_path_.dirname)(path), { recursive: true });
24735
25222
  await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
24736
25223
  }
25224
/**
 * Writes text to a file as UTF-8, creating the parent directory chain first.
 *
 * @param {string} path - Destination file path.
 * @param {string} contents - Text to write.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeOutput(path, contents) {
    const parentDirectory = (0,external_node_path_.dirname)(path);
    await (0,promises_namespaceObject.mkdir)(parentDirectory, { recursive: true });
    await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
}
24737
25228
  async function pathExists(path) {
24738
25229
  try {
24739
25230
  await (0,promises_namespaceObject.access)(path);
@@ -24792,6 +25283,19 @@ comment:
24792
25283
  # dependencies:
24793
25284
  # flagNewPackages: true
24794
25285
  # flagMajorUpgrades: true
25286
+ # flagLifecycleScripts: true
25287
+ #
25288
+ # evidence:
25289
+ # contracts:
25290
+ # - id: ui-screenshot
25291
+ # title: UI changes need screenshots
25292
+ # paths:
25293
+ # - "src/components/**"
25294
+ # - "app/**"
25295
+ # requires:
25296
+ # - screenshot
25297
+ # - verification
25298
+ # severity: medium
24795
25299
  `;
24796
25300
  }
24797
25301
  function renderWorkflowTemplate(failOn) {
@@ -24810,7 +25314,7 @@ jobs:
24810
25314
  runs-on: ubuntu-latest
24811
25315
  steps:
24812
25316
  - uses: actions/checkout@v4
24813
- - uses: linsk27/proof-pr@v0.1.5
25317
+ - uses: linsk27/proof-pr@v0.1.7
24814
25318
  with:
24815
25319
  fail-on: ${failOn}
24816
25320
  comment: "true"
@@ -24826,12 +25330,168 @@ function renderOutput(result, format, locale) {
24826
25330
  }
24827
25331
  return renderMarkdownReport(result, locale);
24828
25332
  }
25333
/**
 * Runs every benchmark case found directly inside a directory and compares the
 * scan output with the expectations declared in each case.
 *
 * Each `*.json` file is a case that names a `diffFile` (resolved relative to
 * the case file) and may carry `config`, `pullRequest`, `id`, `title`,
 * `category` and an `expect` object with optional `risk`, `reviewDecision`,
 * `findings` and `absentFindings`. Cases run sequentially in sorted file-name
 * order so the report is deterministic.
 *
 * @param {string} casesDir - Directory containing benchmark case JSON files.
 * @returns {Promise<{summary: object, results: Array<object>}>} Per-case results plus the aggregate summary.
 * @throws {Error} When a case file cannot be read or parsed, or lacks a `diffFile` string.
 */
async function runBenchmarks(casesDir) {
    const root = (0,external_node_path_.resolve)(casesDir);
    const entries = await (0,promises_namespaceObject.readdir)(root, { withFileTypes: true });
    const caseFiles = entries
        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
        .map((entry) => (0,external_node_path_.resolve)(root, entry.name))
        .sort();
    const results = [];
    for (const caseFile of caseFiles) {
        let testCase;
        try {
            testCase = JSON.parse(await (0,promises_namespaceObject.readFile)(caseFile, "utf8"));
        }
        catch (error) {
            // Name the offending file; a bare JSON syntax error gives no hint which case is broken.
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Could not load benchmark case ${caseFile}: ${reason}`, { cause: error });
        }
        if (testCase === null || typeof testCase !== "object" || typeof testCase.diffFile !== "string") {
            throw new Error(`Benchmark case ${caseFile} must be a JSON object with a "diffFile" string.`);
        }
        // A case without expectations is still scanned and reported; it simply cannot fail.
        const expectations = testCase.expect ?? {};
        const diffText = await (0,promises_namespaceObject.readFile)((0,external_node_path_.resolve)((0,external_node_path_.dirname)(caseFile), testCase.diffFile), "utf8");
        const result = scanDiff(diffText, {
            config: testCase.config,
            pullRequest: testCase.pullRequest
        });
        const actualFindings = result.findings.map((finding) => finding.ruleId);
        const failures = [];
        if (expectations.risk && result.risk !== expectations.risk) {
            failures.push(`expected risk ${expectations.risk}, got ${result.risk}`);
        }
        if (expectations.reviewDecision && result.reviewDecision !== expectations.reviewDecision) {
            failures.push(`expected review decision ${expectations.reviewDecision}, got ${result.reviewDecision}`);
        }
        for (const expectedFinding of expectations.findings ?? []) {
            if (!matchesFindingExpectation(actualFindings, expectedFinding)) {
                failures.push(`expected finding ${expectedFinding}`);
            }
        }
        for (const absentFinding of expectations.absentFindings ?? []) {
            if (matchesFindingExpectation(actualFindings, absentFinding)) {
                failures.push(`unexpected finding ${absentFinding}`);
            }
        }
        results.push({
            id: testCase.id,
            title: testCase.title,
            category: testCase.category ?? "uncategorized",
            passed: failures.length === 0,
            failures,
            actual: {
                risk: result.risk,
                reviewDecision: result.reviewDecision,
                findings: actualFindings
            }
        });
    }
    return {
        summary: summarizeBenchmarkResults(results),
        results
    };
}
25384
/**
 * Aggregates benchmark case results into overall, per-category and per-rule totals.
 *
 * Categories are sorted by name. Rule counts are the number of cases in which
 * a rule fired at least once, sorted by descending count and then by rule id.
 *
 * @param {Array<{category: string, passed: boolean, actual: {findings: string[]}}>} results - Per-case results.
 * @returns {object} Summary with `total`, `passed`, `failed`, `passRate`, `categories` and `findingCounts`.
 */
function summarizeBenchmarkResults(results) {
    const countPassed = (items) => items.filter((item) => item.passed).length;
    const casesByCategory = new Map();
    const casesPerRule = new Map();
    results.forEach((result) => {
        if (!casesByCategory.has(result.category)) {
            casesByCategory.set(result.category, []);
        }
        casesByCategory.get(result.category).push(result);
        // De-duplicate so a rule firing several times in one case counts once.
        new Set(result.actual.findings).forEach((ruleId) => {
            casesPerRule.set(ruleId, (casesPerRule.get(ruleId) ?? 0) + 1);
        });
    });
    const categories = [...casesByCategory.entries()]
        .sort(([left], [right]) => left.localeCompare(right))
        .map(([category, items]) => {
            const categoryPassed = countPassed(items);
            return {
                category,
                total: items.length,
                passed: categoryPassed,
                failed: items.length - categoryPassed,
                passRate: ratio(categoryPassed, items.length)
            };
        });
    const findingCounts = [...casesPerRule.entries()]
        .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
        .map(([ruleId, count]) => ({ ruleId, count }));
    const passed = countPassed(results);
    return {
        total: results.length,
        passed,
        failed: results.length - passed,
        passRate: ratio(passed, results.length),
        categories,
        findingCounts
    };
}
25418
/**
 * Renders a benchmark report as plain text: a summary line, per-category pass
 * rates, rule coverage (only when at least one rule fired) and one PASS/FAIL
 * line per case followed by its failure reasons.
 *
 * @param {{summary: object, results: Array<object>}} report - Benchmark report.
 * @returns {string} Newline-terminated text report.
 */
function renderBenchmarkText(report) {
    const { summary, results } = report;
    const categoryLines = summary.categories.map((category) => `- ${category.category}: ${category.passed}/${category.total} passed (${formatPercent(category.passRate)})`);
    const coverageLines = summary.findingCounts.length > 0
        ? ["", "Finding coverage:", ...summary.findingCounts.map((item) => `- ${item.ruleId}: ${item.count}`)]
        : [];
    const caseLines = results.flatMap((result) => {
        const status = result.passed ? "PASS" : "FAIL";
        const titleSuffix = result.title ? ` - ${result.title}` : "";
        return [
            `${status} ${result.id}${titleSuffix}`,
            ...result.failures.map((failure) => ` - ${failure}`)
        ];
    });
    return [
        "ProofPR benchmark",
        "",
        `Summary: ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`,
        "",
        "Categories:",
        ...categoryLines,
        ...coverageLines,
        "",
        ...caseLines,
        // Trailing empty entry yields the final newline after join.
        ""
    ].join("\n");
}
25445
/**
 * Renders a benchmark report as Markdown with three tables: categories,
 * rule coverage and individual cases.
 *
 * @param {{summary: object, results: Array<object>}} report - Benchmark report.
 * @returns {string} Newline-terminated Markdown document.
 */
function renderBenchmarkMarkdown(report) {
    const { summary, results } = report;
    const categoryRows = summary.categories.map((category) => `| ${category.category} | ${category.passed} | ${category.total} | ${formatPercent(category.passRate)} |`);
    const coverageRows = summary.findingCounts.map((item) => `| \`${item.ruleId}\` | ${item.count} |`);
    const caseRows = results.map((result) => {
        const status = result.passed ? "PASS" : "FAIL";
        return `| ${status} | \`${result.id}\` | ${result.category} | ${result.actual.risk} | ${result.actual.reviewDecision} |`;
    });
    return [
        "# ProofPR Benchmark",
        "",
        `**Summary:** ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`,
        "",
        "## Categories",
        "",
        "| Category | Passed | Total | Pass rate |",
        "| --- | ---: | ---: | ---: |",
        ...categoryRows,
        "",
        "## Finding Coverage",
        "",
        "| Rule | Cases |",
        "| --- | ---: |",
        ...coverageRows,
        "",
        "## Cases",
        "",
        "| Result | Case | Category | Actual risk | Gate |",
        "| --- | --- | --- | --- | --- |",
        ...caseRows,
        // Trailing empty entry yields the final newline after join.
        ""
    ].join("\n");
}
25470
/**
 * Divides `value` by `total`, returning 0 when `total` is 0.
 *
 * @param {number} value - Numerator.
 * @param {number} total - Denominator.
 * @returns {number} The quotient, or 0 for a zero denominator.
 */
function ratio(value, total) {
    if (total === 0) {
        return 0;
    }
    return value / total;
}
25473
/**
 * Formats a fraction as a whole-number percentage string.
 *
 * @param {number} value - Fraction, e.g. 0.5.
 * @returns {string} Rounded percentage followed by `%`, e.g. "50%".
 */
function formatPercent(value) {
    const wholePercent = Math.round(value * 100);
    return `${wholePercent}%`;
}
25476
/**
 * Checks whether an expected rule id is present among the actual rule ids.
 * An expectation ending in `*` is a prefix match; anything else must match exactly.
 *
 * @param {string[]} actualFindings - Rule ids produced by the scan.
 * @param {string} expected - Exact rule id, or a prefix followed by `*`.
 * @returns {boolean} True when at least one actual rule id satisfies the expectation.
 */
function matchesFindingExpectation(actualFindings, expected) {
    if (!expected.endsWith("*")) {
        return actualFindings.includes(expected);
    }
    const requiredPrefix = expected.slice(0, -1);
    return actualFindings.some((ruleId) => ruleId.startsWith(requiredPrefix));
}
24829
25483
  function parseFormat(value) {
24830
25484
  if (value === "json" || value === "markdown" || value === "sarif") {
24831
25485
  return value;
24832
25486
  }
24833
25487
  throw new InvalidArgumentError("format must be one of: markdown, json, sarif");
24834
25488
  }
25489
/**
 * Option parser for the benchmark `--format` flag.
 *
 * @param {string} value - Raw command-line value.
 * @returns {string} The value unchanged when it is "text", "json" or "markdown".
 * @throws {InvalidArgumentError} For any other value.
 */
function parseBenchmarkFormat(value) {
    const supportedFormats = ["text", "json", "markdown"];
    if (!supportedFormats.includes(value)) {
        throw new InvalidArgumentError("benchmark format must be one of: text, markdown, json");
    }
    return value;
}
24835
25495
  function parseFailLevel(value) {
24836
25496
  if (value === "low" || value === "medium" || value === "high" || value === "never") {
24837
25497
  return value;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "proof-pr",
3
- "version": "0.1.5",
3
+ "version": "0.1.7",
4
4
  "description": "CLI for ProofPR, a maintainer-focused pull request evidence scanner.",
5
5
  "license": "MIT",
6
6
  "type": "module",