proof-pr 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -23111,7 +23111,15 @@ function preprocess(fn, schema) {
23111
23111
 
23112
23112
 
23113
23113
  const riskLevelSchema = schemas_enum(["low", "medium", "high"]);
23114
+ const findingSeveritySchema = schemas_enum(["info", "low", "medium", "high"]);
23114
23115
  const localeSchema = schemas_enum(["en", "zh-CN"]);
23116
+ const evidenceRequirementSchema = schemas_enum([
23117
+ "verification",
23118
+ "reproduction",
23119
+ "screenshot",
23120
+ "changelog",
23121
+ "permission-rationale"
23122
+ ]);
23115
23123
  const configPresetSchema = schemas_enum([
23116
23124
  "balanced",
23117
23125
  "open-source-maintainer",
@@ -23149,6 +23157,38 @@ const DEFAULT_SENSITIVE_PATHS = [
23149
23157
  "go.sum"
23150
23158
  ];
23151
23159
  const DEFAULT_TEST_PATHS = ["src/**", "packages/**/src/**", "app/**", "lib/**"];
23160
+ const WORKFLOW_EVIDENCE_CONTRACTS = [
23161
+ {
23162
+ id: "workflow-permission-rationale",
23163
+ title: "Workflow changes need a permission rationale",
23164
+ paths: [".github/workflows/**", ".github/actions/**"],
23165
+ requires: ["verification", "permission-rationale"],
23166
+ severity: "high",
23167
+ recommendation: "Explain why the workflow needs this trigger or permission, and include verification that untrusted PR code cannot reach privileged tokens."
23168
+ }
23169
+ ];
23170
+ const DEPENDENCY_EVIDENCE_CONTRACTS = [
23171
+ {
23172
+ id: "dependency-upgrade-evidence",
23173
+ title: "Dependency changes need upgrade evidence",
23174
+ paths: [
23175
+ "package.json",
23176
+ "**/package.json",
23177
+ "pnpm-lock.yaml",
23178
+ "package-lock.json",
23179
+ "yarn.lock",
23180
+ "requirements.txt",
23181
+ "**/requirements.txt",
23182
+ "pyproject.toml",
23183
+ "**/pyproject.toml",
23184
+ "go.mod",
23185
+ "**/go.mod"
23186
+ ],
23187
+ requires: ["verification", "changelog"],
23188
+ severity: "medium",
23189
+ recommendation: "Link changelog or migration notes and include the test command or CI evidence used to validate the dependency change."
23190
+ }
23191
+ ];
23152
23192
  const PRESET_DEFAULTS = {
23153
23193
  balanced: {},
23154
23194
  "open-source-maintainer": {
@@ -23179,6 +23219,9 @@ const PRESET_DEFAULTS = {
23179
23219
  requireTests: {
23180
23220
  enabled: true,
23181
23221
  paths: ["src/**", "packages/**/src/**", "app/**", "lib/**", "server/**", "api/**"]
23222
+ },
23223
+ evidence: {
23224
+ contracts: WORKFLOW_EVIDENCE_CONTRACTS
23182
23225
  }
23183
23226
  },
23184
23227
  "ai-generated-pr": {
@@ -23217,9 +23260,20 @@ const PRESET_DEFAULTS = {
23217
23260
  requireTests: {
23218
23261
  enabled: true,
23219
23262
  paths: DEFAULT_TEST_PATHS
23263
+ },
23264
+ evidence: {
23265
+ contracts: DEPENDENCY_EVIDENCE_CONTRACTS
23220
23266
  }
23221
23267
  }
23222
23268
  };
23269
+ const evidenceContractSchema = object({
23270
+ id: schemas_string().min(1),
23271
+ title: schemas_string().min(1).optional(),
23272
+ paths: array(schemas_string().min(1)).min(1),
23273
+ requires: array(evidenceRequirementSchema).min(1),
23274
+ severity: findingSeveritySchema.default("medium"),
23275
+ recommendation: schemas_string().min(1).optional()
23276
+ });
23223
23277
  const configSchema = object({
23224
23278
  preset: configPresetSchema.default("balanced"),
23225
23279
  locale: localeSchema.default("en"),
@@ -23238,6 +23292,10 @@ const configSchema = object({
23238
23292
  flagLifecycleScripts: schemas_boolean().default(true)
23239
23293
  })
23240
23294
  .default({ flagNewPackages: true, flagMajorUpgrades: true, flagLifecycleScripts: true }),
23295
+ evidence: object({
23296
+ contracts: array(evidenceContractSchema).default([])
23297
+ })
23298
+ .default({ contracts: [] }),
23241
23299
  comment: object({ enabled: schemas_boolean().default(true) }).default({ enabled: true })
23242
23300
  });
23243
23301
  function parseConfig(input) {
@@ -23313,6 +23371,434 @@ function renderMarkdownReport(result, locale = "en") {
23313
23371
  }
23314
23372
  return renderEnglishMarkdownReport(result);
23315
23373
  }
23374
+ function renderHtmlReport(result, locale = "en") {
23375
+ const labels = htmlLabels(locale);
23376
+ const risk = locale === "zh-CN" ? translateRisk(result.risk) : result.risk;
23377
+ const decision = formatReviewDecision(result.reviewDecision, locale);
23378
+ const scoreGrade = formatEvidenceGrade(result.evidenceScore.grade, locale);
23379
+ const findingsBySeverity = countFindingsBySeverity(result.findings);
23380
+ const ruleCounts = countFindingsByRule(result.findings);
23381
+ const evidenceSignals = [
23382
+ [labels.prDescription, locale === "zh-CN" ? translateDescriptionState(result.summary.pullRequestDescription) : result.summary.pullRequestDescription, result.summary.pullRequestDescription === "present"],
23383
+ [labels.verification, yesNo(result.summary.verificationEvidence, locale), result.summary.verificationEvidence],
23384
+ [labels.reproduction, yesNo(result.summary.reproductionEvidence, locale), result.summary.reproductionEvidence],
23385
+ [labels.screenshot, yesNo(result.summary.screenshotEvidence, locale), result.summary.screenshotEvidence],
23386
+ [labels.changelog, yesNo(result.summary.changelogEvidence, locale), result.summary.changelogEvidence],
23387
+ [labels.permissionRationale, yesNo(result.summary.permissionRationaleEvidence, locale), result.summary.permissionRationaleEvidence]
23388
+ ];
23389
+ return `<!doctype html>
23390
+ <html lang="${locale === "zh-CN" ? "zh-CN" : "en"}">
23391
+ <head>
23392
+ <meta charset="utf-8">
23393
+ <meta name="viewport" content="width=device-width, initial-scale=1">
23394
+ <title>ProofPR ${labels.report}</title>
23395
+ <style>
23396
+ :root {
23397
+ color-scheme: light;
23398
+ --bg: #f6f7f9;
23399
+ --panel: #ffffff;
23400
+ --ink: #17202a;
23401
+ --muted: #667085;
23402
+ --line: #d9dee7;
23403
+ --green: #138a5e;
23404
+ --amber: #b7791f;
23405
+ --red: #c24135;
23406
+ --blue: #2563a9;
23407
+ --soft-green: #e8f6ef;
23408
+ --soft-amber: #fff3d6;
23409
+ --soft-red: #fdebea;
23410
+ --soft-blue: #eaf2fb;
23411
+ }
23412
+
23413
+ * { box-sizing: border-box; }
23414
+
23415
+ body {
23416
+ margin: 0;
23417
+ background: var(--bg);
23418
+ color: var(--ink);
23419
+ font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", "Microsoft YaHei", sans-serif;
23420
+ line-height: 1.5;
23421
+ }
23422
+
23423
+ main {
23424
+ width: min(1180px, calc(100vw - 32px));
23425
+ margin: 0 auto;
23426
+ padding: 32px 0 48px;
23427
+ }
23428
+
23429
+ .topbar {
23430
+ display: flex;
23431
+ justify-content: space-between;
23432
+ gap: 18px;
23433
+ align-items: flex-start;
23434
+ margin-bottom: 20px;
23435
+ }
23436
+
23437
+ h1, h2, h3, p { margin: 0; }
23438
+
23439
+ h1 {
23440
+ font-size: 28px;
23441
+ line-height: 1.2;
23442
+ }
23443
+
23444
+ h2 {
23445
+ font-size: 17px;
23446
+ margin-bottom: 14px;
23447
+ }
23448
+
23449
+ h3 {
23450
+ font-size: 15px;
23451
+ margin-bottom: 8px;
23452
+ }
23453
+
23454
+ .subtitle {
23455
+ color: var(--muted);
23456
+ margin-top: 8px;
23457
+ max-width: 760px;
23458
+ }
23459
+
23460
+ .pill {
23461
+ display: inline-flex;
23462
+ align-items: center;
23463
+ border: 1px solid var(--line);
23464
+ border-radius: 999px;
23465
+ padding: 5px 10px;
23466
+ background: var(--panel);
23467
+ color: var(--muted);
23468
+ font-size: 13px;
23469
+ white-space: nowrap;
23470
+ }
23471
+
23472
+ .grid {
23473
+ display: grid;
23474
+ grid-template-columns: repeat(12, 1fr);
23475
+ gap: 14px;
23476
+ }
23477
+
23478
+ .card {
23479
+ background: var(--panel);
23480
+ border: 1px solid var(--line);
23481
+ border-radius: 8px;
23482
+ padding: 18px;
23483
+ box-shadow: 0 1px 2px rgba(16, 24, 40, 0.04);
23484
+ }
23485
+
23486
+ .metric { grid-column: span 3; }
23487
+ .wide { grid-column: span 8; }
23488
+ .side { grid-column: span 4; }
23489
+ .full { grid-column: 1 / -1; }
23490
+
23491
+ .metric-label {
23492
+ color: var(--muted);
23493
+ font-size: 13px;
23494
+ margin-bottom: 8px;
23495
+ }
23496
+
23497
+ .metric-value {
23498
+ font-size: 27px;
23499
+ font-weight: 720;
23500
+ line-height: 1.1;
23501
+ }
23502
+
23503
+ .tone-low { color: var(--green); background: var(--soft-green); border-color: #b8e5cf; }
23504
+ .tone-medium { color: var(--amber); background: var(--soft-amber); border-color: #f1d28a; }
23505
+ .tone-high { color: var(--red); background: var(--soft-red); border-color: #f3b6b1; }
23506
+
23507
+ .scorebar {
23508
+ width: 100%;
23509
+ height: 16px;
23510
+ border: 1px solid var(--line);
23511
+ border-radius: 999px;
23512
+ overflow: hidden;
23513
+ margin: 14px 0 10px;
23514
+ background: #eef1f5;
23515
+ }
23516
+
23517
+ .scorefill {
23518
+ height: 100%;
23519
+ width: ${result.evidenceScore.value}%;
23520
+ background: ${scoreColor(result.evidenceScore.value)};
23521
+ }
23522
+
23523
+ .summary-grid {
23524
+ display: grid;
23525
+ grid-template-columns: repeat(4, minmax(0, 1fr));
23526
+ gap: 10px;
23527
+ }
23528
+
23529
+ .summary-item {
23530
+ border: 1px solid var(--line);
23531
+ border-radius: 8px;
23532
+ padding: 10px 12px;
23533
+ background: #fbfcfd;
23534
+ }
23535
+
23536
+ .summary-item strong {
23537
+ display: block;
23538
+ font-size: 20px;
23539
+ margin-bottom: 2px;
23540
+ }
23541
+
23542
+ .summary-item span {
23543
+ color: var(--muted);
23544
+ font-size: 12px;
23545
+ }
23546
+
23547
+ .signal-list, .action-list, .finding-list, .focus-list, .deduction-list, .rule-list {
23548
+ display: grid;
23549
+ gap: 10px;
23550
+ }
23551
+
23552
+ .signal, .action, .focus, .deduction, .rule-row {
23553
+ border: 1px solid var(--line);
23554
+ border-radius: 8px;
23555
+ padding: 10px 12px;
23556
+ background: #fbfcfd;
23557
+ }
23558
+
23559
+ .signal {
23560
+ display: flex;
23561
+ justify-content: space-between;
23562
+ gap: 12px;
23563
+ align-items: center;
23564
+ }
23565
+
23566
+ .signal-name, .action-title, .finding-title {
23567
+ font-weight: 680;
23568
+ }
23569
+
23570
+ .signal-state {
23571
+ font-size: 12px;
23572
+ border-radius: 999px;
23573
+ padding: 3px 8px;
23574
+ border: 1px solid var(--line);
23575
+ white-space: nowrap;
23576
+ }
23577
+
23578
+ .severity-grid {
23579
+ display: grid;
23580
+ grid-template-columns: repeat(4, minmax(0, 1fr));
23581
+ gap: 8px;
23582
+ }
23583
+
23584
+ .severity {
23585
+ border: 1px solid var(--line);
23586
+ border-radius: 8px;
23587
+ padding: 10px;
23588
+ background: #fbfcfd;
23589
+ }
23590
+
23591
+ .severity strong {
23592
+ display: block;
23593
+ font-size: 22px;
23594
+ }
23595
+
23596
+ .muted {
23597
+ color: var(--muted);
23598
+ font-size: 13px;
23599
+ }
23600
+
23601
+ .action {
23602
+ display: grid;
23603
+ grid-template-columns: auto 1fr;
23604
+ gap: 10px;
23605
+ }
23606
+
23607
+ .box {
23608
+ width: 18px;
23609
+ height: 18px;
23610
+ border: 2px solid var(--blue);
23611
+ border-radius: 4px;
23612
+ margin-top: 2px;
23613
+ }
23614
+
23615
+ .priority {
23616
+ display: inline-flex;
23617
+ margin-left: 6px;
23618
+ color: var(--muted);
23619
+ font-size: 12px;
23620
+ font-weight: 560;
23621
+ }
23622
+
23623
+ .finding {
23624
+ border: 1px solid var(--line);
23625
+ border-radius: 8px;
23626
+ padding: 14px;
23627
+ background: #fff;
23628
+ }
23629
+
23630
+ .finding-head {
23631
+ display: flex;
23632
+ justify-content: space-between;
23633
+ gap: 12px;
23634
+ align-items: flex-start;
23635
+ margin-bottom: 8px;
23636
+ }
23637
+
23638
+ code {
23639
+ font-family: "SFMono-Regular", Consolas, "Liberation Mono", monospace;
23640
+ font-size: 12px;
23641
+ background: #f0f3f7;
23642
+ border: 1px solid var(--line);
23643
+ border-radius: 6px;
23644
+ padding: 2px 5px;
23645
+ word-break: break-word;
23646
+ }
23647
+
23648
+ .evidence-list {
23649
+ margin: 10px 0 0;
23650
+ padding-left: 18px;
23651
+ color: var(--muted);
23652
+ }
23653
+
23654
+ .footer {
23655
+ color: var(--muted);
23656
+ font-size: 12px;
23657
+ margin-top: 18px;
23658
+ text-align: center;
23659
+ }
23660
+
23661
+ @media (max-width: 860px) {
23662
+ main { width: min(100vw - 20px, 1180px); padding-top: 20px; }
23663
+ .topbar { display: block; }
23664
+ .pill { margin-top: 12px; }
23665
+ .metric, .wide, .side { grid-column: 1 / -1; }
23666
+ .summary-grid, .severity-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
23667
+ }
23668
+ </style>
23669
+ </head>
23670
+ <body>
23671
+ <main>
23672
+ <section class="topbar">
23673
+ <div>
23674
+ <h1>ProofPR ${labels.report}</h1>
23675
+ <p class="subtitle">${labels.subtitle}</p>
23676
+ </div>
23677
+ <span class="pill">${labels.generated}</span>
23678
+ </section>
23679
+
23680
+ <section class="grid">
23681
+ <article class="card metric">
23682
+ <div class="metric-label">${labels.risk}</div>
23683
+ <div class="metric-value">${escapeHtml(risk)}</div>
23684
+ <span class="pill tone-${result.risk}">${escapeHtml(result.risk)}</span>
23685
+ </article>
23686
+ <article class="card metric">
23687
+ <div class="metric-label">${labels.evidenceScore}</div>
23688
+ <div class="metric-value">${result.evidenceScore.value}/100</div>
23689
+ <div class="scorebar" aria-label="${labels.evidenceScore}">
23690
+ <div class="scorefill"></div>
23691
+ </div>
23692
+ <div class="muted">${escapeHtml(scoreGrade)}</div>
23693
+ </article>
23694
+ <article class="card metric">
23695
+ <div class="metric-label">${labels.reviewGate}</div>
23696
+ <div class="metric-value" style="font-size: 20px;">${escapeHtml(decision)}</div>
23697
+ </article>
23698
+ <article class="card metric">
23699
+ <div class="metric-label">${labels.findings}</div>
23700
+ <div class="metric-value">${result.findings.length}</div>
23701
+ <div class="muted">${labels.findingsHint}</div>
23702
+ </article>
23703
+
23704
+ <article class="card wide">
23705
+ <h2>${labels.changeSummary}</h2>
23706
+ <div class="summary-grid">
23707
+ ${summaryItem(labels.filesChanged, result.summary.filesChanged)}
23708
+ ${summaryItem(labels.additions, result.summary.additions)}
23709
+ ${summaryItem(labels.deletions, result.summary.deletions)}
23710
+ ${summaryItem(labels.sensitiveFiles, result.summary.sensitiveFilesChanged)}
23711
+ ${summaryItem(labels.testFiles, result.summary.testFilesChanged)}
23712
+ ${summaryItem(labels.highFindings, findingsBySeverity.high)}
23713
+ ${summaryItem(labels.mediumFindings, findingsBySeverity.medium)}
23714
+ ${summaryItem(labels.lowFindings, findingsBySeverity.low)}
23715
+ </div>
23716
+ </article>
23717
+
23718
+ <article class="card side">
23719
+ <h2>${labels.evidenceSignals}</h2>
23720
+ <div class="signal-list">
23721
+ ${evidenceSignals.map(([name, state, ok]) => signalItem(name, state, ok)).join("\n")}
23722
+ </div>
23723
+ </article>
23724
+
23725
+ <article class="card wide">
23726
+ <h2>${labels.reviewPlan}</h2>
23727
+ <div class="action-list">
23728
+ ${result.reviewPlan.actionItems.length > 0
23729
+ ? result.reviewPlan.actionItems.map((action) => `
23730
+ <div class="action">
23731
+ <span class="box"></span>
23732
+ <div>
23733
+ <div class="action-title">${escapeHtml(localizeActionTitle(action.actionId, action.title, locale))}<span class="priority">${escapeHtml(formatPriority(action.priority, locale))}</span></div>
23734
+ <div class="muted">${escapeHtml(localizeActionDetail(action.actionId, action.detail, locale))}</div>
23735
+ </div>
23736
+ </div>`).join("\n")
23737
+ : `<div class="muted">${labels.noActions}</div>`}
23738
+ </div>
23739
+ </article>
23740
+
23741
+ <article class="card side">
23742
+ <h2>${labels.findingDistribution}</h2>
23743
+ <div class="severity-grid">
23744
+ ${severityItem("high", findingsBySeverity.high, labels.high)}
23745
+ ${severityItem("medium", findingsBySeverity.medium, labels.medium)}
23746
+ ${severityItem("low", findingsBySeverity.low, labels.low)}
23747
+ ${severityItem("info", findingsBySeverity.info, labels.info)}
23748
+ </div>
23749
+ </article>
23750
+
23751
+ <article class="card side">
23752
+ <h2>${labels.focusFiles}</h2>
23753
+ <div class="focus-list">
23754
+ ${result.reviewPlan.focusFiles.length > 0
23755
+ ? result.reviewPlan.focusFiles.map((file) => `
23756
+ <div class="focus">
23757
+ <div><code>${escapeHtml(file.path)}</code></div>
23758
+ <div class="muted">${escapeHtml(localizeFocusReason(file.reasonId, file.reason, locale))}</div>
23759
+ </div>`).join("\n")
23760
+ : `<div class="muted">${labels.noFocusFiles}</div>`}
23761
+ </div>
23762
+ </article>
23763
+
23764
+ <article class="card side">
23765
+ <h2>${labels.scoreDetails}</h2>
23766
+ <div class="deduction-list">
23767
+ ${result.evidenceScore.deductions.length > 0
23768
+ ? result.evidenceScore.deductions.map((deduction) => `
23769
+ <div class="deduction">
23770
+ <strong>-${deduction.points}</strong>
23771
+ <div class="muted">${escapeHtml(localizeDeduction(deduction.reasonId, deduction.message, locale))}</div>
23772
+ </div>`).join("\n")
23773
+ : `<div class="muted">${labels.noDeductions}</div>`}
23774
+ </div>
23775
+ </article>
23776
+
23777
+ <article class="card full">
23778
+ <h2>${labels.rulesCovered}</h2>
23779
+ <div class="rule-list">
23780
+ ${ruleCounts.length > 0
23781
+ ? ruleCounts.map((item) => `<div class="rule-row"><code>${escapeHtml(item.ruleId)}</code> <span class="muted">${item.count}</span></div>`).join("\n")
23782
+ : `<div class="muted">${labels.noRules}</div>`}
23783
+ </div>
23784
+ </article>
23785
+
23786
+ <article class="card full">
23787
+ <h2>${labels.findings}</h2>
23788
+ <div class="finding-list">
23789
+ ${result.findings.length > 0
23790
+ ? result.findings.map((finding) => htmlFinding(finding, locale)).join("\n")
23791
+ : `<div class="muted">${labels.noFindings}</div>`}
23792
+ </div>
23793
+ </article>
23794
+ </section>
23795
+
23796
+ <p class="footer">${labels.footer}</p>
23797
+ </main>
23798
+ </body>
23799
+ </html>
23800
+ `;
23801
+ }
23316
23802
  function getReportMarker() {
23317
23803
  return REPORT_MARKER;
23318
23804
  }
@@ -23375,6 +23861,9 @@ function renderEnglishMarkdownReport(result) {
23375
23861
  `- PR description: ${result.summary.pullRequestDescription}`,
23376
23862
  `- Verification evidence: ${formatBoolean(result.summary.verificationEvidence)}`,
23377
23863
  `- Reproduction context: ${formatBoolean(result.summary.reproductionEvidence)}`,
23864
+ `- Screenshot evidence: ${formatBoolean(result.summary.screenshotEvidence)}`,
23865
+ `- Changelog evidence: ${formatBoolean(result.summary.changelogEvidence)}`,
23866
+ `- Permission rationale: ${formatBoolean(result.summary.permissionRationaleEvidence)}`,
23378
23867
  ""
23379
23868
  ];
23380
23869
  appendEvidenceScoreSection(lines, result, "en");
@@ -23409,6 +23898,9 @@ function renderChineseMarkdownReport(result) {
23409
23898
  `- PR 描述质量:${translateDescriptionState(result.summary.pullRequestDescription)}`,
23410
23899
  `- 验证证据:${formatChineseBoolean(result.summary.verificationEvidence)}`,
23411
23900
  `- 复现上下文:${formatChineseBoolean(result.summary.reproductionEvidence)}`,
23901
+ `- 截图或视觉证据:${formatChineseBoolean(result.summary.screenshotEvidence)}`,
23902
+ `- Changelog 或迁移证据:${formatChineseBoolean(result.summary.changelogEvidence)}`,
23903
+ `- 权限理由证据:${formatChineseBoolean(result.summary.permissionRationaleEvidence)}`,
23412
23904
  ""
23413
23905
  ];
23414
23906
  appendEvidenceScoreSection(lines, result, "zh-CN");
@@ -23519,6 +24011,11 @@ function maintainerFocus(findings, locale) {
23519
24011
  ? "轮换任何可能暴露的凭证,并在移除 secret 前阻止合并。"
23520
24012
  : "Rotate any exposed credential and block the PR until secrets are removed.");
23521
24013
  }
24014
+ else if (finding.ruleId.startsWith("evidence-contract:")) {
24015
+ focus.add(locale === "zh-CN"
24016
+ ? "先要求贡献者补齐仓库定义的证据契约,再投入深度 review。"
24017
+ : "Ask the contributor to satisfy the repository-defined evidence contract before deep review.");
24018
+ }
23522
24019
  else if (finding.ruleId === "workflow-permission-change") {
23523
24020
  focus.add(locale === "zh-CN"
23524
24021
  ? "合并前重点审查 GitHub Actions 权限。"
@@ -23559,6 +24056,11 @@ function maintainerFocus(findings, locale) {
23559
24056
  ? "重点审查 pull_request_target 是否会用高权限 token 执行不可信 PR 代码。"
23560
24057
  : "Review whether pull_request_target can execute untrusted PR code with privileged tokens.");
23561
24058
  }
24059
+ else if (finding.ruleId === "workflow-untrusted-checkout") {
24060
+ focus.add(locale === "zh-CN"
24061
+ ? "重点审查 workflow 是否 checkout 并执行了不可信 PR head 代码。"
24062
+ : "Review whether the workflow checks out and executes untrusted PR head code.");
24063
+ }
23562
24064
  else if (finding.ruleId === "mcp-credential-risk") {
23563
24065
  focus.add(locale === "zh-CN"
23564
24066
  ? "重点审查 MCP command、args 和凭证处理方式。"
@@ -23573,6 +24075,13 @@ function maintainerFocus(findings, locale) {
23573
24075
  return [...focus];
23574
24076
  }
23575
24077
  function translateFinding(finding) {
24078
+ if (finding.ruleId.startsWith("evidence-contract:")) {
24079
+ return {
24080
+ title: "证据契约未满足",
24081
+ message: "该 PR 命中了仓库自定义证据契约,但 PR 描述中缺少必需证据。",
24082
+ recommendation: "建议要求贡献者补齐缺失证据后再深入 review。"
24083
+ };
24084
+ }
23576
24085
  if (finding.ruleId === "change-size") {
23577
24086
  const files = finding.evidence?.find((item) => item.startsWith("files: "))?.replace("files: ", "");
23578
24087
  const lines = finding.evidence?.find((item) => item.startsWith("changed lines: "))?.replace("changed lines: ", "");
@@ -23648,6 +24157,15 @@ function translateFinding(finding) {
23648
24157
  recommendation: "请确认该 workflow 不会用高权限 token、secret 或写权限执行不可信 PR 代码。"
23649
24158
  };
23650
24159
  }
24160
+ if (finding.ruleId === "workflow-untrusted-checkout") {
24161
+ return {
24162
+ title: "Workflow checkout 了 PR head",
24163
+ message: finding.path
24164
+ ? `${finding.path} 引用了 PR head 代码来源,需要审查它是否会在高权限上下文中执行。`
24165
+ : finding.message,
24166
+ recommendation: "避免在 pull_request_target、写权限 token 或可读取 secret 的上下文中运行不可信 PR 代码。"
24167
+ };
24168
+ }
23651
24169
  if (finding.ruleId === "mcp-credential-risk") {
23652
24170
  return {
23653
24171
  title: "MCP 配置需要重点审查",
@@ -23666,8 +24184,15 @@ function translateFinding(finding) {
23666
24184
  }
23667
24185
  function translateEvidence(item) {
23668
24186
  return item
24187
+ .replace("matched files: ", "命中文件:")
24188
+ .replace("missing evidence: ", "缺失证据:")
23669
24189
  .replace("files: ", "文件数:")
23670
24190
  .replace("changed lines: ", "变更行数:")
24191
+ .replace(/\bverification\b/g, "验证")
24192
+ .replace(/\breproduction\b/g, "复现")
24193
+ .replace(/\bscreenshot\b/g, "截图")
24194
+ .replace(/\bchangelog\b/g, "变更日志")
24195
+ .replace(/\bpermission-rationale\b/g, "权限理由")
23671
24196
  .replace("line ", "第 ")
23672
24197
  .replace(": ", " 行:");
23673
24198
  }
@@ -23719,12 +24244,14 @@ function translateReviewActionTitle(actionId, fallback) {
23719
24244
  "ask-for-evidence-before-review": "深入 review 前先要求补充证据",
23720
24245
  "review-with-focus": "带着重点清单进行 review",
23721
24246
  "normal-review": "进入常规 review",
24247
+ "satisfy-evidence-contract": "要求补齐证据契约",
23722
24248
  "improve-pr-description": "要求补充更清楚的 PR 描述",
23723
24249
  "add-verification-evidence": "要求补充测试或手动验证证据",
23724
24250
  "add-reproduction-context": "要求补充复现或 before/after 上下文",
23725
24251
  "rotate-secret": "轮换并移除暴露的凭证",
23726
24252
  "justify-workflow-permissions": "要求说明 workflow 权限最小化理由",
23727
24253
  "review-privileged-pr-trigger": "审查 pull_request_target 高权限触发器",
24254
+ "review-untrusted-checkout": "审查 PR head checkout 的权限边界",
23728
24255
  "review-package-lifecycle-script": "审查包生命周期脚本",
23729
24256
  "review-mcp-execution-surface": "审查 MCP 命令、参数和凭证处理",
23730
24257
  "request-review-map-or-split": "要求拆分 PR 或提供逐文件 review map",
@@ -23739,12 +24266,14 @@ function translateReviewActionDetail(actionId, fallback) {
23739
24266
  "ask-for-evidence-before-review": "要求测试、截图、复现步骤或更清楚的 PR 描述,再投入详细 review。",
23740
24267
  "review-with-focus": "优先使用下面的风险发现和重点文件作为第一轮 review map。",
23741
24268
  "normal-review": "当前证据足够支撑维护者进行常规 review。",
24269
+ "satisfy-evidence-contract": "该 PR 命中了仓库自定义证据契约,但 PR 描述里缺少必需证据。",
23742
24270
  "improve-pr-description": "贡献者应说明为什么改、改了什么、如何验证,以及是否有发布或兼容性风险。",
23743
24271
  "add-verification-evidence": "要求测试输出、CI 链接、截图,或简短的手动验证说明。",
23744
24272
  "add-reproduction-context": "PR 应包含复现步骤、预期/实际行为,或相关 before/after 截图。",
23745
24273
  "rotate-secret": "在 secret 从 PR 中移除并完成轮换前,不要合并。",
23746
24274
  "justify-workflow-permissions": "确认写权限或 OIDC 是否必要,并检查不可信 PR 是否能触发该 workflow。",
23747
24275
  "review-privileged-pr-trigger": "确认 workflow 不会用写权限 token、secret 或仓库权限执行不可信 PR 代码。",
24276
+ "review-untrusted-checkout": "确认 job 不会在写权限 token、仓库 secret 或 pull_request_target 高权限上下文中运行不可信 PR 代码。",
23748
24277
  "review-package-lifecycle-script": "检查 install、postinstall、prepare 或 publish 脚本是否会执行非预期代码。",
23749
24278
  "review-mcp-execution-surface": "检查 MCP 配置是否提交凭证,或意外扩大本地执行面。",
23750
24279
  "request-review-map-or-split": "要求贡献者拆分无关改动,或标出最需要重点 review 的文件。",
@@ -23754,6 +24283,9 @@ function translateReviewActionDetail(actionId, fallback) {
23754
24283
  }[actionId] ?? fallback;
23755
24284
  }
23756
24285
  function translateFocusReason(reasonId, fallback) {
24286
+ if (reasonId.startsWith("evidence-contract:")) {
24287
+ return "仓库自定义证据契约未满足";
24288
+ }
23757
24289
  return {
23758
24290
  "change-size": "review 面积相关 finding",
23759
24291
  "sensitive-path": "敏感路径发生变更",
@@ -23762,6 +24294,7 @@ function translateFocusReason(reasonId, fallback) {
23762
24294
  "dependency-lifecycle-script": "包生命周期脚本发生变更",
23763
24295
  "workflow-permission-change": "workflow 权限发生变更",
23764
24296
  "workflow-dangerous-trigger": "workflow 使用了高风险触发器",
24297
+ "workflow-untrusted-checkout": "workflow checkout 了不可信 PR head",
23765
24298
  "mcp-credential-risk": "MCP 配置存在执行面或凭证风险",
23766
24299
  "missing-tests": "代码改动缺少测试或验证证据"
23767
24300
  }[reasonId] ?? fallback;
@@ -23771,6 +24304,9 @@ function translateScoreMessage(message) {
23771
24304
  "PR description provides review context.": "PR 描述提供了 review 上下文。",
23772
24305
  "Verification evidence was found.": "检测到测试或手动验证证据。",
23773
24306
  "Reproduction or before/after context was found.": "检测到复现步骤或 before/after 上下文。",
24307
+ "Screenshot or visual evidence was found.": "检测到截图或视觉证据。",
24308
+ "Changelog or migration evidence was found.": "检测到 changelog 或迁移证据。",
24309
+ "Permission rationale evidence was found.": "检测到权限理由证据。",
23774
24310
  "Test files changed with the PR.": "PR 同时修改了测试文件。",
23775
24311
  "No configured sensitive files changed.": "没有改动已配置的敏感文件。"
23776
24312
  }[message] ?? message;
@@ -23793,9 +24329,196 @@ function translateDeduction(reasonId, fallback) {
23793
24329
  "dependency-major-upgrade": "依赖发生大版本升级。",
23794
24330
  "dependency-lifecycle-script": "包生命周期脚本可能在安装或发布阶段执行代码。",
23795
24331
  "workflow-dangerous-trigger": "pull_request_target workflow 需要重点审查高权限触发路径。",
24332
+ "workflow-untrusted-checkout": "Workflow checkout PR head 代码,需要审查权限边界。",
24333
+ "evidence-contract-missing": "仓库自定义证据契约未满足。",
23796
24334
  "missing-tests": "代码发生变更,但缺少测试变更或验证说明。"
23797
24335
  }[reasonId] ?? fallback;
23798
24336
  }
24337
/**
 * Returns the static UI strings used by the HTML report.
 *
 * Only the exact locale "zh-CN" selects the Chinese table; any other value
 * gets the English table. A fresh object is returned on every call.
 *
 * @param {string} locale - Report locale.
 * @returns {Record<string, string>} Label table keyed by label id.
 */
function htmlLabels(locale) {
  if (locale !== "zh-CN") {
    return {
      report: "Visual Report",
      subtitle: "A shareable static view of PR risk, evidence quality, review gate, and maintainer actions.",
      generated: "Generated by ProofPR",
      risk: "Risk",
      evidenceScore: "Evidence score",
      reviewGate: "Review gate",
      findings: "Findings",
      findingsHint: "Signals that deserve maintainer attention",
      changeSummary: "Change summary",
      filesChanged: "Files changed",
      additions: "Additions",
      deletions: "Deletions",
      sensitiveFiles: "Sensitive files",
      testFiles: "Test files",
      highFindings: "High findings",
      mediumFindings: "Medium findings",
      lowFindings: "Low findings",
      evidenceSignals: "Evidence signals",
      prDescription: "PR description",
      verification: "Verification",
      reproduction: "Reproduction",
      screenshot: "Screenshot",
      changelog: "Changelog",
      permissionRationale: "Permission rationale",
      reviewPlan: "Review plan",
      noActions: "No additional action items.",
      findingDistribution: "Finding distribution",
      high: "High",
      medium: "Medium",
      low: "Low",
      info: "Info",
      focusFiles: "Focus files",
      noFocusFiles: "No focus files.",
      scoreDetails: "Evidence deductions",
      noDeductions: "No deductions.",
      rulesCovered: "Rules covered",
      noRules: "No rule hits.",
      noFindings: "No review-risk findings detected by the enabled rules.",
      rule: "Rule",
      severity: "Severity",
      path: "Path",
      detail: "Detail",
      evidence: "Evidence",
      recommendation: "Recommendation",
      footer: "ProofPR does not replace human review. It helps maintainers decide whether evidence is enough and risk boundaries are clear."
    };
  }

  // Chinese (zh-CN) labels; same keys, same order as the English table.
  return {
    report: "可视化报告",
    subtitle: "把 PR 风险、证据质量、Review 门禁和维护者行动清单整理成一个可分享的静态页面。",
    generated: "Generated by ProofPR",
    risk: "风险等级",
    evidenceScore: "证据评分",
    reviewGate: "Review 门禁",
    findings: "风险发现",
    findingsHint: "需要维护者优先关注的信号",
    changeSummary: "改动概览",
    filesChanged: "改动文件",
    additions: "新增行",
    deletions: "删除行",
    sensitiveFiles: "敏感文件",
    testFiles: "测试文件",
    highFindings: "高风险",
    mediumFindings: "中风险",
    lowFindings: "低风险",
    evidenceSignals: "证据信号",
    prDescription: "PR 描述",
    verification: "验证证据",
    reproduction: "复现上下文",
    screenshot: "截图证据",
    changelog: "Changelog",
    permissionRationale: "权限理由",
    reviewPlan: "Review 行动清单",
    noActions: "没有额外行动项。",
    findingDistribution: "Finding 分布",
    high: "高",
    medium: "中",
    low: "低",
    info: "信息",
    focusFiles: "重点文件",
    noFocusFiles: "没有重点文件。",
    scoreDetails: "证据扣分",
    noDeductions: "没有扣分项。",
    rulesCovered: "命中规则",
    noRules: "没有规则命中。",
    noFindings: "启用的规则没有发现需要优先关注的 review 风险。",
    rule: "规则",
    severity: "严重程度",
    path: "路径",
    detail: "详情",
    evidence: "证据",
    recommendation: "建议",
    footer: "ProofPR 不替代人工 review,它帮助维护者先判断证据是否足够、风险边界是否清楚。"
  };
}
24435
/**
 * Renders one tile of the change-summary grid.
 *
 * Both the value and the label are HTML-escaped. The value is stringified
 * first because it is normally a numeric count and the escaper works on
 * strings.
 *
 * @param {string} label - Caption shown under the value.
 * @param {unknown} value - Figure to display (typically a number).
 * @returns {string} HTML fragment for the tile.
 */
function summaryItem(label, value) {
  const valueHtml = escapeHtml(String(value));
  const labelHtml = escapeHtml(label);
  return `<div class="summary-item"><strong>${valueHtml}</strong><span>${labelHtml}</span></div>`;
}
24438
/**
 * Renders one row of the evidence-signals list.
 *
 * @param {string} name - Signal caption.
 * @param {string} state - Text shown in the state badge.
 * @param {boolean} ok - Truthy selects the "tone-low" badge, falsy "tone-medium".
 * @returns {string} HTML fragment for the row.
 */
function signalItem(name, state, ok) {
  const tone = ok ? "tone-low" : "tone-medium";
  const nameHtml = `<span class="signal-name">${escapeHtml(name)}</span>`;
  const stateHtml = `<span class="signal-state ${tone}">${escapeHtml(state)}</span>`;
  return `<div class="signal">${nameHtml}${stateHtml}</div>`;
}
24441
/**
 * Renders one tile of the finding-distribution grid.
 *
 * "high", "medium" and "low" get their tone class; any other severity
 * (including "info") gets none. The value is stringified and HTML-escaped
 * like the label, since it is normally a numeric count.
 *
 * @param {string} severity - Severity key that selects the tone class.
 * @param {unknown} value - Count to display.
 * @param {string} label - Caption for the tile.
 * @returns {string} HTML fragment for the tile.
 */
function severityItem(severity, value, label) {
  const tones = { high: "tone-high", medium: "tone-medium", low: "tone-low" };
  const tone = Object.prototype.hasOwnProperty.call(tones, severity) ? tones[severity] : "";
  const valueHtml = escapeHtml(String(value));
  return `<div class="severity ${tone}"><strong>${valueHtml}</strong><span>${escapeHtml(label)}</span></div>`;
}
24444
/**
 * Renders a single finding as an HTML card.
 *
 * For locale "zh-CN" the title, message and recommendation come from
 * translateFinding, evidence lines from translateEvidence and the severity
 * text from translateSeverity; otherwise the finding's own fields are used.
 * The path, evidence list and recommendation rows are omitted when the
 * corresponding field is missing or empty.
 *
 * The severity pill uses the same tone mapping as the distribution tiles:
 * "high", "medium" and "low" get their tone class, anything else (notably
 * "info") is rendered neutral rather than with the green "low" tone.
 *
 * @param {object} finding - Finding with ruleId, severity, title, message and
 *   optional path, evidence (string[]) and recommendation.
 * @param {string} locale - Report locale.
 * @returns {string} HTML fragment for the finding.
 */
function htmlFinding(finding, locale) {
  const labels = htmlLabels(locale);
  const isChinese = locale === "zh-CN";
  const translated = isChinese ? translateFinding(finding) : finding;

  let evidence = "";
  if (finding.evidence && finding.evidence.length > 0) {
    const rows = finding.evidence.map((item) => {
      const text = isChinese ? translateEvidence(item) : item;
      return `<li><code>${escapeHtml(text)}</code></li>`;
    });
    evidence = `<ul class="evidence-list">${rows.join("")}</ul>`;
  }

  const path = finding.path
    ? `<div class="muted">${labels.path}: <code>${escapeHtml(finding.path)}</code></div>`
    : "";
  const recommendation = translated.recommendation
    ? `<div class="muted">${labels.recommendation}: ${escapeHtml(translated.recommendation)}</div>`
    : "";

  const tones = { high: "tone-high", medium: "tone-medium", low: "tone-low" };
  const tone = Object.prototype.hasOwnProperty.call(tones, finding.severity)
    ? tones[finding.severity]
    : "";
  const severityText = isChinese ? translateSeverity(finding.severity) : finding.severity;

  return [
    `<div class="finding">`,
    `<div class="finding-head">`,
    `<div>`,
    `<div class="finding-title">${escapeHtml(translated.title)}</div>`,
    `<div class="muted">${labels.rule}: <code>${escapeHtml(finding.ruleId)}</code></div>`,
    `</div>`,
    `<span class="pill ${tone}">${escapeHtml(severityText)}</span>`,
    `</div>`,
    path,
    `<div class="muted">${labels.detail}: ${escapeHtml(translated.message)}</div>`,
    evidence,
    recommendation,
    `</div>`
  ].join("\n");
}
24472
/**
 * Tallies findings per severity level.
 *
 * The result always has exactly the keys info, low, medium and high.
 * Findings whose severity is missing or not one of those four are ignored,
 * so the counts stay numeric and no stray keys are added to the result.
 *
 * @param {Array<{severity?: string}>} findings - Findings to count.
 * @returns {{info: number, low: number, medium: number, high: number}} Counts.
 */
function countFindingsBySeverity(findings) {
  const counts = { info: 0, low: 0, medium: 0, high: 0 };
  for (const finding of findings) {
    // Own-property check so names like "constructor" are not treated as levels.
    if (Object.prototype.hasOwnProperty.call(counts, finding.severity)) {
      counts[finding.severity] += 1;
    }
  }
  return counts;
}
24478
/**
 * Counts findings per rule id.
 *
 * @param {Array<{ruleId: string}>} findings - Findings to group.
 * @returns {Array<{ruleId: string, count: number}>} One entry per rule,
 *   ordered by count descending, ties broken by ruleId via localeCompare.
 */
function countFindingsByRule(findings) {
  const tally = new Map();
  findings.forEach(({ ruleId }) => {
    const seen = tally.get(ruleId) ?? 0;
    tally.set(ruleId, seen + 1);
  });

  const rows = Array.from(tally, ([ruleId, count]) => ({ ruleId, count }));
  rows.sort((a, b) => {
    const byCount = b.count - a.count;
    return byCount || a.ruleId.localeCompare(b.ruleId);
  });
  return rows;
}
24487
/**
 * Picks the CSS colour variable for the evidence-score bar.
 *
 * @param {number} value - Evidence score.
 * @returns {string} "var(--green)" for >= 85, "var(--blue)" for >= 70,
 *   "var(--amber)" for >= 50, otherwise "var(--red)".
 */
function scoreColor(value) {
  // Bands are checked from the highest floor down; the first match wins.
  const bands = [
    [85, "var(--green)"],
    [70, "var(--blue)"],
    [50, "var(--amber)"]
  ];
  const band = bands.find(([floor]) => value >= floor);
  return band ? band[1] : "var(--red)";
}
24499
/**
 * Formats a boolean-like value for the given locale by delegating to
 * formatChineseBoolean for "zh-CN" and to formatBoolean otherwise.
 *
 * @param {unknown} value - Value to format.
 * @param {string} locale - Report locale.
 * @returns {string} Localized text produced by the chosen formatter.
 */
function yesNo(value, locale) {
  if (locale === "zh-CN") {
    return formatChineseBoolean(value);
  }
  return formatBoolean(value);
}
24502
/**
 * Returns the review-action title for the locale: translated through
 * translateReviewActionTitle for "zh-CN", otherwise the fallback as given.
 *
 * @param {string} actionId - Action identifier used for translation lookup.
 * @param {string} fallback - Original title.
 * @param {string} locale - Report locale.
 * @returns {string} Title to display.
 */
function localizeActionTitle(actionId, fallback, locale) {
  if (locale !== "zh-CN") {
    return fallback;
  }
  return translateReviewActionTitle(actionId, fallback);
}
24505
/**
 * Returns the review-action detail for the locale: translated through
 * translateReviewActionDetail for "zh-CN", otherwise the fallback as given.
 *
 * @param {string} actionId - Action identifier used for translation lookup.
 * @param {string} fallback - Original detail text.
 * @param {string} locale - Report locale.
 * @returns {string} Detail text to display.
 */
function localizeActionDetail(actionId, fallback, locale) {
  if (locale !== "zh-CN") {
    return fallback;
  }
  return translateReviewActionDetail(actionId, fallback);
}
24508
/**
 * Returns the focus-file reason for the locale: translated through
 * translateFocusReason for "zh-CN", otherwise the fallback as given.
 *
 * @param {string} reasonId - Reason identifier used for translation lookup.
 * @param {string} fallback - Original reason text.
 * @param {string} locale - Report locale.
 * @returns {string} Reason text to display.
 */
function localizeFocusReason(reasonId, fallback, locale) {
  if (locale !== "zh-CN") {
    return fallback;
  }
  return translateFocusReason(reasonId, fallback);
}
24511
/**
 * Returns the score-deduction message for the locale: translated through
 * translateDeduction for "zh-CN", otherwise the fallback as given.
 *
 * @param {string} reasonId - Deduction identifier used for translation lookup.
 * @param {string} fallback - Original deduction message.
 * @param {string} locale - Report locale.
 * @returns {string} Message to display.
 */
function localizeDeduction(reasonId, fallback, locale) {
  if (locale !== "zh-CN") {
    return fallback;
  }
  return translateDeduction(reasonId, fallback);
}
24514
/**
 * Escapes the five HTML-significant characters (& < > " ') so a value can be
 * placed in element content or inside a quoted attribute.
 *
 * Non-string input is coerced with String() instead of throwing, because
 * report fields such as counts and scores are numbers; null and undefined
 * become the empty string.
 *
 * @param {unknown} value - Value to embed in HTML.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  if (value == null) {
    return "";
  }
  // "&" must be replaced first so the entities added below are not re-escaped.
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
23799
24522
  function formatBoolean(value) {
23800
24523
  return value ? "yes" : "no";
23801
24524
  }
@@ -23908,12 +24631,27 @@ const REPRODUCTION_PATTERNS = [
23908
24631
  /\b(?:before|after|expected|actual)\b/i,
23909
24632
  /复现|重现|复现步骤|期望|实际/
23910
24633
  ];
24634
+ const SCREENSHOT_PATTERNS = [
24635
+ /\b(?:screenshot|screen shot|screen recording|recording|gif|image|before\/after)\b/i,
24636
+ /截图|录屏|效果图|前后对比|对比图/
24637
+ ];
24638
+ const CHANGELOG_PATTERNS = [
24639
+ /\b(?:changelog|release notes?|migration guide|breaking changes?|upgrade guide)\b/i,
24640
+ /变更日志|发布说明|迁移指南|升级说明|破坏性变更|兼容性/
24641
+ ];
24642
+ const PERMISSION_RATIONALE_PATTERNS = [
24643
+ /\b(?:least privilege|permission rationale|write permission|oidc|id-token|trusted workflow|untrusted pr|token scope)\b/i,
24644
+ /权限理由|最小权限|写权限|OIDC|id-token|不可信 PR|高权限|token 权限|凭证权限/
24645
+ ];
23911
24646
  function analyzeEvidence(context) {
23912
24647
  if (!context) {
23913
24648
  return {
23914
24649
  descriptionState: "unavailable",
23915
24650
  verificationEvidence: false,
23916
- reproductionEvidence: false
24651
+ reproductionEvidence: false,
24652
+ screenshotEvidence: false,
24653
+ changelogEvidence: false,
24654
+ permissionRationaleEvidence: false
23917
24655
  };
23918
24656
  }
23919
24657
  const text = [context.title ?? "", context.body ?? ""].join("\n").trim();
@@ -23921,7 +24659,10 @@ function analyzeEvidence(context) {
23921
24659
  return {
23922
24660
  descriptionState: descriptionState(body),
23923
24661
  verificationEvidence: matchesAnyPattern(text, VERIFICATION_PATTERNS),
23924
- reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS)
24662
+ reproductionEvidence: matchesAnyPattern(text, REPRODUCTION_PATTERNS),
24663
+ screenshotEvidence: matchesAnyPattern(text, SCREENSHOT_PATTERNS),
24664
+ changelogEvidence: matchesAnyPattern(text, CHANGELOG_PATTERNS),
24665
+ permissionRationaleEvidence: matchesAnyPattern(text, PERMISSION_RATIONALE_PATTERNS)
23925
24666
  };
23926
24667
  }
23927
24668
  function descriptionState(body) {
@@ -24102,9 +24843,11 @@ function analyzeDiffFiles(files, config, pullRequest) {
24102
24843
  findings.push(...analyzeSensitivePaths(activeFiles, config));
24103
24844
  findings.push(...analyzeMissingTests(activeFiles, config, pullRequest));
24104
24845
  findings.push(...analyzePullRequestEvidence(activeFiles, pullRequest));
24846
+ findings.push(...analyzeEvidenceContracts(activeFiles, config, pullRequest));
24105
24847
  findings.push(...analyzeDependencyChanges(activeFiles, config));
24106
24848
  findings.push(...analyzeWorkflowPermissions(activeFiles));
24107
24849
  findings.push(...analyzeWorkflowDangerousTriggers(activeFiles));
24850
+ findings.push(...analyzeWorkflowUntrustedCheckout(activeFiles));
24108
24851
  findings.push(...analyzeMcpConfigs(activeFiles));
24109
24852
  if (config.secrets.enabled) {
24110
24853
  for (const file of activeFiles) {
@@ -24124,7 +24867,10 @@ function summarizeDiffFiles(files, config, pullRequest) {
24124
24867
  sensitiveFilesChanged: activeFiles.filter((file) => matchesAny(file.path, config.sensitivePaths)).length,
24125
24868
  pullRequestDescription: evidence.descriptionState,
24126
24869
  verificationEvidence: evidence.verificationEvidence,
24127
- reproductionEvidence: evidence.reproductionEvidence
24870
+ reproductionEvidence: evidence.reproductionEvidence,
24871
+ screenshotEvidence: evidence.screenshotEvidence,
24872
+ changelogEvidence: evidence.changelogEvidence,
24873
+ permissionRationaleEvidence: evidence.permissionRationaleEvidence
24128
24874
  };
24129
24875
  }
24130
24876
  function analyzeChangeSize(files) {
@@ -24229,6 +24975,55 @@ function analyzePullRequestEvidence(files, pullRequest) {
24229
24975
  }
24230
24976
  return findings;
24231
24977
  }
24978
/**
 * Check changed files against the configured evidence contracts.
 *
 * For every contract whose path globs match at least one changed file, the
 * required evidence kinds are checked; a finding is produced when any are
 * missing.
 *
 * @param {Array<{path: string}>} files - Changed files under analysis.
 * @param {{evidence: {contracts: Array<object>}}} config - Resolved configuration.
 * @param {object|undefined} pullRequest - Pull request context handed to `analyzeEvidence`.
 * @returns {Array<object>} One finding per unsatisfied contract, with rule id
 *   `evidence-contract:<contract id>`.
 */
function analyzeEvidenceContracts(files, config, pullRequest) {
    const { contracts } = config.evidence;
    if (contracts.length === 0) {
        return [];
    }
    const evidence = analyzeEvidence(pullRequest);
    const hasTestChanges = files.some((file) => isTestPath(file.path));
    return contracts.flatMap((contract) => {
        const matchedFiles = files.filter((file) => matchesAny(file.path, contract.paths));
        if (matchedFiles.length === 0) {
            return [];
        }
        const missingRequirements = contract.requires.filter((requirement) => !hasEvidenceRequirement(requirement, evidence, hasTestChanges));
        if (missingRequirements.length === 0) {
            return [];
        }
        const missingList = missingRequirements.map(formatEvidenceRequirement).join(", ");
        // Only the first five matched paths are listed to keep the evidence short.
        const matchedList = matchedFiles
            .slice(0, 5)
            .map((file) => file.path)
            .join(", ");
        return [
            {
                ruleId: `evidence-contract:${contract.id}`,
                title: contract.title ?? "Evidence contract missing",
                message: `Changed files match evidence contract "${contract.id}", but missing required evidence: ${missingList}.`,
                severity: contract.severity,
                path: matchedFiles[0]?.path,
                evidence: [
                    `matched files: ${matchedList}`,
                    `missing evidence: ${missingList}`
                ],
                recommendation: contract.recommendation ??
                    "Ask the contributor to add the missing evidence before spending deep review time."
            }
        ];
    });
}
25012
/**
 * Decide whether one evidence requirement is satisfied.
 *
 * Every known requirement id is matched explicitly. An unrecognised id is
 * reported as unsatisfied instead of falling through to the
 * permission-rationale check, so a misconfigured contract shows up as missing
 * evidence rather than being answered by an unrelated signal.
 *
 * @param {string} requirement - One of "verification", "reproduction",
 *   "screenshot", "changelog" or "permission-rationale".
 * @param {object} evidence - Evidence flags (`verificationEvidence`,
 *   `reproductionEvidence`, `screenshotEvidence`, `changelogEvidence`,
 *   `permissionRationaleEvidence`).
 * @param {boolean} hasTestChanges - Whether the change set touches test files;
 *   this also counts as verification.
 * @returns {boolean} True when the requirement is met.
 */
function hasEvidenceRequirement(requirement, evidence, hasTestChanges) {
    switch (requirement) {
        case "verification":
            return evidence.verificationEvidence || hasTestChanges;
        case "reproduction":
            return evidence.reproductionEvidence;
        case "screenshot":
            return evidence.screenshotEvidence;
        case "changelog":
            return evidence.changelogEvidence;
        case "permission-rationale":
            return evidence.permissionRationaleEvidence;
        default:
            return false;
    }
}
24232
25027
  function analyzeDependencyChanges(files, config) {
24233
25028
  const findings = [];
24234
25029
  for (const file of files.filter((candidate) => isDependencyManifest(candidate.path))) {
@@ -24360,7 +25155,7 @@ function extractMajorVersion(version) {
24360
25155
  function analyzeWorkflowPermissions(files) {
24361
25156
  const findings = [];
24362
25157
  for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
24363
- const permissionLines = file.addedLines.filter((line) => /permissions:|contents:\s*write|packages:\s*write|id-token:\s*write|pull-requests:\s*write/.test(line.value.trim()));
25158
+ const permissionLines = file.addedLines.filter((line) => isRiskyWorkflowPermissionLine(line.value));
24364
25159
  if (permissionLines.length === 0) {
24365
25160
  continue;
24366
25161
  }
@@ -24376,6 +25171,13 @@ function analyzeWorkflowPermissions(files) {
24376
25171
  }
24377
25172
  return findings;
24378
25173
  }
25174
/**
 * Decide whether a workflow line grants a write-level token permission.
 *
 * Recognised forms:
 *   - `permissions: write-all`
 *   - block-style scope entries such as `contents: write`
 *   - flow-style mappings such as `permissions: { contents: read, id-token: write }`
 * Keys and values may be wrapped in single or double quotes. A bare
 * `permissions:` line and read-only scopes are not flagged.
 *
 * @param {string} value - Raw line text; surrounding whitespace is ignored.
 * @returns {boolean} True when the line grants write access to a known scope.
 */
function isRiskyWorkflowPermissionLine(value) {
    const line = value.trim();
    if (/^permissions:\s*["']?write-all\b/i.test(line)) {
        return true;
    }
    const scope = "(?:actions|attestations|checks|contents|deployments|discussions|id-token|issues|models|packages|pages|pull-requests|repository-projects|security-events|statuses)";
    const blockEntry = new RegExp(`^["']?${scope}["']?:\\s*["']?write\\b`, "i");
    if (blockEntry.test(line)) {
        return true;
    }
    // Flow mappings keep every scope on the `permissions:` line itself, so the
    // line-anchored block pattern above can never see them.
    const flowEntry = new RegExp(`^permissions:\\s*\\{[^}]*\\b${scope}["']?\\s*:\\s*["']?write\\b`, "i");
    return flowEntry.test(line);
}
24379
25181
  function analyzeWorkflowDangerousTriggers(files) {
24380
25182
  const findings = [];
24381
25183
  for (const file of files.filter((candidate) => isWorkflowPath(candidate.path))) {
@@ -24395,6 +25197,33 @@ function analyzeWorkflowDangerousTriggers(files) {
24395
25197
  }
24396
25198
  return findings;
24397
25199
  }
25200
/**
 * Flag workflow files whose added lines reference the pull request head.
 *
 * A file is reported once. Severity is "high" when its added lines also
 * mention `pull_request_target`, otherwise "medium". Only added lines are
 * inspected, so a trigger that already existed in the file is not seen here.
 *
 * @param {Array<{path: string, addedLines: Array<{value: string}>}>} files - Changed files.
 * @returns {Array<object>} Findings with rule id `workflow-untrusted-checkout`.
 */
function analyzeWorkflowUntrustedCheckout(files) {
    const mentionsPullRequestTarget = (line) => /\bpull_request_target\b/.test(line.value.trim());
    return files
        .filter((candidate) => isWorkflowPath(candidate.path))
        .flatMap((file) => {
            const headCheckoutLines = file.addedLines.filter((line) => isPullRequestHeadCheckoutLine(line.value));
            if (headCheckoutLines.length === 0) {
                return [];
            }
            let message;
            let severity;
            if (file.addedLines.some(mentionsPullRequestTarget)) {
                message = `${file.path} combines pull_request_target with pull request head checkout references.`;
                severity = "high";
            }
            else {
                message = `${file.path} checks out pull request head references; review the job privilege boundary before merging.`;
                severity = "medium";
            }
            return [
                {
                    ruleId: "workflow-untrusted-checkout",
                    title: "Workflow checks out pull request head",
                    message,
                    severity,
                    path: file.path,
                    // At most five offending lines are quoted as evidence.
                    evidence: headCheckoutLines.slice(0, 5).map(formatEvidenceLine),
                    recommendation: "Avoid running untrusted PR code with write tokens, repository secrets, or privileged pull_request_target context."
                }
            ];
        });
}
25222
/**
 * Detect references to the pull request head in a workflow line.
 *
 * Matches `github.head_ref` and `github.event.pull_request.head`, including
 * its `.sha`, `.ref` and `.repo.full_name` members. The check is purely
 * textual: any line containing such a reference matches, not only checkout
 * steps.
 *
 * @param {string} value - Raw line text.
 * @returns {boolean} True when the line contains a head reference.
 */
function isPullRequestHeadCheckoutLine(value) {
    const headReferencePatterns = [
        /\bgithub\.head_ref\b/,
        /\bgithub\.event\.pull_request\.head(?:\.sha|\.ref|\.repo\.full_name)?\b/
    ];
    const candidate = value.trim();
    return headReferencePatterns.some((pattern) => pattern.test(candidate));
}
24398
25227
  function analyzeMcpConfigs(files) {
24399
25228
  const findings = [];
24400
25229
  for (const file of files.filter((candidate) => isMcpConfigPath(candidate.path))) {
@@ -24418,6 +25247,9 @@ function formatEvidenceLine(line) {
24418
25247
  const value = line.value.trim();
24419
25248
  return line.lineNumber ? `line ${line.lineNumber}: ${value}` : value;
24420
25249
  }
25250
/**
 * Produce the display label for an evidence requirement.
 *
 * @param {string} requirement - Requirement id.
 * @returns {string} The id itself, unchanged.
 */
function formatEvidenceRequirement(requirement) {
    // Requirement ids are shown verbatim.
    const label = requirement;
    return label;
}
24421
25253
  function sensitivePathSeverity(path) {
24422
25254
  if (matchesAny(path, [
24423
25255
  "**/.env*",
@@ -24492,8 +25324,9 @@ function calculateEvidenceScore(summary, findings) {
24492
25324
  "dependency-lifecycle-script",
24493
25325
  "workflow-permission-change",
24494
25326
  "workflow-dangerous-trigger",
25327
+ "workflow-untrusted-checkout",
24495
25328
  "mcp-credential-risk"
24496
- ].includes(finding.ruleId));
25329
+ ].includes(finding.ruleId) || finding.ruleId.startsWith("evidence-contract:"));
24497
25330
  if (needsVerificationEvidence && !summary.verificationEvidence) {
24498
25331
  addDeduction("missing-verification", 20, "No test or manual verification evidence was found.");
24499
25332
  }
@@ -24510,6 +25343,9 @@ function calculateEvidenceScore(summary, findings) {
24510
25343
  else if (finding.ruleId === "workflow-dangerous-trigger") {
24511
25344
  addDeduction("workflow-dangerous-trigger", 30, "pull_request_target workflows need privileged trigger review.");
24512
25345
  }
25346
+ else if (finding.ruleId === "workflow-untrusted-checkout") {
25347
+ addDeduction("workflow-untrusted-checkout", finding.severity === "high" ? 30 : 18, "Workflow checkout of pull request head needs privilege-boundary review.");
25348
+ }
24513
25349
  else if (finding.ruleId === "mcp-credential-risk") {
24514
25350
  addDeduction("mcp-credential-risk", 25, "MCP configuration expands local execution or credential risk.");
24515
25351
  }
@@ -24530,6 +25366,9 @@ function calculateEvidenceScore(summary, findings) {
24530
25366
  else if (finding.ruleId === "dependency-lifecycle-script") {
24531
25367
  addDeduction("dependency-lifecycle-script", 25, "Package lifecycle scripts can run during install or publish.");
24532
25368
  }
25369
+ else if (finding.ruleId.startsWith("evidence-contract:")) {
25370
+ addDeduction("evidence-contract-missing", finding.severity === "high" ? 25 : 15, "Configured evidence contract was not satisfied.");
25371
+ }
24533
25372
  else if (finding.ruleId === "missing-tests") {
24534
25373
  addDeduction("missing-tests", finding.severity === "medium" ? 20 : 12, "Code changed without test changes or verification notes.");
24535
25374
  }
@@ -24558,6 +25397,15 @@ function collectEvidenceStrengths(summary) {
24558
25397
  if (summary.reproductionEvidence) {
24559
25398
  strengths.push("Reproduction or before/after context was found.");
24560
25399
  }
25400
+ if (summary.screenshotEvidence) {
25401
+ strengths.push("Screenshot or visual evidence was found.");
25402
+ }
25403
+ if (summary.changelogEvidence) {
25404
+ strengths.push("Changelog or migration evidence was found.");
25405
+ }
25406
+ if (summary.permissionRationaleEvidence) {
25407
+ strengths.push("Permission rationale evidence was found.");
25408
+ }
24561
25409
  if (summary.testFilesChanged > 0) {
24562
25410
  strengths.push("Test files changed with the PR.");
24563
25411
  }
@@ -24582,6 +25430,7 @@ function calculateReviewDecision(risk, evidenceScore, findings) {
24582
25430
  const hasBlockingSecurityFinding = findings.some((finding) => finding.ruleId.startsWith("secret-detected") ||
24583
25431
  finding.ruleId === "workflow-permission-change" ||
24584
25432
  finding.ruleId === "workflow-dangerous-trigger" ||
25433
+ (finding.ruleId === "workflow-untrusted-checkout" && finding.severity === "high") ||
24585
25434
  finding.ruleId === "dependency-lifecycle-script" ||
24586
25435
  finding.ruleId === "mcp-credential-risk");
24587
25436
  if (hasBlockingSecurityFinding || evidenceScore.value < 50 || risk === "high") {
@@ -24714,6 +25563,17 @@ function reviewActionsForFinding(finding) {
24714
25563
  }
24715
25564
  ];
24716
25565
  }
25566
+ if (finding.ruleId.startsWith("evidence-contract:")) {
25567
+ return [
25568
+ {
25569
+ actionId: "satisfy-evidence-contract",
25570
+ title: "Ask for the configured evidence contract to be satisfied.",
25571
+ detail: "The PR matches a repository-defined evidence contract but is missing required proof in the PR description.",
25572
+ priority: finding.severity === "high" ? "high" : "medium",
25573
+ relatedRuleIds: [finding.ruleId]
25574
+ }
25575
+ ];
25576
+ }
24717
25577
  if (finding.ruleId === "workflow-permission-change") {
24718
25578
  return [
24719
25579
  {
@@ -24736,6 +25596,17 @@ function reviewActionsForFinding(finding) {
24736
25596
  }
24737
25597
  ];
24738
25598
  }
25599
+ if (finding.ruleId === "workflow-untrusted-checkout") {
25600
+ return [
25601
+ {
25602
+ actionId: "review-untrusted-checkout",
25603
+ title: "Review pull request head checkout privileges.",
25604
+ detail: "Confirm the job does not run untrusted PR code with write tokens, repository secrets, or pull_request_target privileges.",
25605
+ priority: finding.severity === "high" ? "high" : "medium",
25606
+ relatedRuleIds: [finding.ruleId]
25607
+ }
25608
+ ];
25609
+ }
24739
25610
  if (finding.ruleId === "dependency-lifecycle-script") {
24740
25611
  return [
24741
25612
  {
@@ -24864,7 +25735,7 @@ const build_program = new Command();
24864
25735
  build_program
24865
25736
  .name("proof-pr")
24866
25737
  .description("Review pull request evidence, scope, and safety before maintainers spend time on it.")
24867
- .version("0.1.6");
25738
+ .version("0.1.7");
24868
25739
  build_program
24869
25740
  .command("scan", { isDefault: true })
24870
25741
  .description("Scan a git diff and print a ProofPR report.")
@@ -24875,7 +25746,7 @@ build_program
24875
25746
  .option("--pr-body <body>", "Pull request body used for evidence checks.")
24876
25747
  .option("--pr-body-file <path>", "Read a pull request body from a Markdown file.")
24877
25748
  .option("--config <path>", "Path to .proofpr.yml.", ".proofpr.yml")
24878
- .option("--format <format>", "Output format: markdown, json, or sarif.", parseFormat, "markdown")
25749
+ .option("--format <format>", "Output format: markdown, json, sarif, or html.", parseFormat, "markdown")
24879
25750
  .option("--locale <locale>", "Report language: en or zh-CN.")
24880
25751
  .option("--fail-on <level>", "Exit with code 1 on risk level: low, medium, high, or never.", parseFailLevel, "never")
24881
25752
  .action(async (options) => {
@@ -24908,6 +25779,35 @@ build_program
24908
25779
  await writeIfMissing(options.workflowPath, renderWorkflowTemplate(options.failOn), options.force);
24909
25780
  process.stdout.write(`ProofPR initialized:\n- ${options.configPath}\n- ${options.workflowPath}\n`);
24910
25781
  });
25782
+ build_program
25783
+ .command("benchmark")
25784
+ .description("Run ProofPR benchmark cases and compare expected risk/finding output.")
25785
+ .option("--cases <dir>", "Directory containing benchmark case JSON files.", "benchmarks/cases")
25786
+ .option("--format <format>", "Output format: text, markdown, or json.", parseBenchmarkFormat, "text")
25787
+ .option("--output <path>", "Write benchmark output to a file instead of stdout.")
25788
+ .action(async (options) => {
25789
+ const report = await runBenchmarks(options.cases);
25790
+ let output;
25791
+ if (options.format === "json") {
25792
+ output = `${JSON.stringify(report, null, 2)}\n`;
25793
+ }
25794
+ else if (options.format === "markdown") {
25795
+ output = renderBenchmarkMarkdown(report);
25796
+ }
25797
+ else {
25798
+ output = renderBenchmarkText(report);
25799
+ }
25800
+ if (options.output) {
25801
+ await writeOutput(options.output, output);
25802
+ process.stdout.write(`ProofPR benchmark report written to ${options.output}\n`);
25803
+ }
25804
+ else {
25805
+ process.stdout.write(output);
25806
+ }
25807
+ if (report.results.some((result) => !result.passed)) {
25808
+ process.exitCode = 1;
25809
+ }
25810
+ });
24911
25811
  build_program.parseAsync(process.argv).catch((error) => {
24912
25812
  const message = error instanceof Error ? error.message : String(error);
24913
25813
  process.stderr.write(`ProofPR failed: ${message}\n`);
@@ -24934,6 +25834,10 @@ async function writeIfMissing(path, contents, force) {
24934
25834
  await (0,promises_namespaceObject.mkdir)((0,external_node_path_.dirname)(path), { recursive: true });
24935
25835
  await (0,promises_namespaceObject.writeFile)(path, contents, "utf8");
24936
25836
  }
25837
/**
 * Write text to a file, creating the parent directory first.
 *
 * @param {string} path - Destination file path.
 * @param {string} contents - Text to write, encoded as UTF-8.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeOutput(path, contents) {
    const parentDirectory = (0, external_node_path_.dirname)(path);
    // `recursive: true` makes this a no-op when the directory already exists.
    await (0, promises_namespaceObject.mkdir)(parentDirectory, { recursive: true });
    await (0, promises_namespaceObject.writeFile)(path, contents, "utf8");
}
24937
25841
  async function pathExists(path) {
24938
25842
  try {
24939
25843
  await (0,promises_namespaceObject.access)(path);
@@ -24993,6 +25897,18 @@ comment:
24993
25897
  # flagNewPackages: true
24994
25898
  # flagMajorUpgrades: true
24995
25899
  # flagLifecycleScripts: true
25900
+ #
25901
+ # evidence:
25902
+ # contracts:
25903
+ # - id: ui-screenshot
25904
+ # title: UI changes need screenshots
25905
+ # paths:
25906
+ # - "src/components/**"
25907
+ # - "app/**"
25908
+ # requires:
25909
+ # - screenshot
25910
+ # - verification
25911
+ # severity: medium
24996
25912
  `;
24997
25913
  }
24998
25914
  function renderWorkflowTemplate(failOn) {
@@ -25011,7 +25927,7 @@ jobs:
25011
25927
  runs-on: ubuntu-latest
25012
25928
  steps:
25013
25929
  - uses: actions/checkout@v4
25014
- - uses: linsk27/proof-pr@v0.1.6
25930
+ - uses: linsk27/proof-pr@v0.1.7
25015
25931
  with:
25016
25932
  fail-on: ${failOn}
25017
25933
  comment: "true"
@@ -25025,13 +25941,172 @@ function renderOutput(result, format, locale) {
25025
25941
  if (format === "sarif") {
25026
25942
  return renderSarifReport(result);
25027
25943
  }
25944
+ if (format === "html") {
25945
+ return renderHtmlReport(result, locale);
25946
+ }
25028
25947
  return renderMarkdownReport(result, locale);
25029
25948
  }
25949
/**
 * Load one benchmark case file together with the diff it points to.
 *
 * @param {string} caseFile - Absolute path of the case JSON file.
 * @returns {Promise<{testCase: object, diffText: string}>} Parsed case and diff text.
 * @throws {Error} When the file is not valid JSON or lacks `diffFile`/`expect`;
 *   the message names the offending case file.
 */
async function loadBenchmarkCase(caseFile) {
    const rawCase = await (0, promises_namespaceObject.readFile)(caseFile, "utf8");
    let testCase;
    try {
        testCase = JSON.parse(rawCase);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Benchmark case ${caseFile} is not valid JSON: ${reason}`, { cause: error });
    }
    if (testCase === null || typeof testCase !== "object" || Array.isArray(testCase)) {
        throw new Error(`Benchmark case ${caseFile} must contain a JSON object.`);
    }
    if (typeof testCase.diffFile !== "string" || testCase.diffFile.length === 0) {
        throw new Error(`Benchmark case ${caseFile} must set "diffFile" to the path of a diff file.`);
    }
    if (testCase.expect === null || typeof testCase.expect !== "object") {
        throw new Error(`Benchmark case ${caseFile} must define an "expect" object.`);
    }
    // `diffFile` is resolved relative to the case file, not the working directory.
    const diffPath = (0, external_node_path_.resolve)((0, external_node_path_.dirname)(caseFile), testCase.diffFile);
    const diffText = await (0, promises_namespaceObject.readFile)(diffPath, "utf8");
    return { testCase, diffText };
}
/**
 * Run every benchmark case found directly inside a directory.
 *
 * Each `*.json` file is a case: its diff is scanned with the case's config and
 * pull request context, and the result is compared with the case's
 * expectations (`risk`, `reviewDecision`, `findings`, `absentFindings`).
 * Cases run in file-name order.
 *
 * @param {string} casesDir - Directory containing benchmark case JSON files.
 * @returns {Promise<{summary: object, results: Array<object>}>} Aggregate summary
 *   plus one result per case.
 * @throws {Error} When the directory cannot be read or a case file is malformed.
 */
async function runBenchmarks(casesDir) {
    const root = (0, external_node_path_.resolve)(casesDir);
    const entries = await (0, promises_namespaceObject.readdir)(root, { withFileTypes: true });
    const caseFiles = entries
        .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
        .map((entry) => (0, external_node_path_.resolve)(root, entry.name))
        .sort();
    const results = [];
    for (const caseFile of caseFiles) {
        const { testCase, diffText } = await loadBenchmarkCase(caseFile);
        const result = scanDiff(diffText, {
            config: testCase.config,
            pullRequest: testCase.pullRequest
        });
        const actualFindings = result.findings.map((finding) => finding.ruleId);
        const failures = [];
        if (testCase.expect.risk && result.risk !== testCase.expect.risk) {
            failures.push(`expected risk ${testCase.expect.risk}, got ${result.risk}`);
        }
        if (testCase.expect.reviewDecision && result.reviewDecision !== testCase.expect.reviewDecision) {
            failures.push(`expected review decision ${testCase.expect.reviewDecision}, got ${result.reviewDecision}`);
        }
        for (const expectedFinding of testCase.expect.findings ?? []) {
            if (!matchesFindingExpectation(actualFindings, expectedFinding)) {
                failures.push(`expected finding ${expectedFinding}`);
            }
        }
        for (const absentFinding of testCase.expect.absentFindings ?? []) {
            if (matchesFindingExpectation(actualFindings, absentFinding)) {
                failures.push(`unexpected finding ${absentFinding}`);
            }
        }
        results.push({
            id: testCase.id,
            title: testCase.title,
            category: testCase.category ?? "uncategorized",
            passed: failures.length === 0,
            failures,
            actual: {
                risk: result.risk,
                reviewDecision: result.reviewDecision,
                findings: actualFindings
            }
        });
    }
    return {
        summary: summarizeBenchmarkResults(results),
        results
    };
}
26000
/**
 * Aggregate benchmark case results.
 *
 * @param {Array<{category: string, passed: boolean, actual: {findings: string[]}}>} results
 *   Per-case results.
 * @returns {object} Overall totals and pass rate, per-category totals sorted by
 *   category name, and `findingCounts`: for each rule id, the number of cases
 *   that produced it (a rule counts once per case), sorted by descending count
 *   and then by rule id.
 */
function summarizeBenchmarkResults(results) {
    const tally = (items) => {
        const passed = items.filter((item) => item.passed).length;
        return {
            total: items.length,
            passed,
            failed: items.length - passed,
            passRate: ratio(passed, items.length)
        };
    };
    const resultsByCategory = new Map();
    const casesPerRule = new Map();
    for (const result of results) {
        if (!resultsByCategory.has(result.category)) {
            resultsByCategory.set(result.category, []);
        }
        resultsByCategory.get(result.category).push(result);
        // De-duplicate so a rule that fires twice in one case counts one case.
        for (const ruleId of new Set(result.actual.findings)) {
            casesPerRule.set(ruleId, (casesPerRule.get(ruleId) ?? 0) + 1);
        }
    }
    const categories = Array.from(resultsByCategory.entries())
        .sort(([left], [right]) => left.localeCompare(right))
        .map(([category, items]) => ({ category, ...tally(items) }));
    const findingCounts = Array.from(casesPerRule.entries())
        .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
        .map(([ruleId, count]) => ({ ruleId, count }));
    return { ...tally(results), categories, findingCounts };
}
26034
/**
 * Render a benchmark report as plain text.
 *
 * Layout: heading, overall summary, per-category pass counts, an optional
 * "Finding coverage" section (omitted when no findings were produced), then
 * one PASS/FAIL line per case followed by its failure messages.
 *
 * @param {{summary: object, results: Array<object>}} report - Report from `runBenchmarks`.
 * @returns {string} Newline-joined text ending with a trailing newline.
 */
function renderBenchmarkText(report) {
    const { summary, results } = report;
    const summaryLine = `Summary: ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`;
    const categoryLines = summary.categories.map((category) => `- ${category.category}: ${category.passed}/${category.total} passed (${formatPercent(category.passRate)})`);
    const coverageLines = summary.findingCounts.length > 0
        ? ["", "Finding coverage:", ...summary.findingCounts.map((item) => `- ${item.ruleId}: ${item.count}`)]
        : [];
    const caseLines = results.flatMap((result) => {
        const status = result.passed ? "PASS" : "FAIL";
        const titleSuffix = result.title ? ` - ${result.title}` : "";
        return [
            `${status} ${result.id}${titleSuffix}`,
            ...result.failures.map((failure) => ` - ${failure}`)
        ];
    });
    return [
        "ProofPR benchmark",
        "",
        summaryLine,
        "",
        "Categories:",
        ...categoryLines,
        ...coverageLines,
        "",
        ...caseLines,
        ""
    ].join("\n");
}
26061
/**
 * Render a benchmark report as Markdown.
 *
 * Emits a summary line followed by three tables: categories, finding
 * coverage, and per-case results.
 *
 * @param {{summary: object, results: Array<object>}} report - Report from `runBenchmarks`.
 * @returns {string} Newline-joined Markdown ending with a trailing newline.
 */
function renderBenchmarkMarkdown(report) {
    const { summary, results } = report;
    const summaryLine = `**Summary:** ${summary.passed}/${summary.total} passed (${formatPercent(summary.passRate)})`;
    const categoryRows = summary.categories.map((category) => `| ${category.category} | ${category.passed} | ${category.total} | ${formatPercent(category.passRate)} |`);
    const coverageRows = summary.findingCounts.map((item) => `| \`${item.ruleId}\` | ${item.count} |`);
    const caseRows = results.map((result) => `| ${result.passed ? "PASS" : "FAIL"} | \`${result.id}\` | ${result.category} | ${result.actual.risk} | ${result.actual.reviewDecision} |`);
    return [
        "# ProofPR Benchmark",
        "",
        summaryLine,
        "",
        "## Categories",
        "",
        "| Category | Passed | Total | Pass rate |",
        "| --- | ---: | ---: | ---: |",
        ...categoryRows,
        "",
        "## Finding Coverage",
        "",
        "| Rule | Cases |",
        "| --- | ---: |",
        ...coverageRows,
        "",
        "## Cases",
        "",
        "| Result | Case | Category | Actual risk | Gate |",
        "| --- | --- | --- | --- | --- |",
        ...caseRows,
        ""
    ].join("\n");
}
26086
/**
 * Divide `value` by `total`, treating an empty total as a ratio of zero.
 *
 * @param {number} value - Numerator.
 * @param {number} total - Denominator.
 * @returns {number} `value / total`, or 0 when `total` is 0.
 */
function ratio(value, total) {
    if (total === 0) {
        return 0;
    }
    return value / total;
}
26089
/**
 * Format a 0..1 ratio as a whole-number percentage.
 *
 * @param {number} value - Ratio to format.
 * @returns {string} The rounded percentage followed by "%".
 */
function formatPercent(value) {
    const wholePercent = Math.round(value * 100);
    return `${wholePercent}%`;
}
26092
/**
 * Check whether an expected rule id is present among the actual findings.
 *
 * An expectation ending in "*" is a prefix match on everything before the
 * star; any other expectation must equal a rule id exactly.
 *
 * @param {string[]} actualFindings - Rule ids produced by the scan.
 * @param {string} expected - Exact rule id, or a prefix followed by "*".
 * @returns {boolean} True when at least one finding matches.
 */
function matchesFindingExpectation(actualFindings, expected) {
    if (!expected.endsWith("*")) {
        return actualFindings.includes(expected);
    }
    const prefix = expected.slice(0, -1);
    return actualFindings.some((ruleId) => ruleId.startsWith(prefix));
}
25030
26099
/**
 * Validate the `--format` option of the scan command.
 *
 * @param {string} value - Raw option value.
 * @returns {string} The value, when it is one of markdown, json, sarif or html.
 * @throws {InvalidArgumentError} For any other value.
 */
function parseFormat(value) {
    const supportedFormats = ["json", "markdown", "sarif", "html"];
    if (!supportedFormats.includes(value)) {
        throw new InvalidArgumentError("format must be one of: markdown, json, sarif, html");
    }
    return value;
}
/**
 * Validate the `--format` option of the benchmark command.
 *
 * @param {string} value - Raw option value.
 * @returns {string} The value, when it is one of text, markdown or json.
 * @throws {InvalidArgumentError} For any other value.
 */
function parseBenchmarkFormat(value) {
    const supportedFormats = ["text", "json", "markdown"];
    if (!supportedFormats.includes(value)) {
        throw new InvalidArgumentError("benchmark format must be one of: text, markdown, json");
    }
    return value;
}
25036
26111
  function parseFailLevel(value) {
25037
26112
  if (value === "low" || value === "medium" || value === "high" || value === "never") {