claude-dev-env 1.59.0 → 1.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/audit-rubrics/category_rubrics/category-b-selector-engine-compat.md +1 -1
  2. package/audit-rubrics/category_rubrics/category-e-dead-code.md +1 -0
  3. package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
  4. package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
  5. package/hooks/blocking/code_rules_dead_module_constant.py +321 -0
  6. package/hooks/blocking/code_rules_duplicate_body.py +152 -0
  7. package/hooks/blocking/code_rules_enforcer.py +30 -15
  8. package/hooks/blocking/code_rules_typeddict_stub.py +172 -0
  9. package/hooks/blocking/config/__init__.py +5 -0
  10. package/hooks/blocking/config/verified_commit_constants.py +106 -0
  11. package/hooks/blocking/test_code_rules_enforcer_cross_skill_duplicate.py +146 -0
  12. package/hooks/blocking/test_code_rules_enforcer_dead_module_constant.py +188 -0
  13. package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias.py +415 -0
  14. package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias_hook_routing.py +156 -0
  15. package/hooks/blocking/test_verdict_directory_write_blocker.py +720 -0
  16. package/hooks/blocking/test_verification_verdict_store.py +278 -0
  17. package/hooks/blocking/test_verified_commit_gate.py +368 -0
  18. package/hooks/blocking/test_verified_commit_message_accuracy_blocker.py +131 -0
  19. package/hooks/blocking/test_verifier_verdict_minter.py +214 -0
  20. package/hooks/blocking/verdict_directory_write_blocker.py +667 -0
  21. package/hooks/blocking/verification_verdict_store.py +446 -0
  22. package/hooks/blocking/verified_commit_gate.py +523 -0
  23. package/hooks/blocking/verified_commit_message_accuracy_blocker.py +152 -0
  24. package/hooks/blocking/verifier_verdict_minter.py +299 -0
  25. package/hooks/diagnostic/test_hook_log_extractor.py +3 -3
  26. package/hooks/hooks.json +43 -1
  27. package/hooks/hooks_constants/blocking_check_limits.py +1 -0
  28. package/hooks/hooks_constants/dead_module_constant_constants.py +20 -0
  29. package/hooks/hooks_constants/duplicate_function_body_constants.py +22 -5
  30. package/hooks/hooks_constants/precommit_code_rules_gate_constants.py +1 -1
  31. package/package.json +1 -1
  32. package/rules/file-global-constants.md +7 -1
  33. package/rules/no-cross-skill-duplicate-helpers.md +29 -0
  34. package/skills/_shared/pr-loop/scripts/preflight_worktree.py +392 -0
  35. package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/preflight_constants.py +70 -0
  36. package/skills/_shared/pr-loop/scripts/test_preflight_worktree.py +263 -0
  37. package/skills/autoconverge/SKILL.md +54 -17
  38. package/skills/autoconverge/reference/closing-report.md +59 -17
  39. package/skills/autoconverge/workflow/aggregate_runs.py +371 -0
  40. package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +193 -76
  41. package/skills/autoconverge/workflow/converge.clean-audit.test.mjs +76 -0
  42. package/skills/autoconverge/workflow/converge.contract.test.mjs +206 -206
  43. package/skills/autoconverge/workflow/converge.mjs +128 -6
  44. package/skills/autoconverge/workflow/convergence_summary.py +110 -0
  45. package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-ab1c2d3e4f5a6b7c8.jsonl +2 -0
  46. package/skills/autoconverge/workflow/fixtures/wf_run/workflows/wf_881252e6-700.json +7 -0
  47. package/skills/autoconverge/workflow/render_report.py +488 -397
  48. package/skills/autoconverge/workflow/test_aggregate_runs.py +134 -0
  49. package/skills/autoconverge/workflow/test_convergence_summary.py +132 -0
  50. package/skills/autoconverge/workflow/test_render_report.py +488 -259
  51. package/skills/pr-converge/reference/per-tick.md +28 -8
  52. package/skills/rebase/SKILL.md +2 -4
  53. package/system-prompts/software-engineer.xml +2 -6
  54. package/hooks/blocking/content_search_to_zoekt_redirector.py +0 -59
  55. package/hooks/blocking/content_search_zoekt_bash_block_reason.py +0 -25
  56. package/hooks/blocking/content_search_zoekt_block_payload.py +0 -21
  57. package/hooks/blocking/content_search_zoekt_indexed_paths.py +0 -24
  58. package/hooks/blocking/content_search_zoekt_indexed_roots_config.py +0 -131
  59. package/hooks/blocking/content_search_zoekt_redirect_guidance.py +0 -52
  60. package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +0 -61
  61. package/hooks/blocking/test_content_search_to_zoekt_redirector_unit.py +0 -92
  62. package/hooks/blocking/test_content_search_zoekt_indexed_roots_config.py +0 -102
@@ -15,59 +15,20 @@ FIXTURE_DIR = Path(__file__).resolve().parent / "fixtures" / "wf_run"
15
15
  FIXTURE_JOURNAL = FIXTURE_DIR / "workflows" / "wf_881252e6-700.json"
16
16
 
17
17
  EXPECTED_TOTAL_FINDINGS = 15
18
- EXPECTED_CRITICAL_COUNT = 0
19
- EXPECTED_MINOR_COUNT = 15
20
18
  EXPECTED_FIX_COMMIT_COUNT = 2
21
19
  EXPECTED_GENERATED_DATE = "2026-06-13"
22
- EXPECTED_FINDINGS_BY_ROUND = {1: 11, 2: 2, 3: 2, 4: 0}
23
- EXPECTED_FINDINGS_BY_THEME = {"src/exports": 11, "src/logging": 2, "src/web": 2}
20
+ EXPECTED_ROUND_COUNT = 4
24
21
 
25
22
 
26
- def test_load_run_data_aggregate_counts() -> None:
27
- """Should parse the fixture journal and transcripts into correct aggregate counts."""
28
- run_data = render_report.load_run_data(FIXTURE_JOURNAL, Path("."))
29
-
30
- assert run_data.total_finding_count == EXPECTED_TOTAL_FINDINGS
31
- assert run_data.critical_finding_count == EXPECTED_CRITICAL_COUNT
32
- assert run_data.minor_finding_count == EXPECTED_MINOR_COUNT
33
- assert run_data.fix_commit_count == EXPECTED_FIX_COMMIT_COUNT
34
- assert run_data.generated_date == EXPECTED_GENERATED_DATE
35
-
36
-
37
- def test_load_run_data_by_round_counts() -> None:
38
- """Should assign findings to rounds by workflowProgress position boundary."""
39
- run_data = render_report.load_run_data(FIXTURE_JOURNAL, Path("."))
40
-
41
- for each_round, expected_count in EXPECTED_FINDINGS_BY_ROUND.items():
42
- actual_count = run_data.finding_count_by_round.get(each_round, 0)
43
- assert actual_count == expected_count, (
44
- f"Round {each_round}: expected {expected_count}, got {actual_count}"
45
- )
46
-
47
-
48
- def test_load_run_data_by_theme_counts() -> None:
49
- """Should group distinct findings by the first two path segments."""
50
- run_data = render_report.load_run_data(FIXTURE_JOURNAL, Path("."))
51
-
52
- assert len(run_data.finding_count_by_theme) == len(EXPECTED_FINDINGS_BY_THEME)
53
- for each_theme, expected_count in EXPECTED_FINDINGS_BY_THEME.items():
54
- actual_count = run_data.finding_count_by_theme.get(each_theme, 0)
55
- assert actual_count == expected_count, (
56
- f"Theme {each_theme}: expected {expected_count}, got {actual_count}"
57
- )
58
-
59
-
60
- def test_cli_end_to_end(tmp_path: Path) -> None:
61
- """Should exit 0, print the output path, and write HTML with expected substrings."""
62
- out_path = tmp_path / "report.html"
23
+ def _render_cli(journal_path: Path, out_path: Path) -> subprocess.CompletedProcess[str]:
24
+ """Run the render_report CLI against a journal and return the completed process."""
63
25
  render_script = Path(__file__).resolve().parent / "render_report.py"
64
-
65
- completed = subprocess.run(
26
+ return subprocess.run(
66
27
  [
67
28
  sys.executable,
68
29
  str(render_script),
69
30
  "--journal",
70
- str(FIXTURE_JOURNAL),
31
+ str(journal_path),
71
32
  "--out",
72
33
  str(out_path),
73
34
  "--pr",
@@ -76,58 +37,297 @@ def test_cli_end_to_end(tmp_path: Path) -> None:
76
37
  "7c2f420c4d5b7c83aa47f93d99a0f1420e3373c4",
77
38
  "--rounds",
78
39
  "4",
79
- "--repo",
80
- ".",
81
40
  ],
82
41
  capture_output=True,
83
42
  text=True,
84
43
  )
85
44
 
86
- assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
87
45
 
88
- printed_path = completed.stdout.strip()
89
- assert printed_path == str(out_path), (
90
- f"Expected stdout {out_path!r}, got {printed_path!r}"
46
+ def _copy_run_tree_without_summary_entry(destination_root: Path) -> Path:
47
+ """Copy the fixture run tree, dropping the convergence-summary workflowProgress entry.
48
+
49
+ Returns the path to the copied journal whose summarizer entry has been removed.
50
+ """
51
+ shutil.copytree(FIXTURE_DIR, destination_root)
52
+ journal_destination = destination_root / "workflows" / FIXTURE_JOURNAL.name
53
+ journal = json.loads(journal_destination.read_text(encoding="utf-8"))
54
+ journal["workflowProgress"] = [
55
+ each_entry
56
+ for each_entry in journal["workflowProgress"]
57
+ if each_entry.get("label") != render_report.LABEL_CONVERGENCE_SUMMARY
58
+ ]
59
+ journal_destination.write_text(json.dumps(journal, indent=2), encoding="utf-8")
60
+ return journal_destination
61
+
62
+
63
+ def test_load_run_data_aggregate_counts() -> None:
64
+ """Should parse the fixture journal and transcripts into correct aggregate counts."""
65
+ run_data = render_report.load_run_data(FIXTURE_JOURNAL)
66
+
67
+ assert run_data.total_finding_count == EXPECTED_TOTAL_FINDINGS
68
+ assert run_data.fix_commit_count == EXPECTED_FIX_COMMIT_COUNT
69
+ assert run_data.generated_date == EXPECTED_GENERATED_DATE
70
+ assert len(run_data.all_distinct_findings) == EXPECTED_TOTAL_FINDINGS
71
+
72
+
73
+ def test_load_run_data_parses_convergence_summary() -> None:
74
+ """Should locate the convergence-summary entry and parse its StructuredOutput."""
75
+ run_data = render_report.load_run_data(FIXTURE_JOURNAL)
76
+
77
+ assert run_data.convergence_summary is not None
78
+ verdict_line = run_data.convergence_summary["verdictLine"]
79
+ issue_classes = run_data.convergence_summary["issueClasses"]
80
+ assert isinstance(verdict_line, str) and verdict_line
81
+ assert isinstance(issue_classes, list) and len(issue_classes) == 3
82
+
83
+
84
+ def test_load_run_data_carries_category_on_findings() -> None:
85
+ """Should default each finding's category to 'bug' when the raw dict omits it."""
86
+ run_data = render_report.load_run_data(FIXTURE_JOURNAL)
87
+
88
+ assert all(
89
+ each_finding.category == render_report.CATEGORY_BUG
90
+ for each_finding in run_data.all_distinct_findings
91
91
  )
92
92
 
93
+
94
+ def test_cli_renders_verdict_banner_with_python_computed_vsub(tmp_path: Path) -> None:
95
+ """Should render the verdict banner with verdictLine and a Python-computed vsub."""
96
+ out_path = tmp_path / "report.html"
97
+
98
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
99
+
100
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
101
+ assert completed.stdout.strip() == str(out_path)
93
102
  assert out_path.exists(), "Output HTML file was not written"
103
+
94
104
  html_content = out_path.read_text(encoding="utf-8")
105
+ assert "PR #211 Convergence Summary" in html_content
106
+ assert 'class="verdict"' in html_content
107
+ assert 'class="vtext"' in html_content
108
+ assert "Converged in 4 rounds; 3 distinct issue classes were caught and fixed." in (
109
+ html_content
110
+ )
111
+ assert 'class="vsub"' in html_content
112
+ assert "2 fix commits" in html_content
113
+ assert "final commit 7c2f420c" in html_content
95
114
 
96
- expected_substrings = [
97
- "PR #211 Convergence Insights",
98
- "at-a-glance",
99
- "Findings by severity",
100
- "Findings by round",
101
- "Tests added per round",
102
- "Findings by theme",
103
- "Banned identifier",
104
- "result",
105
- "in test",
106
- "Converged",
107
- "7c2f420c",
108
- ]
109
- for each_substring in expected_substrings:
110
- assert each_substring in html_content, (
111
- f"Expected substring not found in HTML: {each_substring!r}"
112
- )
113
115
 
114
- minor_card_count = html_content.count('class="bug-card minor"')
115
- assert minor_card_count == EXPECTED_MINOR_COUNT, (
116
- f"Expected {EXPECTED_MINOR_COUNT} minor cards, found {minor_card_count}"
117
- )
116
+ def test_cli_renders_problem_and_fix_scene_cards(tmp_path: Path) -> None:
117
+ """Should draw problem and fix scene cards with trigger, result, and caption."""
118
+ out_path = tmp_path / "report-scenes.html"
118
119
 
120
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
121
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
119
122
 
120
- def test_html_contains_no_hedging_words(tmp_path: Path) -> None:
121
- """Should produce HTML with no hedging language anywhere in the rendered text."""
122
- out_path = tmp_path / "report-hedge.html"
123
- render_script = Path(__file__).resolve().parent / "render_report.py"
123
+ html_content = out_path.read_text(encoding="utf-8")
124
+ assert 'class="pf-grid"' in html_content
125
+ assert 'class="pf problem"' in html_content
126
+ assert 'class="pf fix"' in html_content
127
+ assert "export stops at batch 90 of 100" in html_content
128
+ assert "starts again at batch 1" in html_content
129
+ assert "continues at batch 91" in html_content
130
+ assert 'class="res-bad"' in html_content
131
+ assert 'class="res-good"' in html_content
132
+ assert "began again" in html_content
133
+
134
+
135
+ def test_render_issue_class_panels_for_each_medium() -> None:
136
+ """Should draw before/after panels: a code panel and a terminal panel per medium."""
137
+ convergence_summary = {
138
+ "verdictLine": "Converged.",
139
+ "problemScenes": [],
140
+ "fixScenes": [],
141
+ "issueClasses": [
142
+ {
143
+ "plainName": "A missing return type",
144
+ "count": 3,
145
+ "severity": "P2",
146
+ "category": "code-standard",
147
+ "status": "fixed",
148
+ "cause": "Tests did not declare their return type.",
149
+ "medium": "code",
150
+ "beforeLines": ["def test_x():"],
151
+ "afterLines": ["def test_x() -> None:"],
152
+ },
153
+ {
154
+ "plainName": "An install that did nothing",
155
+ "count": 1,
156
+ "severity": "P1",
157
+ "category": "bug",
158
+ "status": "fixed",
159
+ "cause": "The command skipped the install.",
160
+ "medium": "terminal",
161
+ "beforeLines": ["~ $ install", "(no output)"],
162
+ "afterLines": ["~ $ install", "Installed."],
163
+ },
164
+ ],
165
+ }
166
+
167
+ panels_html = render_report._render_issue_class_panels(convergence_summary)
168
+
169
+ assert 'class="code-panel"' in panels_html
170
+ assert "def test_x() -> None:" in panels_html
171
+ assert 'class="terminal"' in panels_html
172
+ assert 'class="term-bar"' in panels_html
173
+ assert "Installed." in panels_html
174
+ assert 'class="term-grid"' in panels_html
175
+ assert 'class="bug-head"' in panels_html
176
+ assert "A missing return type" in panels_html
177
+ assert "An install that did nothing" in panels_html
178
+ assert "3 findings" in panels_html
179
+ assert "1 finding" in panels_html
180
+
181
+
182
+ def test_cli_renders_cause_line_with_severity_parenthetical(tmp_path: Path) -> None:
183
+ """Should render a cause line carrying the plain cause and a muted parenthetical."""
184
+ out_path = tmp_path / "report-cause.html"
124
185
 
125
- subprocess.run(
186
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
187
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
188
+
189
+ html_content = out_path.read_text(encoding="utf-8")
190
+ assert 'class="cause"' in html_content
191
+ assert "which the project's type checker wants" in html_content
192
+ assert "P2" in html_content
193
+ assert "code standard" in html_content
194
+ assert "×7" in html_content
195
+ assert "fixed" in html_content
196
+
197
+
198
+ def test_render_issue_class_panels_omitted_when_lines_empty() -> None:
199
+ """Should draw only the cause line when both before and after lines are empty."""
200
+ convergence_summary = {
201
+ "verdictLine": "Converged.",
202
+ "problemScenes": [],
203
+ "fixScenes": [],
204
+ "issueClasses": [
205
+ {
206
+ "plainName": "A cause-only class",
207
+ "count": 1,
208
+ "severity": "P2",
209
+ "category": "code-standard",
210
+ "status": "fixed",
211
+ "cause": "Nothing visual to show.",
212
+ "medium": "text",
213
+ "beforeLines": [],
214
+ "afterLines": [],
215
+ }
216
+ ],
217
+ }
218
+
219
+ panels_html = render_report._render_issue_class_panels(convergence_summary)
220
+
221
+ assert 'class="term-grid"' not in panels_html
222
+ assert 'class="bug-head"' in panels_html
223
+ assert "A cause-only class" in panels_html
224
+ assert 'class="cause"' in panels_html
225
+ assert "Nothing visual to show." in panels_html
226
+
227
+
228
+ def test_render_issue_class_panels_clean_state_when_no_classes() -> None:
229
+ """Should render a clean-state line, not an empty section, when no classes exist."""
230
+ convergence_summary = {
231
+ "verdictLine": "Converged with no issues caught.",
232
+ "problemScenes": [],
233
+ "fixScenes": [],
234
+ "issueClasses": [],
235
+ }
236
+
237
+ panels_html = render_report._render_issue_class_panels(convergence_summary)
238
+
239
+ assert 'class="term-grid"' not in panels_html
240
+ assert "No issues were caught" in panels_html
241
+
242
+
243
+ def test_cli_merges_run_stats_lead_into_caught_section(tmp_path: Path) -> None:
244
+ """Should lead the caught section with run stats and omit any timeline section."""
245
+ out_path = tmp_path / "report-caught-lead.html"
246
+
247
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
248
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
249
+
250
+ html_content = out_path.read_text(encoding="utf-8")
251
+ assert "What was caught" in html_content
252
+ assert "3 bug classes" in html_content
253
+ assert "15 findings in all" in html_content
254
+ assert "caught and fixed across 4 rounds" in html_content
255
+ assert "2 fix commits" in html_content
256
+ assert "How it converged" not in html_content
257
+ assert 'class="timeline"' not in html_content
258
+ assert 'class="tstep' not in html_content
259
+
260
+
261
+ def test_cli_includes_collapsed_appendix(tmp_path: Path) -> None:
262
+ """Should include a collapsed details appendix listing every distinct finding."""
263
+ out_path = tmp_path / "report-appendix.html"
264
+
265
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
266
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
267
+
268
+ html_content = out_path.read_text(encoding="utf-8")
269
+ assert '<details class="appendix"' in html_content
270
+ assert f"Raw findings ({EXPECTED_TOTAL_FINDINGS})" in html_content
271
+ assert "src/exports/tests/test_resume_skip_export.py:35" in html_content
272
+
273
+
274
+ def test_cli_degraded_layout_when_summary_entry_absent(tmp_path: Path) -> None:
275
+ """Should render the timeline and appendix but no scene, table, or rollup markup."""
276
+ run_root = tmp_path / "wf_run_no_summary"
277
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
278
+
279
+ out_path = tmp_path / "report-degraded.html"
280
+ completed = _render_cli(journal_destination, out_path)
281
+
282
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
283
+ html_content = out_path.read_text(encoding="utf-8")
284
+
285
+ assert "PR #211 Convergence Summary" in html_content
286
+ assert 'class="timeline"' not in html_content
287
+ assert "distinct findings across 4 rounds" in html_content
288
+ assert '<details class="appendix"' in html_content
289
+ assert 'class="pf-grid"' not in html_content
290
+ assert 'class="issue-table"' not in html_content
291
+ assert 'class="rollup"' not in html_content
292
+ assert 'class="pr-summary"' not in html_content
293
+
294
+
295
+ def test_cli_injects_summary_from_file_bypassing_transcripts(tmp_path: Path) -> None:
296
+ """Should render the full summary body from --summary-file when no summary transcript exists."""
297
+ run_root = tmp_path / "wf_run_inject"
298
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
299
+
300
+ summary = {
301
+ "prProblem": "PhotoSync stopped backing up photos after an account switch.",
302
+ "prFix": "It re-checks the account on each backup, so a switch never halts backups.",
303
+ "problemScenes": [],
304
+ "fixScenes": [],
305
+ "verdictLine": "Converged in 4 rounds; every class is fixed.",
306
+ "issueClasses": [
307
+ {
308
+ "plainName": "An injected class the transcript never carried",
309
+ "count": 2,
310
+ "severity": "P1",
311
+ "category": "bug",
312
+ "status": "fixed",
313
+ "cause": "A concrete grounded cause sentence.",
314
+ "medium": "text",
315
+ "beforeLines": [],
316
+ "afterLines": [],
317
+ }
318
+ ],
319
+ }
320
+ summary_path = tmp_path / "summary.json"
321
+ summary_path.write_text(json.dumps(summary), encoding="utf-8")
322
+
323
+ out_path = tmp_path / "report-injected.html"
324
+ render_script = Path(__file__).resolve().parent / "render_report.py"
325
+ completed = subprocess.run(
126
326
  [
127
327
  sys.executable,
128
328
  str(render_script),
129
329
  "--journal",
130
- str(FIXTURE_JOURNAL),
330
+ str(journal_destination),
131
331
  "--out",
132
332
  str(out_path),
133
333
  "--pr",
@@ -136,14 +336,29 @@ def test_html_contains_no_hedging_words(tmp_path: Path) -> None:
136
336
  "7c2f420c4d5b7c83aa47f93d99a0f1420e3373c4",
137
337
  "--rounds",
138
338
  "4",
139
- "--repo",
140
- ".",
339
+ "--summary-file",
340
+ str(summary_path),
141
341
  ],
142
342
  capture_output=True,
143
343
  text=True,
144
- check=True,
145
344
  )
146
345
 
346
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
347
+ html_content = out_path.read_text(encoding="utf-8")
348
+ assert 'class="verdict"' in html_content
349
+ assert "Converged in 4 rounds; every class is fixed." in html_content
350
+ assert "An injected class the transcript never carried" in html_content
351
+ assert 'class="pf-grid"' in html_content
352
+ assert f"Raw findings ({EXPECTED_TOTAL_FINDINGS})" in html_content
353
+
354
+
355
+ def test_html_contains_no_hedging_words(tmp_path: Path) -> None:
356
+ """Should produce HTML with no hedging language anywhere in the rendered narrative."""
357
+ out_path = tmp_path / "report-hedge.html"
358
+
359
+ completed = _render_cli(FIXTURE_JOURNAL, out_path)
360
+ assert completed.returncode == 0, f"CLI failed:\n{completed.stderr}"
361
+
147
362
  html_content = out_path.read_text(encoding="utf-8")
148
363
  all_hedging_words = [
149
364
  "could",
@@ -162,141 +377,6 @@ def test_html_contains_no_hedging_words(tmp_path: Path) -> None:
162
377
  )
163
378
 
164
379
 
165
- def _init_git_repo(repo_path: Path) -> None:
166
- """Initialize a git repo with a committed baseline so diffs resolve."""
167
- subprocess.run(
168
- ["git", "-C", str(repo_path), "init"], capture_output=True, check=True
169
- )
170
- subprocess.run(
171
- ["git", "-C", str(repo_path), "config", "user.email", "test@example.com"],
172
- capture_output=True,
173
- check=True,
174
- )
175
- subprocess.run(
176
- ["git", "-C", str(repo_path), "config", "user.name", "Test"],
177
- capture_output=True,
178
- check=True,
179
- )
180
- (repo_path / "README.md").write_text("baseline\n", encoding="utf-8")
181
- subprocess.run(
182
- ["git", "-C", str(repo_path), "add", "."], capture_output=True, check=True
183
- )
184
- subprocess.run(
185
- ["git", "-C", str(repo_path), "commit", "-m", "baseline"],
186
- capture_output=True,
187
- check=True,
188
- )
189
-
190
-
191
- def _resolve_head(repo_path: Path) -> str:
192
- """Return the current HEAD sha of the repo."""
193
- completed = subprocess.run(
194
- ["git", "-C", str(repo_path), "rev-parse", "HEAD"],
195
- capture_output=True,
196
- text=True,
197
- check=True,
198
- )
199
- return completed.stdout.strip()
200
-
201
-
202
- def test_count_tests_added_does_not_double_count_new_file(tmp_path: Path) -> None:
203
- """Should count a new test file with two test functions as exactly two."""
204
- repo_path = tmp_path / "repo"
205
- repo_path.mkdir()
206
- _init_git_repo(repo_path)
207
- base_sha = _resolve_head(repo_path)
208
-
209
- new_test_file = repo_path / "test_feature.py"
210
- new_test_file.write_text(
211
- "def test_one() -> None:\n"
212
- " assert True\n"
213
- "\n"
214
- "def test_two() -> None:\n"
215
- " assert True\n",
216
- encoding="utf-8",
217
- )
218
- subprocess.run(
219
- ["git", "-C", str(repo_path), "add", "."], capture_output=True, check=True
220
- )
221
- subprocess.run(
222
- ["git", "-C", str(repo_path), "commit", "-m", "add tests"],
223
- capture_output=True,
224
- check=True,
225
- )
226
- new_sha = _resolve_head(repo_path)
227
-
228
- test_count = render_report._count_tests_added(base_sha, new_sha, repo_path)
229
-
230
- assert test_count == 2, f"Expected 2 added test definitions, got {test_count}"
231
-
232
-
233
- def test_count_tests_added_counts_nested_test_directory(tmp_path: Path) -> None:
234
- """Should count test functions added under a nested src/<pkg>/tests/ layout."""
235
- repo_path = tmp_path / "repo"
236
- repo_path.mkdir()
237
- _init_git_repo(repo_path)
238
- base_sha = _resolve_head(repo_path)
239
-
240
- nested_test_file = repo_path / "src" / "exports" / "tests" / "test_feature.py"
241
- nested_test_file.parent.mkdir(parents=True)
242
- nested_test_file.write_text(
243
- "def test_one() -> None:\n"
244
- " assert True\n"
245
- "\n"
246
- "def test_two() -> None:\n"
247
- " assert True\n",
248
- encoding="utf-8",
249
- )
250
- subprocess.run(
251
- ["git", "-C", str(repo_path), "add", "."], capture_output=True, check=True
252
- )
253
- subprocess.run(
254
- ["git", "-C", str(repo_path), "commit", "-m", "add nested tests"],
255
- capture_output=True,
256
- check=True,
257
- )
258
- new_sha = _resolve_head(repo_path)
259
-
260
- test_count = render_report._count_tests_added(base_sha, new_sha, repo_path)
261
-
262
- assert test_count == 2, (
263
- f"Expected 2 added test definitions in nested dir, got {test_count}"
264
- )
265
-
266
-
267
- def test_count_tests_added_counts_should_functions(tmp_path: Path) -> None:
268
- """Should count pytest should_* functions, not only def test functions."""
269
- repo_path = tmp_path / "repo"
270
- repo_path.mkdir()
271
- _init_git_repo(repo_path)
272
- base_sha = _resolve_head(repo_path)
273
-
274
- new_test_file = repo_path / "test_behavior.py"
275
- new_test_file.write_text(
276
- "def should_validate_order() -> None:\n"
277
- " assert True\n"
278
- "\n"
279
- "def test_explicit() -> None:\n"
280
- " assert True\n",
281
- encoding="utf-8",
282
- )
283
- subprocess.run(
284
- ["git", "-C", str(repo_path), "add", "."], capture_output=True, check=True
285
- )
286
- subprocess.run(
287
- ["git", "-C", str(repo_path), "commit", "-m", "add should and test"],
288
- capture_output=True,
289
- check=True,
290
- )
291
- new_sha = _resolve_head(repo_path)
292
-
293
- test_count = render_report._count_tests_added(base_sha, new_sha, repo_path)
294
-
295
- assert test_count == 2, (
296
- f"Expected 2 added definitions (should_ + test), got {test_count}"
297
- )
298
-
299
-
300
380
  def test_extract_structured_output_returns_last_tool_input(tmp_path: Path) -> None:
301
381
  """Should return the input of the last StructuredOutput tool_use in the transcript."""
302
382
  transcript_path = tmp_path / "agent-stream.jsonl"
@@ -326,7 +406,9 @@ def test_extract_structured_output_returns_last_tool_input(tmp_path: Path) -> No
326
406
  }
327
407
  }
328
408
  )
329
- transcript_path.write_text(earlier_line + "\n" + later_line + "\n", encoding="utf-8")
409
+ transcript_path.write_text(
410
+ earlier_line + "\n" + later_line + "\n", encoding="utf-8"
411
+ )
330
412
 
331
413
  extracted = render_report._extract_structured_output(transcript_path)
332
414
 
@@ -342,32 +424,14 @@ def test_extract_structured_output_returns_none_on_missing_file(tmp_path: Path)
342
424
  assert extracted is None
343
425
 
344
426
 
345
- def test_render_fix_block_falls_back_when_sha_empty() -> None:
346
- """Should not claim a commit when the fix record has an empty new sha."""
347
- finding = render_report.RawFinding(
348
- file="src/exports/writer.py",
349
- line=10,
350
- severity="P2",
351
- title="example finding",
352
- detail="example detail",
353
- round_number=2,
354
- sha="abc",
427
+ def test_fix_record_carries_summary_text() -> None:
428
+ """Should read the fix agent's summary field into the FixRecord."""
429
+ fix_record = render_report._parse_fix_record(
430
+ {"newSha": "abcd1234", "pushed": True, "summary": "renamed and annotated"},
431
+ base_sha="base",
355
432
  )
356
- fix_by_round = {
357
- 2: render_report.FixRecord(
358
- new_sha="",
359
- pushed=False,
360
- resolved_without_commit=False,
361
- round_number=2,
362
- base_sha="base",
363
- )
364
- }
365
-
366
- fix_html = render_report._render_fix_block(finding, fix_by_round)
367
433
 
368
- assert "<code></code>" not in fix_html
369
- assert "fix commit" not in fix_html
370
- assert "resolved during convergence" in fix_html
434
+ assert fix_record.summary == "renamed and annotated"
371
435
 
372
436
 
373
437
  def _write_structured_output_transcript(
@@ -423,7 +487,10 @@ def test_base_sha_resets_each_round_when_prior_fix_transcript_missing(
423
487
  {"label": render_report.LABEL_PREFIX_FIX + "copilot", "agentId": "missing-fix"},
424
488
  {"label": render_report.LABEL_RESOLVE_HEAD, "agentId": "round-two-resolve"},
425
489
  {"label": render_report.LABEL_COPILOT_GATE, "agentId": round_two_gate_id},
426
- {"label": render_report.LABEL_PREFIX_FIX + "copilot", "agentId": round_two_fix_id},
490
+ {
491
+ "label": render_report.LABEL_PREFIX_FIX + "copilot",
492
+ "agentId": round_two_fix_id,
493
+ },
427
494
  ]
428
495
 
429
496
  _all_findings, fix_by_round = render_report._parse_progress_entries(
@@ -436,26 +503,173 @@ def test_base_sha_resets_each_round_when_prior_fix_transcript_missing(
436
503
  )
437
504
 
438
505
 
439
- def test_robustness_with_missing_transcripts(tmp_path: Path) -> None:
440
- """Should exit 0 and render zero finding cards when no agent transcripts exist."""
441
- run_root = tmp_path / "wf_run"
442
- journal_destination = run_root / "workflows" / FIXTURE_JOURNAL.name
443
- journal_destination.parent.mkdir(parents=True)
444
- shutil.copy(FIXTURE_JOURNAL, journal_destination)
506
+ def _render_cli_with_summary_file(
507
+ journal_path: Path, out_path: Path, summary_path: Path
508
+ ) -> subprocess.CompletedProcess[str]:
509
+ """Run the render CLI with an injected --summary-file and return the process."""
510
+ render_script = Path(__file__).resolve().parent / "render_report.py"
511
+ return subprocess.run(
512
+ [
513
+ sys.executable,
514
+ str(render_script),
515
+ "--journal",
516
+ str(journal_path),
517
+ "--out",
518
+ str(out_path),
519
+ "--pr",
520
+ "example-owner/example-repo#211",
521
+ "--final-sha",
522
+ "7c2f420c4d5b7c83aa47f93d99a0f1420e3373c4",
523
+ "--rounds",
524
+ "4",
525
+ "--summary-file",
526
+ str(summary_path),
527
+ ],
528
+ capture_output=True,
529
+ text=True,
530
+ )
445
531
 
446
- run_id = FIXTURE_JOURNAL.stem
447
- empty_agents_dir = run_root / "subagents" / "workflows" / run_id
448
- empty_agents_dir.mkdir(parents=True)
449
532
 
450
- out_path = tmp_path / "report-robust.html"
451
- render_script = Path(__file__).resolve().parent / "render_report.py"
533
+ def test_cli_renders_when_issue_class_count_is_null(tmp_path: Path) -> None:
534
+ """Should exit 0 and show a zero count when an issue class carries count: null."""
535
+ run_root = tmp_path / "wf_run_null_count"
536
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
537
+
538
+ summary = {
539
+ "prProblem": "A problem.",
540
+ "prFix": "A fix.",
541
+ "problemScenes": [],
542
+ "fixScenes": [],
543
+ "verdictLine": "Converged.",
544
+ "issueClasses": [
545
+ {
546
+ "plainName": "A class with a null count",
547
+ "count": None,
548
+ "severity": "P2",
549
+ "category": "bug",
550
+ "status": "fixed",
551
+ "cause": "A grounded cause.",
552
+ "medium": "text",
553
+ "beforeLines": [],
554
+ "afterLines": [],
555
+ }
556
+ ],
557
+ }
558
+ summary_path = tmp_path / "summary-null-count.json"
559
+ summary_path.write_text(json.dumps(summary), encoding="utf-8")
560
+
561
+ out_path = tmp_path / "report-null-count.html"
562
+ completed = _render_cli_with_summary_file(
563
+ journal_destination, out_path, summary_path
564
+ )
565
+
566
+ assert completed.returncode == 0, f"CLI crashed on null count:\n{completed.stderr}"
567
+ html_content = out_path.read_text(encoding="utf-8")
568
+ assert "A class with a null count" in html_content
569
+ assert "0 findings" in html_content
570
+ assert "&times;0" in html_content
571
+
572
+
573
+ def test_cli_renders_when_issue_class_count_is_non_numeric(tmp_path: Path) -> None:
574
+ """Should exit 0 and show a zero count when an issue class count is a bad string."""
575
+ run_root = tmp_path / "wf_run_bad_count"
576
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
577
+
578
+ summary = {
579
+ "prProblem": "A problem.",
580
+ "prFix": "A fix.",
581
+ "problemScenes": [],
582
+ "fixScenes": [],
583
+ "verdictLine": "Converged.",
584
+ "issueClasses": [
585
+ {
586
+ "plainName": "A class with a non-numeric count",
587
+ "count": "x",
588
+ "severity": "P2",
589
+ "category": "bug",
590
+ "status": "fixed",
591
+ "cause": "A grounded cause.",
592
+ "medium": "text",
593
+ "beforeLines": [],
594
+ "afterLines": [],
595
+ }
596
+ ],
597
+ }
598
+ summary_path = tmp_path / "summary-bad-count.json"
599
+ summary_path.write_text(json.dumps(summary), encoding="utf-8")
600
+
601
+ out_path = tmp_path / "report-bad-count.html"
602
+ completed = _render_cli_with_summary_file(
603
+ journal_destination, out_path, summary_path
604
+ )
605
+
606
+ assert completed.returncode == 0, (
607
+ f"CLI crashed on non-numeric count:\n{completed.stderr}"
608
+ )
609
+ html_content = out_path.read_text(encoding="utf-8")
610
+ assert "A class with a non-numeric count" in html_content
611
+ assert "0 findings" in html_content
612
+
613
+
614
+ def test_cli_renders_degraded_body_when_summary_is_a_list(tmp_path: Path) -> None:
615
+ """Should render the degraded layout and exit 0 when --summary-file holds a list."""
616
+ run_root = tmp_path / "wf_run_list_summary"
617
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
618
+
619
+ summary_path = tmp_path / "summary-list.json"
620
+ summary_path.write_text(json.dumps([]), encoding="utf-8")
621
+
622
+ out_path = tmp_path / "report-list-summary.html"
623
+ completed = _render_cli_with_summary_file(
624
+ journal_destination, out_path, summary_path
625
+ )
626
+
627
+ assert completed.returncode == 0, (
628
+ f"CLI crashed on a list summary:\n{completed.stderr}"
629
+ )
630
+ html_content = out_path.read_text(encoding="utf-8")
631
+ assert "distinct findings across 4 rounds" in html_content
632
+ assert 'class="pf-grid"' not in html_content
633
+
452
634
 
635
+ def test_cli_renders_degraded_body_when_summary_is_a_scalar(tmp_path: Path) -> None:
636
+ """Should render the degraded layout and exit 0 when --summary-file holds a scalar."""
637
+ run_root = tmp_path / "wf_run_scalar_summary"
638
+ journal_destination = _copy_run_tree_without_summary_entry(run_root)
639
+
640
+ summary_path = tmp_path / "summary-scalar.json"
641
+ summary_path.write_text(json.dumps(5), encoding="utf-8")
642
+
643
+ out_path = tmp_path / "report-scalar-summary.html"
644
+ completed = _render_cli_with_summary_file(
645
+ journal_destination, out_path, summary_path
646
+ )
647
+
648
+ assert completed.returncode == 0, (
649
+ f"CLI crashed on a scalar summary:\n{completed.stderr}"
650
+ )
651
+ html_content = out_path.read_text(encoding="utf-8")
652
+ assert "distinct findings across 4 rounds" in html_content
653
+ assert 'class="pf-grid"' not in html_content
654
+
655
+
656
+ def test_is_summary_structurally_valid_false_for_non_dict_summary() -> None:
657
+ """Should return False for a list, string, or scalar summary, never raising."""
658
+ assert render_report._is_summary_structurally_valid([]) is False
659
+ assert render_report._is_summary_structurally_valid("str") is False
660
+ assert render_report._is_summary_structurally_valid(5) is False
661
+
662
+
663
+ def test_cli_rejects_orphaned_repo_argument(tmp_path: Path) -> None:
664
+ """Should reject --repo with a usage error, proving the flag is no longer declared."""
665
+ render_script = Path(__file__).resolve().parent / "render_report.py"
666
+ out_path = tmp_path / "report-repo-rejected.html"
453
667
  completed = subprocess.run(
454
668
  [
455
669
  sys.executable,
456
670
  str(render_script),
457
671
  "--journal",
458
- str(journal_destination),
672
+ str(FIXTURE_JOURNAL),
459
673
  "--out",
460
674
  str(out_path),
461
675
  "--pr",
@@ -471,14 +685,29 @@ def test_robustness_with_missing_transcripts(tmp_path: Path) -> None:
471
685
  text=True,
472
686
  )
473
687
 
688
+ assert completed.returncode != 0
689
+ assert "unrecognized arguments: --repo" in completed.stderr
690
+
691
+
692
+ def test_robustness_with_missing_transcripts(tmp_path: Path) -> None:
693
+ """Should exit 0 and render the timeline and appendix when no transcripts exist."""
694
+ run_root = tmp_path / "wf_run"
695
+ journal_destination = run_root / "workflows" / FIXTURE_JOURNAL.name
696
+ journal_destination.parent.mkdir(parents=True)
697
+ shutil.copy(FIXTURE_JOURNAL, journal_destination)
698
+
699
+ run_id = FIXTURE_JOURNAL.stem
700
+ empty_agents_dir = run_root / "subagents" / "workflows" / run_id
701
+ empty_agents_dir.mkdir(parents=True)
702
+
703
+ out_path = tmp_path / "report-robust.html"
704
+ completed = _render_cli(journal_destination, out_path)
705
+
474
706
  assert completed.returncode == 0, (
475
707
  f"Render failed despite missing transcripts:\n{completed.stderr}"
476
708
  )
477
709
 
478
710
  html_content = out_path.read_text(encoding="utf-8")
479
- assert "PR #211 Convergence Insights" in html_content
480
-
481
- finding_card_count = html_content.count('class="bug-card')
482
- assert finding_card_count == 0, (
483
- f"Missing transcripts yielded findings: expected 0 cards, got {finding_card_count}"
484
- )
711
+ assert "PR #211 Convergence Summary" in html_content
712
+ assert 'class="timeline"' not in html_content
713
+ assert 'class="pf-grid"' not in html_content