claude-dev-env 1.59.0 → 1.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +4 -0
- package/audit-rubrics/category_rubrics/category-b-selector-engine-compat.md +1 -1
- package/audit-rubrics/category_rubrics/category-e-dead-code.md +1 -0
- package/audit-rubrics/category_rubrics/category-f-silent-failures.md +1 -1
- package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
- package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
- package/audit-rubrics/prompts/category-e-dead-code.md +17 -4
- package/audit-rubrics/prompts/category-f-silent-failures.md +1 -0
- package/docs/CODE_RULES.md +2 -2
- package/hooks/blocking/code_rules_annotations_length.py +189 -10
- package/hooks/blocking/code_rules_dead_module_constant.py +321 -0
- package/hooks/blocking/code_rules_duplicate_body.py +152 -0
- package/hooks/blocking/code_rules_enforcer.py +38 -15
- package/hooks/blocking/code_rules_orphan_css_class.py +196 -0
- package/hooks/blocking/code_rules_typeddict_stub.py +172 -0
- package/hooks/blocking/config/__init__.py +5 -0
- package/hooks/blocking/config/verified_commit_constants.py +118 -0
- package/hooks/blocking/destructive_command_blocker.py +483 -61
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +240 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +1 -0
- package/hooks/blocking/test_code_rules_enforcer_cross_skill_duplicate.py +146 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_module_constant.py +188 -0
- package/hooks/blocking/test_code_rules_enforcer_dispatch_wiring.py +82 -0
- package/hooks/blocking/test_code_rules_enforcer_orphan_css_class.py +196 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias.py +415 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias_hook_routing.py +156 -0
- package/hooks/blocking/test_destructive_command_blocker.py +213 -0
- package/hooks/blocking/test_verdict_directory_write_blocker.py +720 -0
- package/hooks/blocking/test_verification_verdict_store.py +490 -0
- package/hooks/blocking/test_verified_commit_gate.py +495 -0
- package/hooks/blocking/test_verified_commit_message_accuracy_blocker.py +131 -0
- package/hooks/blocking/test_verifier_verdict_minter.py +193 -0
- package/hooks/blocking/verdict_directory_write_blocker.py +667 -0
- package/hooks/blocking/verification_verdict_store.py +686 -0
- package/hooks/blocking/verified_commit_gate.py +535 -0
- package/hooks/blocking/verified_commit_message_accuracy_blocker.py +152 -0
- package/hooks/blocking/verifier_verdict_minter.py +221 -0
- package/hooks/diagnostic/test_hook_log_extractor.py +3 -3
- package/hooks/hooks.json +43 -1
- package/hooks/hooks_constants/blocking_check_limits.py +1 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +6 -0
- package/hooks/hooks_constants/dead_module_constant_constants.py +20 -0
- package/hooks/hooks_constants/destructive_command_segment_constants.py +15 -0
- package/hooks/hooks_constants/duplicate_function_body_constants.py +22 -5
- package/hooks/hooks_constants/orphan_css_class_constants.py +40 -0
- package/hooks/hooks_constants/precommit_code_rules_gate_constants.py +1 -1
- package/hooks/validation/mypy_validator.py +59 -7
- package/hooks/validation/test_mypy_validator.py +94 -0
- package/package.json +1 -1
- package/rules/file-global-constants.md +7 -1
- package/rules/no-cross-skill-duplicate-helpers.md +29 -0
- package/rules/orphan-css-class.md +23 -0
- package/skills/_shared/pr-loop/scripts/preflight_worktree.py +392 -0
- package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/preflight_constants.py +70 -0
- package/skills/_shared/pr-loop/scripts/test_preflight_worktree.py +263 -0
- package/skills/autoconverge/SKILL.md +54 -17
- package/skills/autoconverge/reference/closing-report.md +59 -17
- package/skills/autoconverge/workflow/aggregate_runs.py +371 -0
- package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +192 -76
- package/skills/autoconverge/workflow/converge.clean-audit.test.mjs +76 -0
- package/skills/autoconverge/workflow/converge.contract.test.mjs +395 -206
- package/skills/autoconverge/workflow/converge.mjs +520 -57
- package/skills/autoconverge/workflow/convergence_summary.py +110 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-ab1c2d3e4f5a6b7c8.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/workflows/wf_881252e6-700.json +7 -0
- package/skills/autoconverge/workflow/render_report.py +488 -397
- package/skills/autoconverge/workflow/test_aggregate_runs.py +134 -0
- package/skills/autoconverge/workflow/test_convergence_summary.py +132 -0
- package/skills/autoconverge/workflow/test_render_report.py +518 -259
- package/skills/pr-converge/reference/per-tick.md +28 -8
- package/skills/rebase/SKILL.md +2 -4
- package/system-prompts/software-engineer.xml +2 -6
- package/hooks/blocking/content_search_to_zoekt_redirector.py +0 -59
- package/hooks/blocking/content_search_zoekt_bash_block_reason.py +0 -25
- package/hooks/blocking/content_search_zoekt_block_payload.py +0 -21
- package/hooks/blocking/content_search_zoekt_indexed_paths.py +0 -24
- package/hooks/blocking/content_search_zoekt_indexed_roots_config.py +0 -131
- package/hooks/blocking/content_search_zoekt_redirect_guidance.py +0 -52
- package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +0 -61
- package/hooks/blocking/test_content_search_to_zoekt_redirector_unit.py +0 -92
- package/hooks/blocking/test_content_search_zoekt_indexed_roots_config.py +0 -102
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""Tests for the mechanical commit-gate exemption in verification_verdict_store.
|
|
2
|
+
|
|
3
|
+
Each test builds a real git repository with a real origin remote and asserts
|
|
4
|
+
the exemption decision against the live work tree, exercising the same code
|
|
5
|
+
path the verified_commit_gate hook runs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib.util
|
|
9
|
+
import json
|
|
10
|
+
import pathlib
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Directory that contains this test module; the hook modules loaded further
# down are addressed relative to it.
_HOOK_DIR = pathlib.Path(__file__).parent

# Prepend the hook directory to the import path exactly once.
# NOTE(review): presumably this lets the path-loaded modules import their
# sibling modules and the ``config`` package -- confirm against the store.
if sys.path.count(str(_HOOK_DIR)) == 0:
    sys.path.insert(0, str(_HOOK_DIR))
|
|
17
|
+
|
|
18
|
+
# Load verification_verdict_store.py directly from its file path rather than
# through a regular ``import`` statement.
store_spec = importlib.util.spec_from_file_location(
    "verification_verdict_store", _HOOK_DIR / "verification_verdict_store.py"
)
# Both asserts narrow the Optional results of the importlib API before use.
assert store_spec is not None
assert store_spec.loader is not None
store_module = importlib.util.module_from_spec(store_spec)
store_spec.loader.exec_module(store_module)

# Module-level aliases for the store functions exercised by the tests below.
resolve_merge_base = store_module.resolve_merge_base
is_verification_exempt_diff = store_module.is_verification_exempt_diff
branch_surface_manifest = store_module.branch_surface_manifest
manifest_sha256 = store_module.manifest_sha256
workflow_verdict_covers_surface = store_module.workflow_verdict_covers_surface
|
|
31
|
+
|
|
32
|
+
# Load config/verified_commit_constants.py by file path, the same way the
# store module is loaded, to reach the corrective message text.
constants_spec = importlib.util.spec_from_file_location(
    "verified_commit_constants",
    _HOOK_DIR.joinpath("config", "verified_commit_constants.py"),
)
# Narrow the Optional spec and loader before executing the module.
assert constants_spec is not None
assert constants_spec.loader is not None
constants_module = importlib.util.module_from_spec(constants_spec)
constants_spec.loader.exec_module(constants_module)

# Message text inspected by the corrective-message wording test.
CORRECTIVE_MESSAGE = constants_module.CORRECTIVE_MESSAGE
|
|
41
|
+
|
|
42
|
+
PRODUCTION_SOURCE = "def add(left: int, right: int) -> int:\n return left + right\n"
|
|
43
|
+
TEST_SOURCE = "def test_add() -> None:\n assert 1 + 1 == 2\n"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _run_git(repo_dir: pathlib.Path, *git_arguments: str) -> None:
    """Run one git command against ``repo_dir`` and discard its output.

    The command is ``git -C repo_dir`` followed by ``git_arguments``. Output
    is captured instead of echoed, and ``check=True`` makes a non-zero exit
    raise ``subprocess.CalledProcessError``.
    """
    command_line = ["git", "-C", str(repo_dir)]
    command_line.extend(git_arguments)
    subprocess.run(command_line, check=True, capture_output=True, text=True)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _make_repo_on_branch(
    tmp_path: pathlib.Path, branch_name: str
) -> pathlib.Path:
    """Create a fixture work tree on ``branch_name`` pushed to a bare origin.

    Under ``tmp_path`` this builds a bare repository ``origin.git``, an empty
    ``nohooks`` directory and a work tree ``work``. The work tree receives one
    commit ("base") containing ``src/app.py``, ``tests/test_app.py`` and
    ``README.md``, and that commit is pushed to ``origin`` with upstream
    tracking.

    Returns:
        The path of the work tree.
    """
    bare_origin = tmp_path / "origin.git"
    checkout = tmp_path / "work"
    checkout.mkdir()
    initial_branch_flag = f"--initial-branch={branch_name}"
    # The bare origin is created without ``-C`` because it does not exist yet.
    subprocess.run(
        ["git", "init", "--bare", initial_branch_flag, str(bare_origin)],
        check=True,
        capture_output=True,
        text=True,
    )
    hooks_stub = tmp_path / "nohooks"
    hooks_stub.mkdir()
    _run_git(checkout, "init", initial_branch_flag)
    # core.hooksPath points at an empty directory.
    # NOTE(review): presumably so no git hooks fire during fixture commits.
    repo_settings = (
        ("user.email", "tests@example.com"),
        ("user.name", "Verdict Store Tests"),
        ("core.hooksPath", str(hooks_stub)),
    )
    for setting_name, setting_value in repo_settings:
        _run_git(checkout, "config", setting_name, setting_value)
    source_dir = checkout / "src"
    tests_dir = checkout / "tests"
    source_dir.mkdir()
    tests_dir.mkdir()
    baseline_files = (
        (source_dir / "app.py", PRODUCTION_SOURCE),
        (tests_dir / "test_app.py", TEST_SOURCE),
        (checkout / "README.md", "# Fixture repo\n"),
    )
    for file_path, file_text in baseline_files:
        file_path.write_text(file_text, encoding="utf-8")
    _run_git(checkout, "add", "-A")
    _run_git(checkout, "commit", "-m", "base")
    _run_git(checkout, "remote", "add", "origin", str(bare_origin))
    _run_git(checkout, "push", "-u", "origin", branch_name)
    return checkout
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _make_repo_with_origin(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the fixture repository on a ``main`` branch and return its work tree."""
    default_branch = "main"
    return _make_repo_on_branch(tmp_path, default_branch)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _exemption_for(work_dir: pathlib.Path) -> bool:
    """Return the exemption decision for the current state of ``work_dir``.

    Resolves the merge base for the work tree, asserts that one was found,
    and passes it to ``is_verification_exempt_diff``.
    """
    repo_location = str(work_dir)
    base_commit = resolve_merge_base(repo_location)
    assert base_commit is not None
    return is_verification_exempt_diff(repo_location, base_commit)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_production_change_is_gated(tmp_path: pathlib.Path) -> None:
    """Changing the body of the tracked production module is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    production_module = repo_root / "src" / "app.py"
    production_module.write_text(
        "def add(left: int, right: int) -> int:\n return left - right\n",
        encoding="utf-8",
    )
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def test_docs_only_change_is_exempt(tmp_path: pathlib.Path) -> None:
    """Editing only README.md leaves the work tree exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    readme_path = repo_root / "README.md"
    readme_path.write_text("# Fixture repo\n\nUpdated.\n", encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_docstring_only_python_change_is_exempt(tmp_path: pathlib.Path) -> None:
    """Adding only a docstring to the production function stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    # Same function as the baseline, with a docstring inserted before the body.
    documented_source = (
        'def add(left: int, right: int) -> int:\n """Add two integers."""\n'
        " return left + right\n"
    )
    production_module = repo_root / "src" / "app.py"
    production_module.write_text(documented_source, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def test_modified_test_file_is_exempt(tmp_path: pathlib.Path) -> None:
    """Appending a test function to the tracked test module stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    extended_tests = (
        TEST_SOURCE + "\n\ndef test_add_zero() -> None:\n assert 0 + 0 == 0\n"
    )
    tracked_test_module = repo_root / "tests" / "test_app.py"
    tracked_test_module.write_text(extended_tests, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_untracked_test_prefix_file_is_exempt(tmp_path: pathlib.Path) -> None:
    """A new, untracked ``test_*.py`` file under tests/ stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    new_test_module = repo_root / "tests" / "test_extra.py"
    new_test_module.write_text(TEST_SOURCE, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_untracked_test_suffix_file_is_exempt(tmp_path: pathlib.Path) -> None:
    """A new, untracked ``*_test.py`` file under tests/ stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    new_test_module = repo_root / "tests" / "app_test.py"
    new_test_module.write_text(TEST_SOURCE, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_modified_conftest_is_exempt(tmp_path: pathlib.Path) -> None:
    """Writing a ``conftest.py`` with a fixture under tests/ stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    conftest_source = (
        "import pytest\n\n\n@pytest.fixture\ndef sample() -> int:\n return 3\n"
    )
    conftest_path = repo_root / "tests" / "conftest.py"
    conftest_path.write_text(conftest_source, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_deleted_test_file_is_exempt(tmp_path: pathlib.Path) -> None:
    """Deleting the tracked test module from the work tree stays exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    tracked_test_module = repo_root / "tests" / "test_app.py"
    tracked_test_module.unlink()
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_mixed_test_and_production_change_is_gated(tmp_path: pathlib.Path) -> None:
    """A test-file edit combined with a production edit is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    tracked_test_module = repo_root / "tests" / "test_app.py"
    tracked_test_module.write_text(TEST_SOURCE + "\n", encoding="utf-8")
    production_module = repo_root / "src" / "app.py"
    production_module.write_text(
        "def add(left: int, right: int) -> int:\n return left * right\n",
        encoding="utf-8",
    )
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_untracked_production_file_is_gated(tmp_path: pathlib.Path) -> None:
    """A new, untracked Python module under src/ is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    new_module = repo_root / "src" / "extra.py"
    new_module.write_text(PRODUCTION_SOURCE, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def test_production_file_named_like_test_outside_python_is_gated(
    tmp_path: pathlib.Path,
) -> None:
    """A new ``test_``-prefixed JSON file under src/ is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    json_fixture = repo_root / "src" / "test_data.json"
    json_fixture.write_text("{}", encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_comment_only_change_in_non_python_file_is_gated(
    tmp_path: pathlib.Path,
) -> None:
    """Rewording only a comment in a committed shell script is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    deploy_script = repo_root / "src" / "deploy.sh"
    # Commit the script first so the later edit differs only in its comment.
    deploy_script.write_text("# build the project\nmake build\n", encoding="utf-8")
    _run_git(repo_root, "add", "-A")
    _run_git(repo_root, "commit", "-m", "add deploy script")
    reworded_script = "# build the release artifact\nmake build\n"
    deploy_script.write_text(reworded_script, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def test_corrective_message_scopes_comment_exemption_to_python() -> None:
    """The corrective message names comments and Python, not the old wording.

    The comparison is case-insensitive: the message is lowercased first.
    """
    message_lowercase = CORRECTIVE_MESSAGE.lower()
    for required_word in ("comment", "python"):
        assert required_word in message_lowercase
    retired_phrase = "comment-, and test-only surfaces are exempt"
    assert retired_phrase not in message_lowercase
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def test_untracked_claude_production_hook_is_gated(tmp_path: pathlib.Path) -> None:
    """A new, untracked module under .claude/hooks/blocking is not exempt."""
    repo_root = _make_repo_with_origin(tmp_path)
    blocking_hooks_dir = repo_root.joinpath(".claude", "hooks", "blocking")
    blocking_hooks_dir.mkdir(parents=True)
    untracked_hook = blocking_hooks_dir / "evil_new_hook.py"
    untracked_hook.write_text(PRODUCTION_SOURCE, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_untracked_claude_production_hook_is_in_surface_manifest(
    tmp_path: pathlib.Path,
) -> None:
    """An untracked .claude/hooks/blocking module appears in the surface manifest."""
    repo_root = _make_repo_with_origin(tmp_path)
    blocking_hooks_dir = repo_root.joinpath(".claude", "hooks", "blocking")
    blocking_hooks_dir.mkdir(parents=True)
    untracked_hook = blocking_hooks_dir / "evil_new_hook.py"
    untracked_hook.write_text(PRODUCTION_SOURCE, encoding="utf-8")
    repo_location = str(repo_root)
    base_commit = resolve_merge_base(repo_location)
    assert base_commit is not None
    manifest_text = branch_surface_manifest(repo_location, base_commit)
    assert manifest_text is not None
    # The manifest is checked for the POSIX-style relative path of the new hook.
    assert ".claude/hooks/blocking/evil_new_hook.py" in manifest_text
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def test_untracked_claude_worktree_scratch_copy_stays_filtered(
    tmp_path: pathlib.Path,
) -> None:
    """An untracked file under .claude/worktrees is ignored by both checks.

    The work tree must stay exempt and the surface manifest must be the
    empty string.
    """
    repo_root = _make_repo_with_origin(tmp_path)
    scratch_source_dir = repo_root.joinpath(".claude", "worktrees", "feature", "src")
    scratch_source_dir.mkdir(parents=True)
    scratch_copy = scratch_source_dir / "app.py"
    scratch_copy.write_text(PRODUCTION_SOURCE, encoding="utf-8")
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is True
    repo_location = str(repo_root)
    base_commit = resolve_merge_base(repo_location)
    assert base_commit is not None
    manifest_text = branch_surface_manifest(repo_location, base_commit)
    assert manifest_text == ""
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _git_output(work_dir: pathlib.Path, *git_arguments: str) -> str:
    """Run one git command against ``work_dir`` and return its stripped stdout.

    ``check=True`` makes a non-zero exit raise
    ``subprocess.CalledProcessError``.
    """
    git_command = ["git", "-C", str(work_dir)]
    git_command.extend(git_arguments)
    finished = subprocess.run(
        git_command, check=True, capture_output=True, text=True
    )
    captured_stdout = finished.stdout
    return captured_stdout.strip()
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_resolve_merge_base_finds_nonstandard_default_branch(
    tmp_path: pathlib.Path,
) -> None:
    """``resolve_merge_base`` matches git's merge base with ``origin/develop``."""
    repo_root = _make_repo_on_branch(tmp_path, "develop")
    # NOTE(review): presumably the remote HEAD is deleted so the default
    # branch cannot simply be read from it -- confirm against the store.
    _run_git(repo_root, "remote", "set-head", "origin", "--delete")
    git_reported_base = _git_output(
        repo_root, "merge-base", "HEAD", "origin/develop"
    )
    resolved_base = resolve_merge_base(str(repo_root))
    assert resolved_base == git_reported_base
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def test_production_change_is_gated_on_nonstandard_default_branch(
    tmp_path: pathlib.Path,
) -> None:
    """A production edit is not exempt when the origin branch is ``develop``."""
    repo_root = _make_repo_on_branch(tmp_path, "develop")
    # Same remote-HEAD deletion as the merge-base test for this branch layout.
    _run_git(repo_root, "remote", "set-head", "origin", "--delete")
    production_module = repo_root / "src" / "app.py"
    production_module.write_text(
        "def add(left: int, right: int) -> int:\n return left - right\n",
        encoding="utf-8",
    )
    is_exempt = _exemption_for(repo_root)
    assert is_exempt is False
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# Placeholder 64-character digest that the workflow-verdict tests query for.
MATCHING_MANIFEST_SHA256 = "a" * 64

# A different 64-character digest, used for the hash-mismatch case.
OTHER_MANIFEST_SHA256 = "b" * 64

# Agent type written to the sidecar in the cases that expect coverage.
VERIFIER_AGENT_TYPE = "code-verifier"
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _verdict_transcript_text(is_all_pass: bool, bound_manifest_sha256: str) -> str:
    """Build a one-line JSONL transcript holding a fenced verdict record.

    The line is a JSON object of type ``assistant`` with a single text content
    item. That text wraps a JSON verdict in a ``verdict`` code fence; the
    verdict has ``all_pass``, an empty ``findings`` list and
    ``manifest_sha256``.

    Returns:
        The serialized entry followed by a newline.
    """
    verdict_json = json.dumps(
        {
            "all_pass": is_all_pass,
            "findings": [],
            "manifest_sha256": bound_manifest_sha256,
        }
    )
    fenced_reply = f"Verification complete.\n\n```verdict\n{verdict_json}\n```\n"
    text_item = {"type": "text", "text": fenced_reply}
    transcript_entry = {"type": "assistant", "message": {"content": [text_item]}}
    return f"{json.dumps(transcript_entry)}\n"
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _write_agent_transcript(
    subagents_dir: pathlib.Path,
    agent_id: str,
    agent_type: str,
    transcript_text: str,
    should_write_sidecar: bool,
) -> None:
    """Write an agent transcript, and optionally its sidecar, under ``wf_x``.

    Files are placed in ``subagents_dir/workflows/wf_x``, which is created if
    needed. The transcript is ``agent-ID.jsonl``. When
    ``should_write_sidecar`` is true, ``agent-ID.meta.json`` is also written
    with ``agent_type`` stored under the ``agentType`` key.
    """
    workflow_dir = subagents_dir.joinpath("workflows", "wf_x")
    workflow_dir.mkdir(parents=True, exist_ok=True)
    transcript_file = workflow_dir / f"agent-{agent_id}.jsonl"
    transcript_file.write_text(transcript_text, encoding="utf-8")
    if not should_write_sidecar:
        return
    sidecar_file = workflow_dir / f"agent-{agent_id}.meta.json"
    sidecar_payload = json.dumps({"agentType": agent_type})
    sidecar_file.write_text(sidecar_payload, encoding="utf-8")
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _session_transcript_path(tmp_path: pathlib.Path, session_id: str) -> pathlib.Path:
    """Create an empty session transcript and return its path.

    The transcript is ``tmp_path/projects/demo/SESSION_ID.jsonl``. The session
    root is created with ``exist_ok=True`` so the helper can be called more
    than once under the same ``tmp_path`` (for example for a second session)
    without raising ``FileExistsError``.

    Returns:
        The path of the empty transcript file.
    """
    session_root = tmp_path / "projects" / "demo"
    session_root.mkdir(parents=True, exist_ok=True)
    transcript_path = session_root / f"{session_id}.jsonl"
    # An empty file is enough here; callers only pass this path along.
    transcript_path.write_text("", encoding="utf-8")
    return transcript_path
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def test_workflow_verdict_covers_surface_true_for_matching_passing_verifier(
    tmp_path: pathlib.Path,
) -> None:
    """A passing verifier verdict bound to the queried hash counts as coverage."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    passing_verdict = _verdict_transcript_text(True, MATCHING_MANIFEST_SHA256)
    _write_agent_transcript(
        subagents_root,
        "01",
        VERIFIER_AGENT_TYPE,
        passing_verdict,
        should_write_sidecar=True,
    )
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is True
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def test_workflow_verdict_covers_surface_false_for_nonmatching_hash(
    tmp_path: pathlib.Path,
) -> None:
    """A passing verdict bound to a different hash does not count as coverage."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    verdict_for_other_hash = _verdict_transcript_text(True, OTHER_MANIFEST_SHA256)
    _write_agent_transcript(
        subagents_root,
        "01",
        VERIFIER_AGENT_TYPE,
        verdict_for_other_hash,
        should_write_sidecar=True,
    )
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is False
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def test_workflow_verdict_covers_surface_false_for_all_pass_false(
    tmp_path: pathlib.Path,
) -> None:
    """A verdict with ``all_pass`` false does not count, even with a matching hash."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    failing_verdict = _verdict_transcript_text(False, MATCHING_MANIFEST_SHA256)
    _write_agent_transcript(
        subagents_root,
        "01",
        VERIFIER_AGENT_TYPE,
        failing_verdict,
        should_write_sidecar=True,
    )
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is False
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def test_workflow_verdict_covers_surface_false_for_non_verifier_sidecar(
    tmp_path: pathlib.Path,
) -> None:
    """A passing verdict whose sidecar names ``clean-coder`` does not count."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    passing_verdict = _verdict_transcript_text(True, MATCHING_MANIFEST_SHA256)
    _write_agent_transcript(
        subagents_root,
        "01",
        "clean-coder",
        passing_verdict,
        should_write_sidecar=True,
    )
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is False
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def test_workflow_verdict_covers_surface_false_for_missing_sidecar(
    tmp_path: pathlib.Path,
) -> None:
    """A passing verdict transcript with no sidecar file does not count."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    passing_verdict = _verdict_transcript_text(True, MATCHING_MANIFEST_SHA256)
    # The agent type is passed but never written because no sidecar is made.
    _write_agent_transcript(
        subagents_root,
        "01",
        VERIFIER_AGENT_TYPE,
        passing_verdict,
        should_write_sidecar=False,
    )
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is False
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def test_workflow_verdict_covers_surface_false_for_missing_subagents_dir(
    tmp_path: pathlib.Path,
) -> None:
    """With no subagent transcripts written at all, nothing counts as coverage."""
    session_transcript = _session_transcript_path(tmp_path, "sess1")
    is_covered = workflow_verdict_covers_surface(
        str(session_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is False
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def test_workflow_verdict_covers_surface_true_when_transcript_is_under_subagents(
    tmp_path: pathlib.Path,
) -> None:
    """Coverage is found when the caller's own transcript is a subagent file.

    The queried transcript is ``agent-00.jsonl`` in the same workflow
    directory as the verifier's transcript, not a top-level session
    transcript.
    """
    subagents_root = tmp_path / "projects" / "demo" / "sess1" / "subagents"
    passing_verdict = _verdict_transcript_text(True, MATCHING_MANIFEST_SHA256)
    _write_agent_transcript(
        subagents_root,
        "01",
        VERIFIER_AGENT_TYPE,
        passing_verdict,
        should_write_sidecar=True,
    )
    # The workflow directory already exists: the helper call above created it.
    caller_transcript = subagents_root.joinpath("workflows", "wf_x", "agent-00.jsonl")
    caller_transcript.write_text("", encoding="utf-8")
    is_covered = workflow_verdict_covers_surface(
        str(caller_transcript), MATCHING_MANIFEST_SHA256
    )
    assert is_covered is True
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def test_manifest_hash_cli_prints_live_surface_hash(tmp_path: pathlib.Path) -> None:
    """The ``--manifest-hash`` CLI prints the hash computed through the API.

    The expected value is ``manifest_sha256`` applied to the branch surface
    manifest of a work tree with one production edit. The store script is
    then run as a subprocess with the current interpreter, and its stripped
    stdout is compared to that value.
    """
    repo_root = _make_repo_with_origin(tmp_path)
    production_module = repo_root / "src" / "app.py"
    production_module.write_text(
        "def add(left: int, right: int) -> int:\n return left - right\n",
        encoding="utf-8",
    )
    repo_location = str(repo_root)
    base_commit = resolve_merge_base(repo_location)
    assert base_commit is not None
    manifest_text = branch_surface_manifest(repo_location, base_commit)
    assert manifest_text is not None
    hash_from_api = manifest_sha256(manifest_text)
    store_script = _HOOK_DIR / "verification_verdict_store.py"
    cli_command = [
        sys.executable,
        str(store_script),
        "--manifest-hash",
        str(repo_root),
    ]
    cli_run = subprocess.run(
        cli_command, check=True, capture_output=True, text=True
    )
    hash_from_cli = cli_run.stdout.strip()
    assert hash_from_cli == hash_from_api
|