claude-dev-env 1.59.0 → 1.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +4 -0
- package/audit-rubrics/category_rubrics/category-b-selector-engine-compat.md +1 -1
- package/audit-rubrics/category_rubrics/category-e-dead-code.md +1 -0
- package/audit-rubrics/category_rubrics/category-f-silent-failures.md +1 -1
- package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
- package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
- package/audit-rubrics/prompts/category-e-dead-code.md +17 -4
- package/audit-rubrics/prompts/category-f-silent-failures.md +1 -0
- package/docs/CODE_RULES.md +2 -2
- package/hooks/blocking/code_rules_annotations_length.py +189 -10
- package/hooks/blocking/code_rules_dead_module_constant.py +321 -0
- package/hooks/blocking/code_rules_duplicate_body.py +152 -0
- package/hooks/blocking/code_rules_enforcer.py +38 -15
- package/hooks/blocking/code_rules_orphan_css_class.py +196 -0
- package/hooks/blocking/code_rules_typeddict_stub.py +172 -0
- package/hooks/blocking/config/__init__.py +5 -0
- package/hooks/blocking/config/verified_commit_constants.py +118 -0
- package/hooks/blocking/destructive_command_blocker.py +483 -61
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +240 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +1 -0
- package/hooks/blocking/test_code_rules_enforcer_cross_skill_duplicate.py +146 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_module_constant.py +188 -0
- package/hooks/blocking/test_code_rules_enforcer_dispatch_wiring.py +82 -0
- package/hooks/blocking/test_code_rules_enforcer_orphan_css_class.py +196 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias.py +415 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias_hook_routing.py +156 -0
- package/hooks/blocking/test_destructive_command_blocker.py +213 -0
- package/hooks/blocking/test_verdict_directory_write_blocker.py +720 -0
- package/hooks/blocking/test_verification_verdict_store.py +490 -0
- package/hooks/blocking/test_verified_commit_gate.py +495 -0
- package/hooks/blocking/test_verified_commit_message_accuracy_blocker.py +131 -0
- package/hooks/blocking/test_verifier_verdict_minter.py +193 -0
- package/hooks/blocking/verdict_directory_write_blocker.py +667 -0
- package/hooks/blocking/verification_verdict_store.py +686 -0
- package/hooks/blocking/verified_commit_gate.py +535 -0
- package/hooks/blocking/verified_commit_message_accuracy_blocker.py +152 -0
- package/hooks/blocking/verifier_verdict_minter.py +221 -0
- package/hooks/diagnostic/test_hook_log_extractor.py +3 -3
- package/hooks/hooks.json +43 -1
- package/hooks/hooks_constants/blocking_check_limits.py +1 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +6 -0
- package/hooks/hooks_constants/dead_module_constant_constants.py +20 -0
- package/hooks/hooks_constants/destructive_command_segment_constants.py +15 -0
- package/hooks/hooks_constants/duplicate_function_body_constants.py +22 -5
- package/hooks/hooks_constants/orphan_css_class_constants.py +40 -0
- package/hooks/hooks_constants/precommit_code_rules_gate_constants.py +1 -1
- package/hooks/validation/mypy_validator.py +59 -7
- package/hooks/validation/test_mypy_validator.py +94 -0
- package/package.json +1 -1
- package/rules/file-global-constants.md +7 -1
- package/rules/no-cross-skill-duplicate-helpers.md +29 -0
- package/rules/orphan-css-class.md +23 -0
- package/skills/_shared/pr-loop/scripts/preflight_worktree.py +392 -0
- package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/preflight_constants.py +70 -0
- package/skills/_shared/pr-loop/scripts/test_preflight_worktree.py +263 -0
- package/skills/autoconverge/SKILL.md +54 -17
- package/skills/autoconverge/reference/closing-report.md +59 -17
- package/skills/autoconverge/workflow/aggregate_runs.py +371 -0
- package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +192 -76
- package/skills/autoconverge/workflow/converge.clean-audit.test.mjs +76 -0
- package/skills/autoconverge/workflow/converge.contract.test.mjs +395 -206
- package/skills/autoconverge/workflow/converge.mjs +520 -57
- package/skills/autoconverge/workflow/convergence_summary.py +110 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-ab1c2d3e4f5a6b7c8.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/workflows/wf_881252e6-700.json +7 -0
- package/skills/autoconverge/workflow/render_report.py +488 -397
- package/skills/autoconverge/workflow/test_aggregate_runs.py +134 -0
- package/skills/autoconverge/workflow/test_convergence_summary.py +132 -0
- package/skills/autoconverge/workflow/test_render_report.py +518 -259
- package/skills/pr-converge/reference/per-tick.md +28 -8
- package/skills/rebase/SKILL.md +2 -4
- package/system-prompts/software-engineer.xml +2 -6
- package/hooks/blocking/content_search_to_zoekt_redirector.py +0 -59
- package/hooks/blocking/content_search_zoekt_bash_block_reason.py +0 -25
- package/hooks/blocking/content_search_zoekt_block_payload.py +0 -21
- package/hooks/blocking/content_search_zoekt_indexed_paths.py +0 -24
- package/hooks/blocking/content_search_zoekt_indexed_roots_config.py +0 -131
- package/hooks/blocking/content_search_zoekt_redirect_guidance.py +0 -52
- package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +0 -61
- package/hooks/blocking/test_content_search_to_zoekt_redirector_unit.py +0 -92
- package/hooks/blocking/test_content_search_zoekt_indexed_roots_config.py +0 -102
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
"""Shared verdict storage and branch-diff logic for the verified-commit gate.
|
|
2
|
+
|
|
3
|
+
The verified-commit workflow has two halves that must agree byte-for-byte on
|
|
4
|
+
what a verdict covers: ``verifier_verdict_minter.py`` (SubagentStop) writes a
|
|
5
|
+
verdict bound to the current change surface, and ``verified_commit_gate.py``
|
|
6
|
+
(PreToolUse on Bash) refuses ``git commit`` / ``git push`` unless a verdict
|
|
7
|
+
matching the live surface exists. This module owns that shared contract:
|
|
8
|
+
locating the repo, computing the canonical surface manifest and its hash,
|
|
9
|
+
deriving the verdict file path, deciding the mechanical docs-only exemption,
|
|
10
|
+
and reading/writing verdict files.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import ast
|
|
16
|
+
import hashlib
|
|
17
|
+
import json
|
|
18
|
+
import re
|
|
19
|
+
import subprocess
|
|
20
|
+
import sys
|
|
21
|
+
import time
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
# Put this file's own directory at the front of ``sys.path`` (at most once).
# NOTE(review): presumably this lets the ``config.verified_commit_constants``
# import that follows resolve when the file is executed directly as a hook
# script rather than imported as part of a package — confirm against callers.
blocking_directory = str(Path(__file__).resolve().parent)
if blocking_directory not in sys.path:
    sys.path.insert(0, blocking_directory)
|
|
27
|
+
|
|
28
|
+
from config.verified_commit_constants import (
|
|
29
|
+
AGENT_META_SIDECAR_SUFFIX,
|
|
30
|
+
AGENT_META_TYPE_KEY,
|
|
31
|
+
AGENT_TRANSCRIPT_GLOB,
|
|
32
|
+
CLAUDE_HOME_DIRECTORY_NAME,
|
|
33
|
+
CONFTEST_FILE_NAME,
|
|
34
|
+
DOCS_ONLY_EXTENSIONS,
|
|
35
|
+
ALL_FALLBACK_BASE_REFERENCES,
|
|
36
|
+
GIT_TIMEOUT_SECONDS,
|
|
37
|
+
MANIFEST_HASH_CLI_FLAG,
|
|
38
|
+
MINIMUM_STATUS_FIELD_COUNT,
|
|
39
|
+
MINTING_AGENT_TYPE,
|
|
40
|
+
PYTHON_EXTENSION,
|
|
41
|
+
ROOT_KEY_HEX_LENGTH,
|
|
42
|
+
SUBAGENTS_DIRECTORY_NAME,
|
|
43
|
+
TEST_FILE_PREFIX,
|
|
44
|
+
TEST_FILE_SUFFIX,
|
|
45
|
+
ALL_TOOLING_STATE_PREFIXES,
|
|
46
|
+
TRANSCRIPT_ASSISTANT_ENTRY_TYPE,
|
|
47
|
+
TRANSCRIPT_CONTENT_KEY,
|
|
48
|
+
TRANSCRIPT_CONTENT_TYPE_KEY,
|
|
49
|
+
TRANSCRIPT_ENTRY_TYPE_KEY,
|
|
50
|
+
TRANSCRIPT_MESSAGE_KEY,
|
|
51
|
+
TRANSCRIPT_TEXT_CONTENT_TYPE,
|
|
52
|
+
TRANSCRIPT_TEXT_KEY,
|
|
53
|
+
VERDICT_DIRECTORY_NAME,
|
|
54
|
+
VERDICT_FENCE_PATTERN,
|
|
55
|
+
VERDICT_JSON_INDENT,
|
|
56
|
+
VERDICT_KEY_ALL_PASS,
|
|
57
|
+
VERDICT_KEY_FINDINGS,
|
|
58
|
+
VERDICT_KEY_MANIFEST_SHA256,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def run_git(repo_directory: str, *git_arguments: str) -> str | None:
    """Invoke git inside a directory and hand back what it printed.

    Args:
        repo_directory: Working directory handed to git through ``-C``.
        *git_arguments: The subcommand followed by its own arguments.

    Returns:
        Git's stdout decoded as UTF-8 (undecodable bytes replaced) with
        trailing whitespace removed. None when the process cannot be
        started, runs longer than ``GIT_TIMEOUT_SECONDS``, or exits with a
        nonzero status.
    """
    command_line = ["git", "-C", repo_directory]
    command_line.extend(git_arguments)
    try:
        git_result = subprocess.run(
            command_line,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=GIT_TIMEOUT_SECONDS,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        # Missing git binary, unusable directory, or a hung command.
        return None
    exited_cleanly = git_result.returncode == 0
    return git_result.stdout.rstrip() if exited_cleanly else None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def resolve_repo_root(start_directory: str) -> str | None:
    """Ask git for the top-level directory of the enclosing work tree.

    Args:
        start_directory: Directory to start from; it may or may not lie
            inside a git work tree.

    Returns:
        The path printed by ``git rev-parse --show-toplevel``, or None when
        that command fails (for example outside any work tree).
    """
    repo_top_level = run_git(start_directory, "rev-parse", "--show-toplevel")
    return repo_top_level
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _tracked_upstream_reference(repo_root: str) -> str | None:
    """Look up the upstream reference HEAD is configured to track.

    Args:
        repo_root: The repository top-level directory.

    Returns:
        The abbreviated upstream name (for example ``origin/develop``) that
        ``@{u}`` resolves to, or None when git cannot resolve it because no
        upstream is configured.
    """
    upstream_query = ("rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}")
    return run_git(repo_root, *upstream_query)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def candidate_base_references(repo_root: str) -> tuple[str, ...]:
    """List the upstream references worth probing for a merge base.

    The probe order is: whatever ``refs/remotes/origin/HEAD`` points at,
    then the upstream HEAD tracks (which catches default branches with
    unusual names), then the fixed entries of
    ``ALL_FALLBACK_BASE_REFERENCES`` for checkouts with neither configured.

    Args:
        repo_root: The repository top-level directory.

    Returns:
        The references in probe order; a reference that shows up more than
        once keeps only its first position.
    """
    origin_head_target = run_git(
        repo_root, "symbolic-ref", "--quiet", "refs/remotes/origin/HEAD"
    )
    tracked_upstream = _tracked_upstream_reference(repo_root)
    probe_order: tuple[str, ...] = ()
    for each_discovered_reference in (origin_head_target, tracked_upstream):
        if each_discovered_reference:
            probe_order += (each_discovered_reference,)
    probe_order += ALL_FALLBACK_BASE_REFERENCES
    # dict keys keep insertion order, so this de-duplicates without reordering.
    return tuple(dict.fromkeys(probe_order))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def resolve_merge_base(repo_root: str) -> str | None:
    """Return the first merge base HEAD shares with a candidate upstream.

    Args:
        repo_root: The repository top-level directory.

    Returns:
        The commit sha from the first candidate reference for which
        ``git merge-base HEAD <reference>`` yields output, or None when no
        candidate resolves; handling of a base-less repository is left to
        the caller.
    """
    # The generator is lazy: probing stops at the first reference that works.
    probed_shas = (
        run_git(repo_root, "merge-base", "HEAD", each_reference)
        for each_reference in candidate_base_references(repo_root)
    )
    return next((each_sha for each_sha in probed_shas if each_sha), None)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def untracked_file_paths(repo_root: str) -> list[str] | None:
    """Enumerate untracked, non-ignored files that belong to the branch's work.

    Entries whose path starts with any prefix in
    ``ALL_TOOLING_STATE_PREFIXES`` are dropped, so transient tooling state
    never enters the verdict surface. Untracked files anywhere else are
    kept, which means a brand-new file still changes the surface.

    Args:
        repo_root: The repository top-level directory.

    Returns:
        The surviving repo-relative paths in sorted order, or None when the
        underlying ``git ls-files`` call fails.
    """
    listing_text = run_git(
        repo_root, "-c", "core.quotePath=false", "ls-files", "--others", "--exclude-standard"
    )
    if listing_text is None:
        return None
    kept_paths = []
    for each_entry in listing_text.splitlines():
        if not each_entry:
            continue
        if each_entry.startswith(ALL_TOOLING_STATE_PREFIXES):
            continue
        kept_paths.append(each_entry)
    kept_paths.sort()
    return kept_paths
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def branch_surface_manifest(repo_root: str, merge_base_sha: str) -> str | None:
    """Build the canonical manifest describing what a verdict covers.

    The surface is the union of every path ``git diff`` reports against the
    merge base and every untracked file. Each path is paired with a sha256
    of the bytes currently on disk, so staging or re-slicing commits leaves
    the manifest untouched while any later content edit or new file alters
    it.

    Args:
        repo_root: The repository top-level directory.
        merge_base_sha: The merge-base commit sha the branch grew from.

    Returns:
        Newline-joined lines in sorted path order, ``<path> sha256=<digest>``
        for paths that are regular files and ``<path> deleted`` for the
        rest. None when either git listing fails or a file cannot be read.
    """
    diff_listing = run_git(
        repo_root, "-c", "core.quotePath=false", "diff", "--name-only", "--no-renames",
        merge_base_sha,
    )
    if diff_listing is None:
        return None
    untracked_paths = untracked_file_paths(repo_root)
    if untracked_paths is None:
        return None
    surface_paths = set(untracked_paths)
    surface_paths.update(
        each_line for each_line in diff_listing.splitlines() if each_line
    )
    work_tree = Path(repo_root)
    manifest_entries = []
    for each_path in sorted(surface_paths):
        on_disk = work_tree / each_path
        if on_disk.is_file():
            try:
                digest = hashlib.sha256(on_disk.read_bytes()).hexdigest()
            except OSError:
                return None
            manifest_entries.append(f"{each_path} sha256={digest}")
        else:
            manifest_entries.append(f"{each_path} deleted")
    return "\n".join(manifest_entries)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def manifest_sha256(surface_manifest_text: str) -> str:
    """Fingerprint a change-surface manifest.

    Args:
        surface_manifest_text: Manifest text as produced by
            ``branch_surface_manifest``.

    Returns:
        The hexadecimal sha256 digest of the manifest encoded as UTF-8.
    """
    manifest_hasher = hashlib.sha256()
    manifest_hasher.update(surface_manifest_text.encode("utf-8"))
    return manifest_hasher.hexdigest()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def verdict_path_for_repo(repo_root: str) -> Path:
    """Work out where the verdict file for a work tree lives.

    The file sits under the user's home directory rather than inside the
    repository, so the repo never gains untracked files. Its name is a
    prefix of the sha256 of the resolved, forward-slashed, lower-cased
    work-tree path, which gives each worktree its own verdict.

    Args:
        repo_root: The repository top-level directory.

    Returns:
        The verdict file path for this work tree.
    """
    resolved_root = Path(repo_root).resolve()
    normalized_root = str(resolved_root).replace("\\", "/").lower()
    root_digest = hashlib.sha256(normalized_root.encode("utf-8")).hexdigest()
    verdict_directory = Path.home() / CLAUDE_HOME_DIRECTORY_NAME / VERDICT_DIRECTORY_NAME
    return verdict_directory / f"{root_digest[:ROOT_KEY_HEX_LENGTH]}.json"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def load_valid_verdict(repo_root: str, expected_manifest_sha256: str) -> dict | None:
    """Load the verdict for a repo when it passes and covers the live surface.

    Args:
        repo_root: The repository top-level directory.
        expected_manifest_sha256: Hash of the live surface manifest the
            verdict must match exactly.

    Returns:
        The verdict mapping when the file exists, decodes as UTF-8, parses
        as a JSON object, reports ``all_pass`` as exactly ``True``, and
        carries the expected manifest hash; otherwise None.
    """
    verdict_file = verdict_path_for_repo(repo_root)
    try:
        verdict_record = json.loads(verdict_file.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # UnicodeDecodeError is a ValueError but not a JSONDecodeError, so it
        # must be named explicitly: a verdict file holding non-UTF-8 bytes is
        # treated like any other unreadable verdict (fail closed) instead of
        # crashing the gate.
        return None
    if not isinstance(verdict_record, dict):
        return None
    # Identity check: truthy stand-ins such as 1 or "true" do not count.
    if verdict_record.get(VERDICT_KEY_ALL_PASS) is not True:
        return None
    if verdict_record.get(VERDICT_KEY_MANIFEST_SHA256) != expected_manifest_sha256:
        return None
    return verdict_record
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _subagents_directory_for_transcript(transcript_path: str) -> Path | None:
|
|
293
|
+
"""Locate the live session's subagents directory from a transcript path.
|
|
294
|
+
|
|
295
|
+
Handles both transcript shapes the runtime produces: a transcript already
|
|
296
|
+
inside a ``.../subagents/...`` tree resolves to its nearest ancestor named
|
|
297
|
+
``subagents``; a session transcript ``<dir>/<session-id>.jsonl`` resolves
|
|
298
|
+
to ``<dir>/<session-id>/subagents``.
|
|
299
|
+
|
|
300
|
+
Args:
|
|
301
|
+
transcript_path: The live session's transcript path from the payload.
|
|
302
|
+
|
|
303
|
+
Returns:
|
|
304
|
+
The existing subagents directory, or None when neither shape yields
|
|
305
|
+
an existing directory.
|
|
306
|
+
"""
|
|
307
|
+
if not transcript_path:
|
|
308
|
+
return None
|
|
309
|
+
transcript_file = Path(transcript_path)
|
|
310
|
+
for each_ancestor in transcript_file.parents:
|
|
311
|
+
if each_ancestor.name == SUBAGENTS_DIRECTORY_NAME and each_ancestor.is_dir():
|
|
312
|
+
return each_ancestor
|
|
313
|
+
session_subagents_directory = (
|
|
314
|
+
transcript_file.with_suffix("") / SUBAGENTS_DIRECTORY_NAME
|
|
315
|
+
)
|
|
316
|
+
if session_subagents_directory.is_dir():
|
|
317
|
+
return session_subagents_directory
|
|
318
|
+
return None
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _agent_type_for_transcript(transcript_file: Path) -> str | None:
    """Read an agent transcript's sidecar to learn the agent type it ran as.

    The sidecar path is the transcript path with its suffix swapped for
    ``AGENT_META_SIDECAR_SUFFIX``.

    Args:
        transcript_file: An ``agent-*.jsonl`` transcript path.

    Returns:
        The string stored under ``AGENT_META_TYPE_KEY`` in the sidecar, or
        None when the sidecar is missing, unreadable, not valid UTF-8, not
        a JSON object, or carries no string type.
    """
    sidecar_file = transcript_file.with_suffix(AGENT_META_SIDECAR_SUFFIX)
    try:
        sidecar_record = json.loads(sidecar_file.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # UnicodeDecodeError is not covered by JSONDecodeError; without it a
        # sidecar containing non-UTF-8 bytes would raise instead of yielding
        # the documented None.
        return None
    if not isinstance(sidecar_record, dict):
        return None
    recorded_agent_type = sidecar_record.get(AGENT_META_TYPE_KEY)
    return recorded_agent_type if isinstance(recorded_agent_type, str) else None
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _assistant_text_blocks(transcript_file: Path) -> list[str]:
|
|
343
|
+
"""Collect every assistant text block from an agent transcript.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
transcript_file: An ``agent-*.jsonl`` transcript path.
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
The text of each assistant message content block, in order; empty
|
|
350
|
+
when the file is missing, unreadable, or holds no assistant text.
|
|
351
|
+
"""
|
|
352
|
+
try:
|
|
353
|
+
transcript_lines = transcript_file.read_text(encoding="utf-8").splitlines()
|
|
354
|
+
except OSError:
|
|
355
|
+
return []
|
|
356
|
+
all_text_blocks: list[str] = []
|
|
357
|
+
for each_line in transcript_lines:
|
|
358
|
+
if not each_line.strip():
|
|
359
|
+
continue
|
|
360
|
+
try:
|
|
361
|
+
transcript_entry = json.loads(each_line)
|
|
362
|
+
except json.JSONDecodeError:
|
|
363
|
+
continue
|
|
364
|
+
all_text_blocks.extend(_entry_text_blocks(transcript_entry))
|
|
365
|
+
return all_text_blocks
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _entry_text_blocks(transcript_entry: object) -> list[str]:
|
|
369
|
+
"""Extract assistant text from one parsed transcript entry.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
transcript_entry: One parsed JSONL transcript entry.
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
The text of each text content block on an assistant entry, in order;
|
|
376
|
+
empty for any other entry shape.
|
|
377
|
+
"""
|
|
378
|
+
if not isinstance(transcript_entry, dict):
|
|
379
|
+
return []
|
|
380
|
+
if transcript_entry.get(TRANSCRIPT_ENTRY_TYPE_KEY) != TRANSCRIPT_ASSISTANT_ENTRY_TYPE:
|
|
381
|
+
return []
|
|
382
|
+
message_record = transcript_entry.get(TRANSCRIPT_MESSAGE_KEY)
|
|
383
|
+
if not isinstance(message_record, dict):
|
|
384
|
+
return []
|
|
385
|
+
content_blocks = message_record.get(TRANSCRIPT_CONTENT_KEY)
|
|
386
|
+
if not isinstance(content_blocks, list):
|
|
387
|
+
return []
|
|
388
|
+
all_text_blocks: list[str] = []
|
|
389
|
+
for each_block in content_blocks:
|
|
390
|
+
if not isinstance(each_block, dict):
|
|
391
|
+
continue
|
|
392
|
+
if each_block.get(TRANSCRIPT_CONTENT_TYPE_KEY) != TRANSCRIPT_TEXT_CONTENT_TYPE:
|
|
393
|
+
continue
|
|
394
|
+
block_text = each_block.get(TRANSCRIPT_TEXT_KEY)
|
|
395
|
+
if isinstance(block_text, str):
|
|
396
|
+
all_text_blocks.append(block_text)
|
|
397
|
+
return all_text_blocks
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _last_verdict_record(all_text_blocks: list[str]) -> dict | None:
    """Decode the final verdict fence found in an agent's assistant text.

    Every match of ``VERDICT_FENCE_PATTERN`` across the blocks is
    collected in order; only the last one is parsed.

    Args:
        all_text_blocks: The assistant text blocks from one transcript.

    Returns:
        The parsed mapping when that last fence body is a JSON object
        holding a bool ``all_pass``, a list ``findings``, and a string
        ``manifest_sha256``; None when there is no fence, it does not parse,
        or any of those fields has the wrong type.
    """
    fence_regex = re.compile(VERDICT_FENCE_PATTERN, re.DOTALL)
    fence_bodies: list[str] = []
    for each_block in all_text_blocks:
        fence_bodies.extend(
            each_match.group(1) for each_match in fence_regex.finditer(each_block)
        )
    if not fence_bodies:
        return None
    try:
        candidate_record = json.loads(fence_bodies[-1])
    except json.JSONDecodeError:
        return None
    if not isinstance(candidate_record, dict):
        return None
    required_field_types = (
        (VERDICT_KEY_ALL_PASS, bool),
        (VERDICT_KEY_FINDINGS, list),
        (VERDICT_KEY_MANIFEST_SHA256, str),
    )
    for each_key, each_expected_type in required_field_types:
        if not isinstance(candidate_record.get(each_key), each_expected_type):
            return None
    return candidate_record
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def workflow_verdict_covers_surface(
    transcript_path: str, expected_manifest_sha256: str
) -> bool:
    """Check for a workflow verifier verdict that covers the live surface.

    A verifier spawned by a workflow leaves its verdict as assistant text
    in its own transcript instead of going through the SubagentStop minter.
    This scans the session's subagent transcripts, keeps those whose sidecar
    names ``MINTING_AGENT_TYPE``, and inspects the last verdict each emitted.

    Args:
        transcript_path: The live session's transcript path from the payload.
        expected_manifest_sha256: Hash of the live surface manifest the verdict
            must match exactly.

    Returns:
        True at the first verifier transcript whose final verdict has
        ``all_pass`` equal to ``True`` and the expected manifest hash.
        False when no transcript qualifies or no subagents directory is
        found.
    """
    subagents_directory = _subagents_directory_for_transcript(transcript_path)
    if subagents_directory is None:
        return False
    for each_transcript_file in subagents_directory.rglob(AGENT_TRANSCRIPT_GLOB):
        if _agent_type_for_transcript(each_transcript_file) != MINTING_AGENT_TYPE:
            continue
        text_blocks = _assistant_text_blocks(each_transcript_file)
        final_verdict = _last_verdict_record(text_blocks)
        if final_verdict is None:
            continue
        reports_pass = final_verdict[VERDICT_KEY_ALL_PASS] is True
        if reports_pass and final_verdict[VERDICT_KEY_MANIFEST_SHA256] == expected_manifest_sha256:
            return True
    return False
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def write_verdict(
    repo_root: str,
    bound_manifest_sha256: str,
    is_all_pass: bool,
    all_findings: list,
    minted_from_agent_id: str,
) -> Path:
    """Persist a verification outcome bound to one surface hash.

    Creates the verdict directory when needed and overwrites any verdict
    already stored for this work tree.

    Args:
        repo_root: The repository top-level directory.
        bound_manifest_sha256: Hash of the surface manifest the verdict covers.
        is_all_pass: Whether the verifier reported a clean verdict.
        all_findings: The verifier's findings list (empty when clean).
        minted_from_agent_id: The subagent invocation id, kept for audit.

    Returns:
        The path the verdict was written to.
    """
    destination_file = verdict_path_for_repo(repo_root)
    destination_file.parent.mkdir(parents=True, exist_ok=True)
    # Fields are inserted in the order they should appear in the JSON file.
    verdict_record: dict = {}
    verdict_record[VERDICT_KEY_ALL_PASS] = is_all_pass
    verdict_record[VERDICT_KEY_MANIFEST_SHA256] = bound_manifest_sha256
    verdict_record["repo_root"] = repo_root
    verdict_record["findings"] = all_findings
    verdict_record["minted_from_agent_id"] = minted_from_agent_id
    verdict_record["minted_at_epoch_seconds"] = int(time.time())
    serialized_verdict = json.dumps(verdict_record, indent=VERDICT_JSON_INDENT)
    destination_file.write_text(serialized_verdict, encoding="utf-8")
    return destination_file
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def stripped_ast_dump(python_source: str) -> str | None:
|
|
509
|
+
"""Dump a Python module's AST with every docstring removed.
|
|
510
|
+
|
|
511
|
+
Comments never reach the AST, so two sources with equal stripped dumps
|
|
512
|
+
differ only in docstrings, comments, or formatting — never in behavior.
|
|
513
|
+
|
|
514
|
+
Args:
|
|
515
|
+
python_source: The module source text.
|
|
516
|
+
|
|
517
|
+
Returns:
|
|
518
|
+
The ``ast.dump`` text of the stripped tree, or None when the source
|
|
519
|
+
does not parse (callers treat unparseable sources as changed).
|
|
520
|
+
"""
|
|
521
|
+
try:
|
|
522
|
+
module_tree = ast.parse(python_source)
|
|
523
|
+
except (SyntaxError, ValueError):
|
|
524
|
+
return None
|
|
525
|
+
for each_node in ast.walk(module_tree):
|
|
526
|
+
if not isinstance(
|
|
527
|
+
each_node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
|
|
528
|
+
):
|
|
529
|
+
continue
|
|
530
|
+
node_body = each_node.body
|
|
531
|
+
if (
|
|
532
|
+
node_body
|
|
533
|
+
and isinstance(node_body[0], ast.Expr)
|
|
534
|
+
and isinstance(node_body[0].value, ast.Constant)
|
|
535
|
+
and isinstance(node_body[0].value.value, str)
|
|
536
|
+
):
|
|
537
|
+
each_node.body = node_body[1:] or [ast.Pass()]
|
|
538
|
+
return ast.dump(module_tree)
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _is_python_change_docstring_only(
    repo_root: str, merge_base_sha: str, repo_relative_path: str
) -> bool:
    """Tell whether a Python file differs from its base only cosmetically.

    Compares the merge-base blob (``git show <sha>:<path>``) with the file
    currently on disk by their docstring-stripped AST dumps.

    Args:
        repo_root: The repository top-level directory.
        merge_base_sha: The merge-base commit holding the old version.
        repo_relative_path: The file's path relative to the repo root.

    Returns:
        True only when the old version can be fetched, the new version can
        be read, the old version parses, and both stripped dumps are equal.
    """
    base_source = run_git(repo_root, "show", f"{merge_base_sha}:{repo_relative_path}")
    if base_source is None:
        return False
    work_tree_file = Path(repo_root) / repo_relative_path
    try:
        work_tree_source = work_tree_file.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return False
    base_dump = stripped_ast_dump(base_source)
    work_tree_dump = stripped_ast_dump(work_tree_source)
    if base_dump is None:
        # An unparseable base can never prove the change is cosmetic.
        return False
    return base_dump == work_tree_dump
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def _is_test_file_path(repo_relative_path: str) -> bool:
    """Tell whether a path's file name marks it as a pytest file.

    Only the final path component is examined; directories play no part.

    Args:
        repo_relative_path: The file's path relative to the repo root.

    Returns:
        True when the name equals ``CONFTEST_FILE_NAME``, or ends with
        ``PYTHON_EXTENSION`` and either starts with ``TEST_FILE_PREFIX`` or
        ends with ``TEST_FILE_SUFFIX``; False otherwise.
    """
    file_name = Path(repo_relative_path).name
    if file_name == CONFTEST_FILE_NAME:
        return True
    is_python_file = file_name.endswith(PYTHON_EXTENSION)
    follows_test_naming = is_python_file and (
        file_name.startswith(TEST_FILE_PREFIX) or file_name.endswith(TEST_FILE_SUFFIX)
    )
    return follows_test_naming
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def is_verification_exempt_diff(repo_root: str, merge_base_sha: str) -> bool:
    """Apply the mechanical exemption: did anything production-behavioral change?

    Every changed path has to fall into one of three buckets for the diff
    to be exempt: a pytest-named file, a file whose lower-cased extension
    is in ``DOCS_ONLY_EXTENSIONS``, or a modified Python file whose
    docstring-stripped AST equals the merge-base version. Untracked files
    count as changes and can only use the first two buckets, because they
    have no merge-base version to compare against. The diff is taken with
    ``--no-renames``, so a rename shows up as a delete plus an add and the
    deleted side is still judged on its own extension. Test files are
    exempt by policy rather than by analysis.

    Args:
        repo_root: The repository top-level directory.
        merge_base_sha: The merge-base commit sha the branch grew from.

    Returns:
        True when every change is exempt. False otherwise, including when
        git output is unavailable or a status line is malformed (fail
        closed).
    """
    name_status_text = run_git(
        repo_root, "-c", "core.quotePath=false", "diff", "--name-status", "--no-renames",
        merge_base_sha,
    )
    if name_status_text is None:
        return False
    untracked_paths = untracked_file_paths(repo_root)
    if untracked_paths is None:
        return False
    are_untracked_exempt = all(
        _is_test_file_path(each_untracked_path)
        or Path(each_untracked_path).suffix.lower() in DOCS_ONLY_EXTENSIONS
        for each_untracked_path in untracked_paths
    )
    if not are_untracked_exempt:
        return False
    # An empty diff produces no status lines, so the loop is skipped.
    for each_status_line in name_status_text.splitlines():
        status_fields = each_status_line.split("\t")
        if len(status_fields) < MINIMUM_STATUS_FIELD_COUNT:
            return False
        change_code = status_fields[0]
        changed_path = status_fields[-1]
        if _is_test_file_path(changed_path):
            continue
        file_extension = Path(changed_path).suffix.lower()
        if file_extension in DOCS_ONLY_EXTENSIONS:
            continue
        # Only an in-place modification of a Python file can still qualify;
        # adds, deletes, and every other file type are gated.
        is_modified_python = (
            file_extension == PYTHON_EXTENSION and change_code.startswith("M")
        )
        if not is_modified_python:
            return False
        if not _is_python_change_docstring_only(repo_root, merge_base_sha, changed_path):
            return False
    return True
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _print_live_manifest_hash(repo_directory: str) -> int:
    """Emit the live surface-manifest hash for the repo containing a directory.

    A workflow code-verifier runs this to learn which hash its verdict
    must carry, so on success stdout holds the hash and nothing more.

    Args:
        repo_directory: A directory inside the work tree to bind the verdict to.

    Returns:
        0 once the hash has been printed. 1, with nothing printed, when the
        repo root, the merge base, or the manifest cannot be resolved.
    """
    repo_root = resolve_repo_root(repo_directory)
    # Each step runs only if the previous one produced a value.
    merge_base_sha = resolve_merge_base(repo_root) if repo_root is not None else None
    surface_manifest_text = (
        branch_surface_manifest(repo_root, merge_base_sha)
        if merge_base_sha is not None
        else None
    )
    if surface_manifest_text is None:
        return 1
    print(manifest_sha256(surface_manifest_text))
    return 0
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def main() -> None:
    """Entry point for the verdict-store command line.

    The only accepted invocation is ``<flag> <repo_root>`` with the flag
    equal to ``MANIFEST_HASH_CLI_FLAG``. It prints the live
    ``manifest_sha256`` so a workflow code-verifier can bind its verdict
    to the surface the gate checks, then exits with that helper's status.
    Every other argument shape exits with status 1 and prints nothing.
    """
    cli_arguments = sys.argv[1:]
    exit_status = 1
    if len(cli_arguments) == 2 and cli_arguments[0] == MANIFEST_HASH_CLI_FLAG:
        exit_status = _print_live_manifest_hash(cli_arguments[1])
    sys.exit(exit_status)
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
# Run the CLI only when this file is executed as a script; importing the
# module must not call main(), which always ends in sys.exit().
if __name__ == "__main__":
    main()
|