claude-dev-env 1.58.0 → 1.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -2
- package/_shared/pr-loop/scripts/code_rules_gate.py +36 -3
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/code_rules_gate_constants.py +6 -0
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/reviews_disabled_constants.py +1 -0
- package/_shared/pr-loop/scripts/reviews_disabled.py +12 -0
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +265 -0
- package/_shared/pr-loop/scripts/tests/test_reviews_disabled.py +29 -0
- package/audit-rubrics/category_rubrics/category-b-selector-engine-compat.md +1 -1
- package/audit-rubrics/category_rubrics/category-e-dead-code.md +1 -0
- package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
- package/audit-rubrics/prompts/category-b-selector-engine-compat.md +2 -2
- package/bin/install.mjs +100 -27
- package/bin/install.test.mjs +133 -1
- package/docs/CODE_RULES.md +3 -3
- package/hooks/blocking/code_rules_annotations_length.py +153 -0
- package/hooks/blocking/code_rules_dead_dataclass_field.py +319 -0
- package/hooks/blocking/code_rules_dead_module_constant.py +321 -0
- package/hooks/blocking/code_rules_duplicate_body.py +439 -0
- package/hooks/blocking/code_rules_enforcer.py +190 -21
- package/hooks/blocking/code_rules_magic_values.py +98 -0
- package/hooks/blocking/code_rules_shared.py +41 -0
- package/hooks/blocking/code_rules_typeddict_stub.py +172 -0
- package/hooks/blocking/config/__init__.py +5 -0
- package/hooks/blocking/config/verified_commit_constants.py +106 -0
- package/hooks/blocking/destructive_command_blocker.py +1027 -12
- package/hooks/blocking/hook_prose_detector_consistency.py +150 -0
- package/hooks/blocking/subprocess_budget_completeness.py +380 -0
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +225 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +1 -0
- package/hooks/blocking/test_code_rules_enforcer_cross_skill_duplicate.py +146 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_dataclass_field.py +467 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_module_constant.py +188 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body.py +330 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body_hook_routing.py +179 -0
- package/hooks/blocking/test_code_rules_enforcer_magic_slice_bounds.py +133 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias.py +415 -0
- package/hooks/blocking/test_code_rules_enforcer_zero_payload_alias_hook_routing.py +156 -0
- package/hooks/blocking/test_destructive_command_blocker.py +622 -3
- package/hooks/blocking/test_hook_prose_detector_consistency.py +265 -0
- package/hooks/blocking/test_subprocess_budget_completeness.py +588 -0
- package/hooks/blocking/test_verdict_directory_write_blocker.py +720 -0
- package/hooks/blocking/test_verification_verdict_store.py +278 -0
- package/hooks/blocking/test_verified_commit_gate.py +368 -0
- package/hooks/blocking/test_verified_commit_message_accuracy_blocker.py +131 -0
- package/hooks/blocking/test_verifier_verdict_minter.py +214 -0
- package/hooks/blocking/test_workflow_substitution_slot_blocker.py +242 -0
- package/hooks/blocking/verdict_directory_write_blocker.py +667 -0
- package/hooks/blocking/verification_verdict_store.py +446 -0
- package/hooks/blocking/verified_commit_gate.py +523 -0
- package/hooks/blocking/verified_commit_message_accuracy_blocker.py +152 -0
- package/hooks/blocking/verifier_verdict_minter.py +299 -0
- package/hooks/blocking/workflow_substitution_slot_blocker.py +159 -0
- package/hooks/diagnostic/test_hook_log_extractor.py +3 -3
- package/hooks/hooks.json +58 -1
- package/hooks/hooks_constants/blocking_check_limits.py +1 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +16 -0
- package/hooks/hooks_constants/dead_dataclass_field_constants.py +25 -0
- package/hooks/hooks_constants/dead_module_constant_constants.py +20 -0
- package/hooks/hooks_constants/destructive_command_segment_constants.py +178 -0
- package/hooks/hooks_constants/duplicate_function_body_constants.py +34 -0
- package/hooks/hooks_constants/hook_prose_detector_consistency_constants.py +30 -0
- package/hooks/hooks_constants/precommit_code_rules_gate_constants.py +1 -1
- package/hooks/hooks_constants/subprocess_budget_completeness_constants.py +5 -0
- package/hooks/hooks_constants/workflow_substitution_slot_blocker_constants.py +22 -0
- package/package.json +1 -1
- package/rules/docstring-prose-matches-implementation.md +43 -0
- package/rules/file-global-constants.md +7 -1
- package/rules/hook-prose-matches-detector.md +26 -0
- package/rules/no-cross-skill-duplicate-helpers.md +29 -0
- package/rules/no-inline-destructive-literals.md +11 -0
- package/rules/workflow-substitution-slots.md +7 -0
- package/skills/_shared/pr-loop/scripts/preflight_worktree.py +392 -0
- package/skills/_shared/pr-loop/scripts/skills_pr_loop_constants/preflight_constants.py +70 -0
- package/skills/_shared/pr-loop/scripts/test_preflight_worktree.py +263 -0
- package/skills/autoconverge/SKILL.md +67 -19
- package/skills/autoconverge/reference/closing-report.md +59 -17
- package/skills/autoconverge/reference/convergence.md +7 -3
- package/skills/autoconverge/reference/stop-conditions.md +7 -2
- package/skills/autoconverge/workflow/aggregate_runs.py +371 -0
- package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +193 -76
- package/skills/autoconverge/workflow/converge.clean-audit.test.mjs +76 -0
- package/skills/autoconverge/workflow/converge.contract.test.mjs +206 -206
- package/skills/autoconverge/workflow/converge.copilot-gate.test.mjs +265 -0
- package/skills/autoconverge/workflow/converge.mjs +234 -42
- package/skills/autoconverge/workflow/convergence_summary.py +110 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-ab1c2d3e4f5a6b7c8.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/workflows/wf_881252e6-700.json +7 -0
- package/skills/autoconverge/workflow/render_report.py +488 -397
- package/skills/autoconverge/workflow/test_aggregate_runs.py +134 -0
- package/skills/autoconverge/workflow/test_convergence_summary.py +132 -0
- package/skills/autoconverge/workflow/test_render_report.py +488 -259
- package/skills/pr-converge/reference/per-tick.md +28 -8
- package/skills/pr-converge/scripts/check_convergence.py +195 -64
- package/skills/pr-converge/scripts/test_check_convergence.py +173 -2
- package/skills/rebase/SKILL.md +2 -4
- package/skills/update/SKILL.md +37 -5
- package/system-prompts/software-engineer.xml +2 -6
- package/hooks/blocking/content_search_to_zoekt_redirector.py +0 -59
- package/hooks/blocking/content_search_zoekt_bash_block_reason.py +0 -25
- package/hooks/blocking/content_search_zoekt_block_payload.py +0 -21
- package/hooks/blocking/content_search_zoekt_indexed_paths.py +0 -24
- package/hooks/blocking/content_search_zoekt_indexed_roots_config.py +0 -131
- package/hooks/blocking/content_search_zoekt_redirect_guidance.py +0 -52
- package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +0 -61
- package/hooks/blocking/test_content_search_to_zoekt_redirector_unit.py +0 -92
- package/hooks/blocking/test_content_search_zoekt_indexed_roots_config.py +0 -102
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Unit tests for the verified-commit-message-accuracy PreToolUse hook."""
|
|
2
|
+
|
|
3
|
+
import importlib.util
|
|
4
|
+
import pathlib
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
# Directory holding this test module; the hook file is loaded from the same place.
_HOOK_DIR = pathlib.Path(__file__).parent
if str(_HOOK_DIR) not in sys.path:
    sys.path.insert(0, str(_HOOK_DIR))

# Load the hook directly from its file path under a fixed module name.
hook_spec = importlib.util.spec_from_file_location(
    "verified_commit_message_accuracy_blocker",
    _HOOK_DIR / "verified_commit_message_accuracy_blocker.py",
)
assert hook_spec is not None and hook_spec.loader is not None
hook_module = importlib.util.module_from_spec(hook_spec)
hook_spec.loader.exec_module(hook_module)

# Module-level aliases for the hook callables the tests exercise.
is_guarded_file = hook_module.is_guarded_file
claims_blanket_comment_exemption = hook_module.claims_blanket_comment_exemption
extract_written_text = hook_module.extract_written_text
build_corrective_message = hook_module.build_corrective_message

# Source text whose wording says comment-only surfaces are exempt automatically.
OFFENDING_MESSAGE = (
    "CORRECTIVE_MESSAGE = (\n"
    ' "BLOCKED: [VERIFIED_COMMIT_GATE] This branch surface has no passing "\n'
    ' "verification verdict. Spawn the code-verifier agent (Agent tool, "\n'
    " \"subagent_type 'code-verifier') with the task texts, the diff scope, \"\n"
    ' "and recorded baselines; when it finishes with a clean verdict the "\n'
    ' "SubagentStop hook mints the verdict and this command will pass. Any "\n'
    ' "file change after verification invalidates the verdict, so verify "\n'
    ' "last. Docs-, docstring-, comment-, and test-only surfaces are exempt "\n'
    ' "automatically."\n'
    ")\n"
)

# Wording that mentions comments but only declares docs exempt.
ACCURATE_DOCS_EXEMPTION_MENTIONING_COMMENTS = (
    "Comments inside Python files are stripped; docs are exempt "
    "automatically by extension."
)

# Source text using the extension / stripped-AST exemption wording.
ACCURATE_MESSAGE = (
    "CORRECTIVE_MESSAGE = (\n"
    ' "BLOCKED: [VERIFIED_COMMIT_GATE] ... Docs and images are exempt by "\n'
    ' "extension, and Python files whose docstring- and comment-stripped AST "\n'
    ' "is unchanged."\n'
    ")\n"
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_constants_file_is_guarded() -> None:
    """The verified-commit constants module path is reported as guarded."""
    constants_path = "/repo/.claude/hooks/blocking/config/verified_commit_constants.py"
    assert is_guarded_file(constants_path)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_unrelated_file_is_not_guarded() -> None:
    """A different hook module path is not reported as guarded."""
    unrelated_path = "/repo/.claude/hooks/blocking/gh_body_arg_blocker.py"
    assert not is_guarded_file(unrelated_path)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_blanket_comment_exemption_claim_is_detected() -> None:
    """The OFFENDING_MESSAGE fixture is flagged as a blanket comment exemption claim."""
    is_flagged = claims_blanket_comment_exemption(OFFENDING_MESSAGE)
    assert is_flagged
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_blanket_claim_detected_regardless_of_leading_words() -> None:
    """A sentence that opens with the comment-only claim is still flagged."""
    bare_claim = "Comment-only surfaces are exempt automatically."
    assert claims_blanket_comment_exemption(bare_claim)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_accurate_exemption_wording_passes() -> None:
    """The ACCURATE_MESSAGE fixture is not flagged."""
    is_flagged = claims_blanket_comment_exemption(ACCURATE_MESSAGE)
    assert not is_flagged
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_accurate_docs_exemption_mentioning_comments_passes() -> None:
    """A docs exemption that merely mentions comments is not flagged."""
    is_flagged = claims_blanket_comment_exemption(ACCURATE_DOCS_EXEMPTION_MENTIONING_COMMENTS)
    assert not is_flagged
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_comma_joined_docs_exemption_mentioning_comments_passes() -> None:
    """A comma-joined sentence whose exemption clause is about docs is not flagged."""
    comma_joined_sentence = "Comments are handled, and docs are exempt automatically."
    assert not claims_blanket_comment_exemption(comma_joined_sentence)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_python_ast_clause_mentioning_comments_passes() -> None:
    """Wording about comment-stripped AST changes plus docs is not flagged."""
    ast_and_docs_wording = (
        "Python comment-stripped AST changes and docs are exempt automatically "
        "by extension"
    )
    assert not claims_blanket_comment_exemption(ast_and_docs_wording)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_single_clause_python_ast_exemption_passes() -> None:
    """A single-clause sentence about an unchanged comment-stripped AST is not flagged."""
    single_clause_wording = (
        "Python files whose comment-stripped AST is unchanged are exempt "
        "automatically."
    )
    assert not claims_blanket_comment_exemption(single_clause_wording)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_commentary_word_stem_passes() -> None:
    """The word "commentary" does not count as a comment exemption claim."""
    commentary_sentence = "Our commentary on the approach is exempt automatically from blame."
    assert not claims_blanket_comment_exemption(commentary_sentence)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_corrective_message_names_only_the_two_real_exemptions() -> None:
    """The corrective message contains both exemption phrases and neither excluded phrase."""
    corrective_message = build_corrective_message()
    for required_phrase in ("exempt by extension", "docstring- and comment-stripped AST"):
        assert required_phrase in corrective_message
    for excluded_phrase in ("test file", "by name convention"):
        assert excluded_phrase not in corrective_message
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_message_without_exemption_claim_passes() -> None:
    """A corrective message with no exemption wording at all is not flagged."""
    message_without_claim = (
        'CORRECTIVE_MESSAGE = "Spawn the code-verifier agent to earn a verdict."'
    )
    assert not claims_blanket_comment_exemption(message_without_claim)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_write_content_is_extracted() -> None:
    """Text under the ``content`` key is extracted and then flagged."""
    write_tool_input = {"content": OFFENDING_MESSAGE}
    assert claims_blanket_comment_exemption(extract_written_text(write_tool_input))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_edit_new_string_is_extracted() -> None:
    """Text under the ``new_string`` key is extracted and then flagged."""
    edit_tool_input = {"new_string": OFFENDING_MESSAGE}
    assert claims_blanket_comment_exemption(extract_written_text(edit_tool_input))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_edit_new_string_with_accurate_wording_is_clean() -> None:
    """Accurate wording under the ``new_string`` key is extracted and not flagged."""
    edit_tool_input = {"new_string": ACCURATE_MESSAGE}
    assert not claims_blanket_comment_exemption(extract_written_text(edit_tool_input))
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Tests for the agent-type gate in verifier_verdict_minter.
|
|
2
|
+
|
|
3
|
+
The minter mints a verdict only for a code-verifier stop event. The live
|
|
4
|
+
SubagentStop payload names the stopping subagent by ``agent_id`` and carries
|
|
5
|
+
no flat agent-type key, so the minter recovers the spawning agent type from
|
|
6
|
+
the parent transcript: it walks the parent transcript for the completion
|
|
7
|
+
record whose ``agentId`` matches the payload and reads that record's sibling
|
|
8
|
+
``agentType``. These tests build a faithful parent transcript and assert the
|
|
9
|
+
minter gates on the resolved type and on the shared MINTING_AGENT_TYPE
|
|
10
|
+
constant, so a rename in config propagates to the minter without a second
|
|
11
|
+
edit. One test proves that only a structured ``agentType`` key resolves: a
|
|
12
|
+
text block that merely quotes the identity keys mints nothing. A further test
|
|
13
|
+
holds the shipped settings.json to the minter docstring's anti-forgery claim:
|
|
14
|
+
the main session is denied writes to the verdict directory, so only this hook
|
|
15
|
+
can mint a passing verdict.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import importlib.util
|
|
19
|
+
import json
|
|
20
|
+
import pathlib
|
|
21
|
+
import subprocess
|
|
22
|
+
import sys
|
|
23
|
+
|
|
24
|
+
import pytest
|
|
25
|
+
|
|
26
|
+
# Directory holding this test module; the minter and its config are loaded from here.
_HOOK_DIR = pathlib.Path(__file__).parent
if str(_HOOK_DIR) not in sys.path:
    sys.path.insert(0, str(_HOOK_DIR))

# settings.json is looked up two directories above the hook directory.
_SETTINGS_PATH = _HOOK_DIR.parent.parent / "settings.json"

# Load the minter hook directly from its file path.
minter_spec = importlib.util.spec_from_file_location(
    "verifier_verdict_minter",
    _HOOK_DIR / "verifier_verdict_minter.py",
)
assert minter_spec is not None and minter_spec.loader is not None
minter_module = importlib.util.module_from_spec(minter_spec)
minter_spec.loader.exec_module(minter_module)
mint_for_payload = minter_module.mint_for_payload
resolved_subagent_type = minter_module.resolved_subagent_type

# Load the constants module the same way and read MINTING_AGENT_TYPE from it.
constants_spec = importlib.util.spec_from_file_location(
    "verified_commit_constants",
    _HOOK_DIR / "config" / "verified_commit_constants.py",
)
assert constants_spec is not None and constants_spec.loader is not None
constants_module = importlib.util.module_from_spec(constants_spec)
constants_spec.loader.exec_module(constants_module)
MINTING_AGENT_TYPE = constants_module.MINTING_AGENT_TYPE
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _write_parent_transcript(transcript_file: pathlib.Path, agent_id: str, agent_type: str) -> None:
    """Write a one-line JSONL transcript holding a single assistant record.

    The record's only content entry is a ``tool_use`` block that carries
    ``agentId`` and ``agentType`` keys set from the arguments.
    """
    tool_use_block = {
        "type": "tool_use",
        "name": "Task",
        "input": {"subagent_type": agent_type, "description": "Verify"},
        "agentId": agent_id,
        "agentType": agent_type,
        "content": [{"type": "text", "text": "verification complete"}],
    }
    spawn_record = {"type": "assistant", "message": {"content": [tool_use_block]}}
    serialized_line = json.dumps(spawn_record) + "\n"
    transcript_file.write_text(serialized_line, encoding="utf-8")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_resolves_subagent_type_from_parent_transcript(tmp_path: pathlib.Path) -> None:
    """The agent type is resolved when the payload's agent_id matches the transcript record."""
    transcript_file = tmp_path / "parent.jsonl"
    _write_parent_transcript(transcript_file, "agent-7", MINTING_AGENT_TYPE)
    resolved_type = resolved_subagent_type(
        {"agent_id": "agent-7", "transcript_path": str(transcript_file)}
    )
    assert resolved_type == MINTING_AGENT_TYPE
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_resolves_none_when_agent_id_absent_from_transcript(
    tmp_path: pathlib.Path,
) -> None:
    """Resolution yields None when no transcript record carries the payload's agent_id."""
    transcript_file = tmp_path / "parent.jsonl"
    _write_parent_transcript(transcript_file, "agent-7", MINTING_AGENT_TYPE)
    resolved_type = resolved_subagent_type(
        {"agent_id": "different-agent", "transcript_path": str(transcript_file)}
    )
    assert resolved_type is None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_resolves_type_when_record_arrives_after_first_read(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The type resolves when the record is written during the minter's sleep call.

    The transcript starts empty; the patched ``sleep`` writes the record the
    first time it runs and does nothing once the file has content.
    """
    transcript_file = tmp_path / "parent.jsonl"
    transcript_file.write_text("", encoding="utf-8")

    def write_record_on_first_sleep(_seconds: float) -> None:
        if not transcript_file.read_text(encoding="utf-8"):
            _write_parent_transcript(transcript_file, "agent-7", MINTING_AGENT_TYPE)

    monkeypatch.setattr(minter_module.time, "sleep", write_record_on_first_sleep)
    resolved_type = resolved_subagent_type(
        {"agent_id": "agent-7", "transcript_path": str(transcript_file)}
    )
    assert resolved_type == MINTING_AGENT_TYPE
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_quoted_agent_type_in_text_block_does_not_resolve(
    tmp_path: pathlib.Path,
) -> None:
    """A text block that only quotes the identity keys as JSON resolves to None."""
    quoted_identity = json.dumps({"agentId": "agent-7", "agentType": MINTING_AGENT_TYPE})
    forged_entry = {
        "type": "assistant",
        "message": {"content": [{"type": "text", "text": quoted_identity}]},
    }
    transcript_file = tmp_path / "parent.jsonl"
    transcript_file.write_text(json.dumps(forged_entry) + "\n", encoding="utf-8")
    resolved_type = resolved_subagent_type(
        {"agent_id": "agent-7", "transcript_path": str(transcript_file)}
    )
    assert resolved_type is None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_non_verifier_agent_type_mints_nothing(tmp_path: pathlib.Path) -> None:
    """A transcript naming the ``general-purpose`` agent type yields no minted verdict."""
    transcript_file = tmp_path / "parent.jsonl"
    _write_parent_transcript(transcript_file, "agent-7", "general-purpose")
    minted_path = mint_for_payload(
        {
            "agent_id": "agent-7",
            "transcript_path": str(transcript_file),
            "agent_transcript_path": "",
            "cwd": ".",
        }
    )
    assert minted_path is None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_minting_agent_type_passes_the_agent_type_gate(
    tmp_path: pathlib.Path,
) -> None:
    """The minting agent type resolves from the transcript, yet nothing is minted.

    The resolution assertion distinguishes this test from
    ``test_non_verifier_agent_type_mints_nothing``, which makes the same
    ``mint_for_payload(...) is None`` assertion for a non-verifier agent type.
    """
    transcript_file = tmp_path / "parent.jsonl"
    _write_parent_transcript(transcript_file, "agent-7", MINTING_AGENT_TYPE)
    # Pin the gate input: the transcript must resolve to the minting agent type.
    resolved_type = resolved_subagent_type(
        {"agent_id": "agent-7", "transcript_path": str(transcript_file)}
    )
    assert resolved_type == MINTING_AGENT_TYPE
    payload = {
        "agent_id": "agent-7",
        "transcript_path": str(transcript_file),
        "agent_transcript_path": "",
        "cwd": ".",
    }
    # NOTE(review): presumably None here because agent_transcript_path is empty,
    # so there is no verdict text to mint from; confirm against mint_for_payload.
    assert mint_for_payload(payload) is None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _init_repo_with_upstream_and_edit(repo_root: pathlib.Path) -> None:
    """Create a git repo with one commit, then leave an uncommitted edit.

    A branch named ``origin/main`` is forced to HEAD after the commit, and
    ``module.py`` is rewritten afterwards without being committed.
    """
    tracked_file = repo_root / "module.py"
    git_prefix = ["git", "-C", str(repo_root)]

    def run_git(*arguments: str) -> None:
        subprocess.run([*git_prefix, *arguments], check=True)

    run_git("init", "-q")
    run_git("config", "user.email", "verifier@test")
    run_git("config", "user.name", "verifier")
    tracked_file.write_text("answer = 1\n", encoding="utf-8")
    run_git("add", "-A")
    run_git("commit", "-qm", "init")
    run_git("branch", "-f", "origin/main", "HEAD")
    tracked_file.write_text("answer = 2\n", encoding="utf-8")
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_clean_verifier_verdict_mints_a_verdict_file(tmp_path: pathlib.Path) -> None:
    """A verifier transcript with an all-pass verdict block produces a verdict file.

    The minted file is parsed as JSON and its ``all_pass`` field must be True.
    The file is removed afterwards whether or not the assertions pass.
    """
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    _init_repo_with_upstream_and_edit(repo_root)

    transcript_file = tmp_path / "parent.jsonl"
    _write_parent_transcript(transcript_file, "agent-7", MINTING_AGENT_TYPE)

    verdict_text_block = {
        "type": "text",
        "text": 'ok\n```verdict\n{"all_pass": true, "findings": []}\n```\n',
    }
    agent_entry = {"type": "assistant", "message": {"content": [verdict_text_block]}}
    agent_transcript = tmp_path / "agent.jsonl"
    agent_transcript.write_text(json.dumps(agent_entry) + "\n", encoding="utf-8")

    verdict_path = mint_for_payload(
        {
            "agent_id": "agent-7",
            "transcript_path": str(transcript_file),
            "agent_transcript_path": str(agent_transcript),
            "cwd": str(repo_root),
        }
    )
    try:
        assert verdict_path is not None
        verdict_record = json.loads(verdict_path.read_text(encoding="utf-8"))
        assert verdict_record["all_pass"] is True
    finally:
        # Remove the minted file so it does not outlive the test.
        if verdict_path is not None and verdict_path.exists():
            verdict_path.unlink()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _deny_rules() -> list[str]:
    """Return the ``permissions.deny`` list read from the settings.json at ``_SETTINGS_PATH``."""
    settings_text = _SETTINGS_PATH.read_text(encoding="utf-8")
    permissions = json.loads(settings_text)["permissions"]
    return permissions["deny"]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def test_settings_deny_verdict_directory_write() -> None:
    """The deny list contains the Write rule for the verification directory."""
    write_rule = "Write($HOME/.claude/verification/**)"
    assert write_rule in _deny_rules()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_settings_deny_verdict_directory_edit() -> None:
    """The deny list contains the Edit rule for the verification directory."""
    edit_rule = "Edit($HOME/.claude/verification/**)"
    assert edit_rule in _deny_rules()
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Unit tests for workflow_substitution_slot_blocker PreToolUse hook."""
|
|
2
|
+
|
|
3
|
+
import importlib.util
|
|
4
|
+
import io
|
|
5
|
+
import json
|
|
6
|
+
import pathlib
|
|
7
|
+
import sys
|
|
8
|
+
from unittest import mock
|
|
9
|
+
|
|
10
|
+
# This test module's directory and its parent are both put on sys.path.
_HOOK_DIR = pathlib.Path(__file__).parent
_HOOKS_ROOT = _HOOK_DIR.parent
for _each_root in (str(_HOOK_DIR), str(_HOOKS_ROOT)):
    if _each_root not in sys.path:
        sys.path.insert(0, _each_root)

# Load the hook directly from its file path under a fixed module name.
hook_spec = importlib.util.spec_from_file_location(
    "workflow_substitution_slot_blocker",
    _HOOK_DIR / "workflow_substitution_slot_blocker.py",
)
assert hook_spec is not None and hook_spec.loader is not None
hook_module = importlib.util.module_from_spec(hook_spec)
hook_spec.loader.exec_module(hook_module)

# Module-level aliases for the hook callables the tests exercise.
content_has_violation = hook_module.content_has_violation
find_bare_index_segments = hook_module.find_bare_index_segments
find_bare_path_segments = hook_module.find_bare_path_segments
has_iteration_loop = hook_module.has_iteration_loop
written_content = hook_module.written_content


# Template that writes the per-candidate directory as a bare ``cand_i``.
_VIOLATING_TEMPLATE = (
    "For EACH candidate i, build a bible dir cand_i per the contract.\n"
    " & ${PY} -c \"...Path(r'${args.work_dir}\\\\cand_i\\\\plate.svg')...\"\n"
    " & ${PY} compose.py --out ${args.work_dir}\\\\cand_i\\\\sample.png "
    "--glow <candidate glow_hex>\n"
    'Return: {key: "cand_i", name, sample_png}\n'
)

# The same template with the index written as ``cand_<i>``.
_FIXED_TEMPLATE = (
    "For EACH candidate i, build a bible dir cand_<i> per the contract.\n"
    " & ${PY} -c \"...Path(r'${args.work_dir}\\\\cand_<i>\\\\plate.svg')...\"\n"
    " & ${PY} compose.py --out ${args.work_dir}\\\\cand_<i>\\\\sample.png "
    "--glow <candidate glow_hex>\n"
    'Return: {key: "cand_<i>", name, sample_png}\n'
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_detects_bare_index_in_path_segment() -> None:
    """A bare ``cand_i`` path segment is the only segment reported."""
    path_text = "render Path(r'${args.work_dir}\\\\cand_i\\\\plate.svg')"
    found_segments = find_bare_index_segments(path_text)
    assert found_segments == {"cand_i"}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_detects_quoted_key_when_token_also_appears_as_path_segment() -> None:
    """``cand_i`` is reported when it appears both as a path segment and a quoted key."""
    looped_path_and_key = "write ${work}\\\\cand_i\\\\plate.svg\n{key: \"cand_i\", name}"
    found_segments = find_bare_index_segments(looped_path_and_key)
    assert "cand_i" in found_segments
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_quoted_key_alone_without_path_segment_is_not_detected() -> None:
    """A quoted key with no matching path segment yields an empty set."""
    quoted_only_key = '{key: "metric_i", name}'
    assert find_bare_index_segments(quoted_only_key) == set()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_index_segments_equal_path_segments_for_looped_path_and_key() -> None:
    """Both segment finders return equal results for a path-plus-key input."""
    looped_path_and_key = "write ${work}\\\\cand_i\\\\plate.svg\n{key: \"cand_i\", name}"
    index_segments = find_bare_index_segments(looped_path_and_key)
    path_segments = find_bare_path_segments(looped_path_and_key)
    assert index_segments == path_segments
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_index_segments_equal_path_segments_for_quoted_only_key() -> None:
    """Both segment finders return equal results for a quoted-key-only input."""
    quoted_only_key = '{key: "metric_i", name}'
    index_segments = find_bare_index_segments(quoted_only_key)
    path_segments = find_bare_path_segments(quoted_only_key)
    assert index_segments == path_segments
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_marked_substitution_slot_is_not_a_bare_segment() -> None:
    """A ``cand_<i>`` path segment yields an empty set."""
    marked_path_text = "render Path(r'${args.work_dir}\\\\cand_<i>\\\\plate.svg')"
    found_segments = find_bare_index_segments(marked_path_text)
    assert found_segments == set()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_violating_template_is_flagged() -> None:
    """The ``_VIOLATING_TEMPLATE`` fixture is reported as a violation."""
    has_violation = content_has_violation(_VIOLATING_TEMPLATE)
    assert has_violation is True
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_fixed_template_passes() -> None:
    """The ``_FIXED_TEMPLATE`` fixture is not reported as a violation."""
    has_violation = content_has_violation(_FIXED_TEMPLATE)
    assert has_violation is False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_template_without_angle_convention_is_not_flagged() -> None:
    """A looped template containing no angle-bracket slot is not a violation."""
    no_convention = (
        "For EACH candidate i, write to ${work}\\\\cand_i\\\\plate.svg and return.\n"
    )
    has_violation = content_has_violation(no_convention)
    assert has_violation is False
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_template_without_loop_is_not_flagged() -> None:
    """A template with a bare segment and a slot but no loop phrase is not a violation."""
    no_loop = "Write the plate to ${work}\\\\cand_i\\\\plate.svg using <glow_hex>.\n"
    has_violation = content_has_violation(no_loop)
    assert has_violation is False
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_each_inside_an_ordinary_word_is_not_a_loop() -> None:
    """Words that merely contain the letters "each" are not treated as a loop."""
    words_containing_each = ("reach", "teach", "breach", "bleach", "preach", "impeach")
    for sample_text in [f"{word} the end" for word in words_containing_each]:
        assert has_iteration_loop(sample_text) is False
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_standalone_lowercase_each_in_prose_is_not_a_loop() -> None:
    """A lowercase standalone "each" in prose is not treated as a loop."""
    prose_text = "use each color once"
    assert has_iteration_loop(prose_text) is False
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def test_standalone_each_keyword_is_a_loop() -> None:
    """The phrase "For EACH candidate i" is treated as a loop."""
    loop_text = "For EACH candidate i"
    assert has_iteration_loop(loop_text) is True
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_lowercase_for_each_phrase_is_still_a_loop() -> None:
    """The lowercase phrase "for each candidate" is treated as a loop."""
    loop_text = "for each candidate"
    assert has_iteration_loop(loop_text) is True
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_benign_prose_each_with_fixed_literal_is_not_flagged() -> None:
    """Prose "each" alongside a quoted ``tier_i`` field name is not a violation."""
    benign_template = (
        "Render each layer to <layer.svg>.\n"
        "The protocol field is named 'tier_i' as a permanent identifier.\n"
    )
    has_violation = content_has_violation(benign_template)
    assert has_violation is False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_quoted_permanent_identifier_key_is_not_flagged() -> None:
    """A looped template whose only ``_i`` token is a quoted key is not a violation."""
    permanent_identifier_template = (
        'For EACH candidate, render <plate.svg>.\nReturn {key: "metric_i", value}'
    )
    has_violation = content_has_violation(permanent_identifier_template)
    assert has_violation is False
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_quoted_key_flagged_only_when_token_also_appears_as_path_segment() -> None:
    """A looped template with ``cand_i`` as both path segment and quoted key is a violation."""
    looping_path_and_key = (
        "For EACH candidate, write <plate.svg> to ${work}\\\\cand_i\\\\plate.svg.\n"
        'Return {key: "cand_i", name}\n'
    )
    has_violation = content_has_violation(looping_path_and_key)
    assert has_violation is True
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def test_written_content_reads_multiedit_new_strings() -> None:
    """Text from every MultiEdit ``new_string`` appears in the combined content."""
    first_edit = {"old_string": "x", "new_string": "first ${work}\\\\cand_i\\\\plate.svg"}
    second_edit = {"old_string": "y", "new_string": "second <glow_hex>"}
    combined = written_content("MultiEdit", {"edits": [first_edit, second_edit]})
    for expected_fragment in ("cand_i", "<glow_hex>"):
        assert expected_fragment in combined
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _run_main_with_io(input_text: str) -> str:
    """Run the hook's ``main`` with ``input_text`` as stdin and return its stdout text.

    ``sys.stdin`` and ``sys.stdout`` are patched with in-memory streams for the
    duration of the call. A ``SystemExit`` raised by ``main`` is swallowed.
    """
    stdin_patch = mock.patch("sys.stdin", io.StringIO(input_text))
    stdout_patch = mock.patch("sys.stdout", new_callable=io.StringIO)
    with stdin_patch, stdout_patch as captured_stdout:
        try:
            hook_module.main()
        except SystemExit:
            # The captured output is still returned after an exit.
            pass
        return captured_stdout.getvalue()
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def test_main_blocks_violating_workflow_write() -> None:
    """A Write of the violating template to a ``.workflow.js`` path is denied."""
    tool_input = {
        "file_path": "/repo/scripts/shared_palette_gate.workflow.js",
        "content": _VIOLATING_TEMPLATE,
    }
    hook_input = {"tool_name": "Write", "tool_input": tool_input}
    payload = json.loads(_run_main_with_io(json.dumps(hook_input)))
    decision = payload["hookSpecificOutput"]["permissionDecision"]
    assert decision == "deny"
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_main_blocks_violating_workflow_edit() -> None:
    """An Edit whose ``new_string`` is the violating template is denied."""
    tool_input = {
        "file_path": "/repo/scripts/shared_palette_gate.workflow.js",
        "new_string": _VIOLATING_TEMPLATE,
    }
    hook_input = {"tool_name": "Edit", "tool_input": tool_input}
    payload = json.loads(_run_main_with_io(json.dumps(hook_input)))
    decision = payload["hookSpecificOutput"]["permissionDecision"]
    assert decision == "deny"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def test_main_blocks_violating_workflow_multiedit() -> None:
    """A MultiEdit containing the violating template as a ``new_string`` is denied."""
    violating_edit = {"old_string": "placeholder", "new_string": _VIOLATING_TEMPLATE}
    tool_input = {
        "file_path": "/repo/scripts/shared_palette_gate.workflow.js",
        "edits": [violating_edit],
    }
    hook_input = {"tool_name": "MultiEdit", "tool_input": tool_input}
    payload = json.loads(_run_main_with_io(json.dumps(hook_input)))
    decision = payload["hookSpecificOutput"]["permissionDecision"]
    assert decision == "deny"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_main_passes_fixed_workflow_write() -> None:
    """A Write of the fixed template to a ``.workflow.js`` path produces no output."""
    tool_input = {
        "file_path": "/repo/scripts/shared_palette_gate.workflow.js",
        "content": _FIXED_TEMPLATE,
    }
    hook_input = {"tool_name": "Write", "tool_input": tool_input}
    output_text = _run_main_with_io(json.dumps(hook_input))
    assert output_text == ""
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_main_passes_non_workflow_path() -> None:
    """The violating template written to ``helper.js`` produces no output."""
    tool_input = {
        "file_path": "/repo/scripts/helper.js",
        "content": _VIOLATING_TEMPLATE,
    }
    hook_input = {"tool_name": "Write", "tool_input": tool_input}
    output_text = _run_main_with_io(json.dumps(hook_input))
    assert output_text == ""
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_main_passes_wrong_tool_name() -> None:
    """A Bash tool call naming a ``.workflow.js`` path produces no output."""
    tool_input = {
        "file_path": "/repo/scripts/x.workflow.js",
        "command": "echo cand_i",
    }
    hook_input = {"tool_name": "Bash", "tool_input": tool_input}
    output_text = _run_main_with_io(json.dumps(hook_input))
    assert output_text == ""
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_main_passes_malformed_json() -> None:
    """Stdin that is not valid JSON produces no output."""
    malformed_input = "not valid json {{{"
    assert _run_main_with_io(malformed_input) == ""
|