claude-dev-env 1.12.1 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.mjs +1 -1
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +2 -14
- package/hooks/blocking/prompt-workflow-stop-guard.py +14 -0
- package/hooks/blocking/prompt_workflow_gate_config.py +117 -0
- package/hooks/blocking/prompt_workflow_gate_core.py +30 -145
- package/hooks/blocking/test_context_control_policy_files.py +2 -2
- package/hooks/blocking/test_prompt_workflow_gate_core.py +51 -14
- package/hooks/blocking/test_prompt_workflow_stop_guard.py +38 -4
- package/hooks/hooks.json +0 -5
- package/package.json +1 -1
- package/skills/agent-prompt/SKILL.md +1 -2
- package/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md +6 -8
- package/skills/prompt-generator/SKILL.md +6 -1
- package/skills/prompt-generator/TARGET_OUTPUT.md +7 -0
- package/skills/prompt-generator/evals/prompt-generator.json +37 -0
- package/hooks/blocking/agent-execution-intent-gate.py +0 -63
- package/hooks/blocking/test_agent_execution_intent_gate.py +0 -84
package/bin/install.mjs
CHANGED
|
@@ -29,7 +29,7 @@ const INSTALL_GROUPS = {
|
|
|
29
29
|
description: 'Prompt engineering tools',
|
|
30
30
|
skills: ['prompt-generator', 'agent-prompt'],
|
|
31
31
|
includeHookFiles: [
|
|
32
|
-
'blocking/agent-execution-intent-gate.py',
|
|
32
|
+
'blocking/prompt_workflow_gate_config.py',
|
|
33
33
|
'blocking/prompt_workflow_gate_core.py',
|
|
34
34
|
'blocking/prompt-workflow-stop-guard.py',
|
|
35
35
|
'HOOK_SPECS_PROMPT_WORKFLOW.md',
|
|
@@ -2,21 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Deterministic runtime gates for prompt workflows.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## PreToolUse Task/Agent (removed)
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
- Event: `PreToolUse`
|
|
9
|
-
- Matcher: `Task|Agent`
|
|
10
|
-
- Fail condition:
|
|
11
|
-
- Missing structured execution intent contract field:
|
|
12
|
-
- `tool_input.execution_intent: explicit|execute|delegate`, or
|
|
13
|
-
- `tool_input.execution_intent_explicit: true`, or
|
|
14
|
-
- `tool_input.metadata.execution_intent: explicit|execute|delegate`
|
|
15
|
-
- Missing required scope anchors in launch payload (always enforced when execution launch is evaluated)
|
|
16
|
-
- Compatibility fallback:
|
|
17
|
-
- Text markers are only accepted when `PROMPT_WORKFLOW_ALLOW_TEXT_INTENT_FALLBACK=1` is set.
|
|
18
|
-
- Fallback usage is logged to stderr.
|
|
19
|
-
- Action: `deny` with concrete missing requirement list.
|
|
7
|
+
The former `agent-execution-intent-gate.py` hook is **removed**. Native Agent/Task launches do not carry stable custom metadata; enforcing scope text on every spawn blocked legitimate `/agent-prompt` and refinement delegations. Scope and checklist rules remain enforced by the Stop guard when a prompt-workflow response is detected.
|
|
20
8
|
|
|
21
9
|
## Gate: Leakage + Checklist + Scope (Stop)
|
|
22
10
|
|
|
@@ -18,6 +18,7 @@ from prompt_workflow_gate_core import (
|
|
|
18
18
|
is_prompt_workflow_response,
|
|
19
19
|
missing_context_control_signals,
|
|
20
20
|
missing_checklist_rows,
|
|
21
|
+
missing_required_xml_sections,
|
|
21
22
|
missing_scope_anchors,
|
|
22
23
|
)
|
|
23
24
|
|
|
@@ -150,10 +151,23 @@ def _check_negative_keywords_in_artifact(assistant_message: str) -> dict | None:
|
|
|
150
151
|
),
|
|
151
152
|
)
|
|
152
153
|
|
|
154
|
+
def _check_required_xml_sections(assistant_message: str) -> dict | None:
|
|
155
|
+
missing_sections = missing_required_xml_sections(assistant_message)
|
|
156
|
+
if not missing_sections:
|
|
157
|
+
return None
|
|
158
|
+
return _build_block(
|
|
159
|
+
brief_label="retrying: include all required XML sections",
|
|
160
|
+
full_reason=(
|
|
161
|
+
"PROMPT-WORKFLOW GATE: Fenced XML artifact missing required sections: "
|
|
162
|
+
+ ", ".join(missing_sections)
|
|
163
|
+
),
|
|
164
|
+
)
|
|
165
|
+
|
|
153
166
|
def _evaluate_workflow_gates(assistant_message: str) -> dict | None:
|
|
154
167
|
if not is_prompt_workflow_response(assistant_message):
|
|
155
168
|
return None
|
|
156
169
|
workflow_gate_checks: tuple[Callable[[str], dict | None], ...] = (
|
|
170
|
+
_check_required_xml_sections,
|
|
157
171
|
_check_missing_checklist_rows,
|
|
158
172
|
_check_missing_scope_anchors,
|
|
159
173
|
_check_missing_context_signals,
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Static lists and compiled regexes for prompt-workflow gate checks.
|
|
2
|
+
|
|
3
|
+
Edit this file to change scope anchors, checklist rows, markers, or keyword lists
|
|
4
|
+
without touching gate logic in prompt_workflow_gate_core.py.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
REQUIRED_SCOPE_ANCHORS: tuple[str, ...] = (
|
|
12
|
+
"target_local_roots",
|
|
13
|
+
"target_canonical_roots",
|
|
14
|
+
"target_file_globs",
|
|
15
|
+
"comparison_basis",
|
|
16
|
+
"completion_boundary",
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
REQUIRED_CHECKLIST_ROWS: tuple[str, ...] = (
|
|
20
|
+
"structured_scoped_instructions",
|
|
21
|
+
"sequential_steps_present",
|
|
22
|
+
"positive_framing",
|
|
23
|
+
"acceptance_criteria_defined",
|
|
24
|
+
"safety_reversibility_language",
|
|
25
|
+
"reversible_action_and_safety_check_guidance",
|
|
26
|
+
"concrete_output_contract",
|
|
27
|
+
"scope_boundary_present",
|
|
28
|
+
"explicit_scope_anchors_present",
|
|
29
|
+
"all_instructions_artifact_bound",
|
|
30
|
+
"scope_terms_explicit_and_anchored",
|
|
31
|
+
"completion_boundary_measurable",
|
|
32
|
+
"citation_grounding_policy_present",
|
|
33
|
+
"source_priority_rules_present",
|
|
34
|
+
"artifact_language_confidence",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
AMBIGUOUS_SCOPE_TERMS: tuple[str, ...] = (
|
|
38
|
+
"this session",
|
|
39
|
+
"current files",
|
|
40
|
+
"here",
|
|
41
|
+
"above",
|
|
42
|
+
"as needed",
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
INTERNAL_OBJECT_MARKERS: tuple[str, ...] = (
|
|
46
|
+
'"pipeline_mode": "internal_section_refinement_with_final_audit"',
|
|
47
|
+
'"scope_block": {',
|
|
48
|
+
'"required_sections": [',
|
|
49
|
+
'"section_output_contract": {',
|
|
50
|
+
'"merge_output_contract": {',
|
|
51
|
+
'"audit_output_contract": {',
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
PROMPT_WORKFLOW_RESPONSE_MARKERS: tuple[str, ...] = (
|
|
55
|
+
"checklist_results",
|
|
56
|
+
"overall_status",
|
|
57
|
+
"scope anchors",
|
|
58
|
+
"target_local_roots",
|
|
59
|
+
"target_canonical_roots",
|
|
60
|
+
"target_file_globs",
|
|
61
|
+
"comparison_basis",
|
|
62
|
+
"completion_boundary",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
DEBUG_INTENT_MARKERS: tuple[str, ...] = (
|
|
66
|
+
"debug",
|
|
67
|
+
"show internal",
|
|
68
|
+
"raw internal object",
|
|
69
|
+
"pipeline object",
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
NEGATIVE_KEYWORDS_IN_ARTIFACT: tuple[str, ...] = (
|
|
73
|
+
"no",
|
|
74
|
+
"not",
|
|
75
|
+
"don't",
|
|
76
|
+
"do not",
|
|
77
|
+
"never",
|
|
78
|
+
"avoid",
|
|
79
|
+
"without",
|
|
80
|
+
"refrain",
|
|
81
|
+
"stop",
|
|
82
|
+
"prevent",
|
|
83
|
+
"exclude",
|
|
84
|
+
"prohibit",
|
|
85
|
+
"forbid",
|
|
86
|
+
"reject",
|
|
87
|
+
"cannot",
|
|
88
|
+
"unless",
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT: tuple[str, ...] = (
|
|
92
|
+
r"instead of\s+\w+",
|
|
93
|
+
r"rather than\s+\w+",
|
|
94
|
+
r"as opposed to\s+\w+",
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
REQUIRED_XML_SECTIONS: tuple[str, ...] = (
|
|
98
|
+
"role",
|
|
99
|
+
"context",
|
|
100
|
+
"instructions",
|
|
101
|
+
"constraints",
|
|
102
|
+
"output_format",
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
FENCED_XML_BLOCK_PATTERN: re.Pattern[str] = re.compile(
|
|
106
|
+
r"```xml\s*\n(.*?)```", re.DOTALL
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
COMPILED_NEGATIVE_KEYWORD_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
|
|
110
|
+
re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
|
|
111
|
+
for keyword in NEGATIVE_KEYWORDS_IN_ARTIFACT
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
COMPILED_NEGATIVE_INDIRECT_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
|
|
115
|
+
re.compile(pattern, re.IGNORECASE)
|
|
116
|
+
for pattern in NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT
|
|
117
|
+
)
|
|
@@ -6,118 +6,17 @@ from __future__ import annotations
|
|
|
6
6
|
import re
|
|
7
7
|
from typing import Iterable
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
REQUIRED_CHECKLIST_ROWS: tuple[str, ...] = (
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"positive_framing",
|
|
21
|
-
"acceptance_criteria_defined",
|
|
22
|
-
"safety_reversibility_language",
|
|
23
|
-
"reversible_action_and_safety_check_guidance",
|
|
24
|
-
"concrete_output_contract",
|
|
25
|
-
"scope_boundary_present",
|
|
26
|
-
"explicit_scope_anchors_present",
|
|
27
|
-
"all_instructions_artifact_bound",
|
|
28
|
-
"scope_terms_explicit_and_anchored",
|
|
29
|
-
"completion_boundary_measurable",
|
|
30
|
-
"citation_grounding_policy_present",
|
|
31
|
-
"source_priority_rules_present",
|
|
32
|
-
"artifact_language_confidence",
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
REQUIRED_CONTEXT_CONTROL_SIGNALS: tuple[str, ...] = (
|
|
36
|
-
"base_minimal_instruction_layer: true",
|
|
37
|
-
"on_demand_skill_loading: true",
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
AMBIGUOUS_SCOPE_TERMS: tuple[str, ...] = (
|
|
41
|
-
"this session",
|
|
42
|
-
"current files",
|
|
43
|
-
"here",
|
|
44
|
-
"above",
|
|
45
|
-
"as needed",
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
INTERNAL_OBJECT_MARKERS: tuple[str, ...] = (
|
|
49
|
-
'"pipeline_mode": "internal_section_refinement_with_final_audit"',
|
|
50
|
-
'"scope_block": {',
|
|
51
|
-
'"required_sections": [',
|
|
52
|
-
'"section_output_contract": {',
|
|
53
|
-
'"merge_output_contract": {',
|
|
54
|
-
'"audit_output_contract": {',
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
EXPLICIT_EXECUTION_MARKERS: tuple[str, ...] = (
|
|
58
|
-
"/agent-prompt",
|
|
59
|
-
"execution_intent: explicit",
|
|
60
|
-
"execution_intent_explicit: true",
|
|
61
|
-
"explicit execution intent",
|
|
62
|
-
"explicit delegation intent",
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
PROMPT_WORKFLOW_RESPONSE_MARKERS: tuple[str, ...] = (
|
|
66
|
-
"checklist_results",
|
|
67
|
-
"overall_status",
|
|
68
|
-
"scope anchors",
|
|
69
|
-
"target_local_roots",
|
|
70
|
-
"target_canonical_roots",
|
|
71
|
-
"target_file_globs",
|
|
72
|
-
"comparison_basis",
|
|
73
|
-
"completion_boundary",
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
DEBUG_INTENT_MARKERS: tuple[str, ...] = (
|
|
77
|
-
"debug",
|
|
78
|
-
"show internal",
|
|
79
|
-
"raw internal object",
|
|
80
|
-
"pipeline object",
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
NEGATIVE_KEYWORDS_IN_ARTIFACT: tuple[str, ...] = (
|
|
85
|
-
"no",
|
|
86
|
-
"not",
|
|
87
|
-
"don't",
|
|
88
|
-
"do not",
|
|
89
|
-
"never",
|
|
90
|
-
"avoid",
|
|
91
|
-
"without",
|
|
92
|
-
"refrain",
|
|
93
|
-
"stop",
|
|
94
|
-
"prevent",
|
|
95
|
-
"exclude",
|
|
96
|
-
"prohibit",
|
|
97
|
-
"forbid",
|
|
98
|
-
"reject",
|
|
99
|
-
"cannot",
|
|
100
|
-
"unless",
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT: tuple[str, ...] = (
|
|
104
|
-
r"instead of\s+\w+",
|
|
105
|
-
r"rather than\s+\w+",
|
|
106
|
-
r"as opposed to\s+\w+",
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
COMPILED_NEGATIVE_KEYWORD_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
|
|
110
|
-
re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
|
|
111
|
-
for keyword in NEGATIVE_KEYWORDS_IN_ARTIFACT
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
COMPILED_NEGATIVE_INDIRECT_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
|
|
115
|
-
re.compile(pattern, re.IGNORECASE)
|
|
116
|
-
for pattern in NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
FENCED_XML_BLOCK_PATTERN: re.Pattern[str] = re.compile(
|
|
120
|
-
r"```xml\s*\n(.*?)```", re.DOTALL
|
|
9
|
+
from prompt_workflow_gate_config import (
|
|
10
|
+
AMBIGUOUS_SCOPE_TERMS,
|
|
11
|
+
COMPILED_NEGATIVE_INDIRECT_PATTERNS,
|
|
12
|
+
COMPILED_NEGATIVE_KEYWORD_PATTERNS,
|
|
13
|
+
DEBUG_INTENT_MARKERS,
|
|
14
|
+
FENCED_XML_BLOCK_PATTERN,
|
|
15
|
+
INTERNAL_OBJECT_MARKERS,
|
|
16
|
+
PROMPT_WORKFLOW_RESPONSE_MARKERS,
|
|
17
|
+
REQUIRED_CHECKLIST_ROWS,
|
|
18
|
+
REQUIRED_SCOPE_ANCHORS,
|
|
19
|
+
REQUIRED_XML_SECTIONS,
|
|
121
20
|
)
|
|
122
21
|
|
|
123
22
|
|
|
@@ -126,6 +25,19 @@ def extract_fenced_xml_content(text: str) -> str:
|
|
|
126
25
|
return "\n".join(all_matches)
|
|
127
26
|
|
|
128
27
|
|
|
28
|
+
def missing_required_xml_sections(text: str) -> list[str]:
|
|
29
|
+
fenced_body = extract_fenced_xml_content(text)
|
|
30
|
+
if not fenced_body.strip():
|
|
31
|
+
return []
|
|
32
|
+
missing_sections: list[str] = []
|
|
33
|
+
for section_name in REQUIRED_XML_SECTIONS:
|
|
34
|
+
open_tag = re.compile(rf"<{re.escape(section_name)}(\s[^>]*)?>")
|
|
35
|
+
close_tag = re.compile(rf"</{re.escape(section_name)}>")
|
|
36
|
+
if not open_tag.search(fenced_body) or not close_tag.search(fenced_body):
|
|
37
|
+
missing_sections.append(section_name)
|
|
38
|
+
return missing_sections
|
|
39
|
+
|
|
40
|
+
|
|
129
41
|
def find_negative_keywords_in_fenced_xml(
|
|
130
42
|
text: str,
|
|
131
43
|
) -> list[dict[str, str | int]]:
|
|
@@ -159,36 +71,6 @@ def _contains_any_marker(text: str, markers: Iterable[str]) -> bool:
|
|
|
159
71
|
return any(marker.lower() in lower_text for marker in markers)
|
|
160
72
|
|
|
161
73
|
|
|
162
|
-
def has_explicit_execution_intent(text: str) -> bool:
|
|
163
|
-
return _contains_any_marker(text, EXPLICIT_EXECUTION_MARKERS)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def has_structured_execution_intent(tool_input: object) -> bool:
|
|
167
|
-
if not isinstance(tool_input, dict):
|
|
168
|
-
return False
|
|
169
|
-
|
|
170
|
-
explicit_flag = tool_input.get("execution_intent_explicit")
|
|
171
|
-
if isinstance(explicit_flag, bool):
|
|
172
|
-
return explicit_flag
|
|
173
|
-
|
|
174
|
-
intent_value = tool_input.get("execution_intent")
|
|
175
|
-
if isinstance(intent_value, str):
|
|
176
|
-
normalized = intent_value.strip().lower()
|
|
177
|
-
return normalized in {"explicit", "execute", "delegation", "delegate"}
|
|
178
|
-
if isinstance(intent_value, bool):
|
|
179
|
-
return intent_value
|
|
180
|
-
|
|
181
|
-
metadata = tool_input.get("metadata")
|
|
182
|
-
if isinstance(metadata, dict):
|
|
183
|
-
metadata_intent = metadata.get("execution_intent")
|
|
184
|
-
if isinstance(metadata_intent, str):
|
|
185
|
-
return metadata_intent.strip().lower() in {"explicit", "execute", "delegate"}
|
|
186
|
-
if isinstance(metadata_intent, bool):
|
|
187
|
-
return metadata_intent
|
|
188
|
-
|
|
189
|
-
return False
|
|
190
|
-
|
|
191
|
-
|
|
192
74
|
def has_debug_intent(text: str) -> bool:
|
|
193
75
|
return _contains_any_marker(text, DEBUG_INTENT_MARKERS)
|
|
194
76
|
|
|
@@ -230,6 +112,9 @@ def is_prompt_workflow_response(text: str) -> bool:
|
|
|
230
112
|
|
|
231
113
|
|
|
232
114
|
def missing_context_control_signals(text: str) -> list[str]:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
115
|
+
required_signals: tuple[str, ...] = (
|
|
116
|
+
"base_minimal_instruction_layer: true",
|
|
117
|
+
"on_demand_skill_loading: true",
|
|
118
|
+
)
|
|
119
|
+
lowered = text.lower()
|
|
120
|
+
return [signal for signal in required_signals if signal not in lowered]
|
|
@@ -20,8 +20,8 @@ def test_context_control_rule_exists_with_required_sections() -> None:
|
|
|
20
20
|
|
|
21
21
|
def test_hook_spec_exists_with_required_gates() -> None:
|
|
22
22
|
text = HOOK_SPEC_PATH.read_text(encoding="utf-8")
|
|
23
|
-
assert "
|
|
23
|
+
assert "PreToolUse Task/Agent (removed)" in text
|
|
24
|
+
assert "agent-execution-intent-gate.py" in text
|
|
24
25
|
assert "Leakage + Checklist + Scope (Stop)" in text
|
|
25
26
|
assert "Required Deterministic Checklist Rows" in text
|
|
26
|
-
assert "structured execution intent contract" in text
|
|
27
27
|
assert "Runtime Context-Control Signals" in text
|
|
@@ -3,28 +3,15 @@
|
|
|
3
3
|
from prompt_workflow_gate_core import (
|
|
4
4
|
find_ambiguous_scope_terms,
|
|
5
5
|
has_checklist_container,
|
|
6
|
-
has_explicit_execution_intent,
|
|
7
|
-
has_structured_execution_intent,
|
|
8
6
|
has_internal_object_leak,
|
|
9
7
|
is_prompt_workflow_response,
|
|
10
8
|
missing_context_control_signals,
|
|
11
9
|
missing_checklist_rows,
|
|
10
|
+
missing_required_xml_sections,
|
|
12
11
|
missing_scope_anchors,
|
|
13
12
|
)
|
|
14
13
|
|
|
15
14
|
|
|
16
|
-
def test_execution_intent_marker_detected() -> None:
|
|
17
|
-
assert has_explicit_execution_intent("execution_intent: explicit")
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def test_structured_execution_intent_detected_from_contract_field() -> None:
|
|
21
|
-
assert has_structured_execution_intent({"execution_intent": "explicit"})
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def test_structured_execution_intent_detected_from_boolean_flag() -> None:
|
|
25
|
-
assert has_structured_execution_intent({"execution_intent_explicit": True})
|
|
26
|
-
|
|
27
|
-
|
|
28
15
|
def test_internal_object_leak_detected() -> None:
|
|
29
16
|
text = '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
|
|
30
17
|
assert has_internal_object_leak(text)
|
|
@@ -66,3 +53,53 @@ def test_ambiguous_scope_terms_detected() -> None:
|
|
|
66
53
|
terms = find_ambiguous_scope_terms(text)
|
|
67
54
|
assert "this session" in terms
|
|
68
55
|
assert "current files" in terms
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _fenced_xml(body: str) -> str:
|
|
59
|
+
return f"```xml\n{body}\n```"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_missing_required_xml_sections_all_present_returns_empty() -> None:
|
|
63
|
+
body = (
|
|
64
|
+
"<role>R.</role>\n"
|
|
65
|
+
"<context>C.</context>\n"
|
|
66
|
+
"<instructions>I.</instructions>\n"
|
|
67
|
+
"<constraints>Co.</constraints>\n"
|
|
68
|
+
"<output_format>O.</output_format>\n"
|
|
69
|
+
)
|
|
70
|
+
assert missing_required_xml_sections(_fenced_xml(body)) == []
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_missing_required_xml_sections_missing_context() -> None:
|
|
74
|
+
body = (
|
|
75
|
+
"<role>R.</role>\n"
|
|
76
|
+
"<instructions>I.</instructions>\n"
|
|
77
|
+
"<constraints>Co.</constraints>\n"
|
|
78
|
+
"<output_format>O.</output_format>\n"
|
|
79
|
+
)
|
|
80
|
+
assert missing_required_xml_sections(_fenced_xml(body)) == ["context"]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_missing_required_xml_sections_missing_role_and_output_format() -> None:
|
|
84
|
+
body = (
|
|
85
|
+
"<context>C.</context>\n"
|
|
86
|
+
"<instructions>I.</instructions>\n"
|
|
87
|
+
"<constraints>Co.</constraints>\n"
|
|
88
|
+
)
|
|
89
|
+
missing = missing_required_xml_sections(_fenced_xml(body))
|
|
90
|
+
assert missing == ["role", "output_format"]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_missing_required_xml_sections_no_fence_returns_empty() -> None:
|
|
94
|
+
assert missing_required_xml_sections("no fenced xml here") == []
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_missing_required_xml_sections_prose_without_tags_counts_as_missing() -> None:
|
|
98
|
+
body = (
|
|
99
|
+
"<role>R.</role>\n"
|
|
100
|
+
"context appears in prose but has no tags.\n"
|
|
101
|
+
"<instructions>I.</instructions>\n"
|
|
102
|
+
"<constraints>Co.</constraints>\n"
|
|
103
|
+
"<output_format>O.</output_format>\n"
|
|
104
|
+
)
|
|
105
|
+
assert missing_required_xml_sections(_fenced_xml(body)) == ["context"]
|
|
@@ -118,6 +118,16 @@ def test_blocks_ambiguous_scope_phrasing() -> None:
|
|
|
118
118
|
assert response["decision"] == "block"
|
|
119
119
|
assert "Ambiguous scope phrasing detected" in response["reason"]
|
|
120
120
|
|
|
121
|
+
def _wrap_five_section_scaffold(inner_body: str) -> str:
|
|
122
|
+
return (
|
|
123
|
+
"<role>Test role sentence one.</role>\n"
|
|
124
|
+
"<context>Test context sentence one.</context>\n"
|
|
125
|
+
f"{inner_body}\n"
|
|
126
|
+
"<constraints>Test constraints sentence one.</constraints>\n"
|
|
127
|
+
"<output_format>Test output format sentence one.</output_format>\n"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
|
|
121
131
|
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
|
|
122
132
|
return (
|
|
123
133
|
"Audit: pass 15/15\n"
|
|
@@ -137,7 +147,9 @@ def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
|
|
|
137
147
|
|
|
138
148
|
|
|
139
149
|
def test_allows_positive_phrasing_inside_fenced_xml() -> None:
|
|
140
|
-
fenced_content =
|
|
150
|
+
fenced_content = _wrap_five_section_scaffold(
|
|
151
|
+
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
152
|
+
)
|
|
141
153
|
payload = {
|
|
142
154
|
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(fenced_content),
|
|
143
155
|
}
|
|
@@ -172,7 +184,9 @@ def test_blocks_banned_pattern_inside_fenced_xml(
|
|
|
172
184
|
fenced_xml_content: str,
|
|
173
185
|
) -> None:
|
|
174
186
|
payload = {
|
|
175
|
-
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(
|
|
187
|
+
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(
|
|
188
|
+
_wrap_five_section_scaffold(fenced_xml_content)
|
|
189
|
+
),
|
|
176
190
|
}
|
|
177
191
|
result = _run_hook(payload)
|
|
178
192
|
response = json.loads(result.stdout)
|
|
@@ -180,12 +194,15 @@ def test_blocks_banned_pattern_inside_fenced_xml(
|
|
|
180
194
|
|
|
181
195
|
|
|
182
196
|
def test_permits_negative_keywords_outside_fenced_xml() -> None:
|
|
197
|
+
fenced_inner = _wrap_five_section_scaffold(
|
|
198
|
+
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
199
|
+
)
|
|
183
200
|
message = (
|
|
184
201
|
"Audit: pass 15/15\n"
|
|
185
202
|
"Do not skip the audit line.\n"
|
|
186
203
|
"```xml\n"
|
|
187
|
-
|
|
188
|
-
"```\n"
|
|
204
|
+
+ fenced_inner
|
|
205
|
+
+ "\n```\n"
|
|
189
206
|
"overall_status: pass\n"
|
|
190
207
|
+ _full_checklist_rows()
|
|
191
208
|
+ "target_local_roots\n"
|
|
@@ -201,6 +218,23 @@ def test_permits_negative_keywords_outside_fenced_xml() -> None:
|
|
|
201
218
|
assert result.stdout.strip() == ""
|
|
202
219
|
|
|
203
220
|
|
|
221
|
+
def test_blocks_when_fenced_xml_missing_context_section() -> None:
|
|
222
|
+
fenced_body = (
|
|
223
|
+
"<role>Test role sentence one.</role>\n"
|
|
224
|
+
"<instructions>Test instructions sentence one.</instructions>\n"
|
|
225
|
+
"<constraints>Test constraints sentence one.</constraints>\n"
|
|
226
|
+
"<output_format>Test output format sentence one.</output_format>\n"
|
|
227
|
+
)
|
|
228
|
+
payload = {
|
|
229
|
+
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(fenced_body),
|
|
230
|
+
}
|
|
231
|
+
result = _run_hook(payload)
|
|
232
|
+
response = json.loads(result.stdout)
|
|
233
|
+
assert response["decision"] == "block"
|
|
234
|
+
assert "context" in response["reason"]
|
|
235
|
+
assert "include all required XML sections" in response["systemMessage"]
|
|
236
|
+
|
|
237
|
+
|
|
204
238
|
def test_allows_fully_structured_prompt_workflow_output() -> None:
|
|
205
239
|
payload = {
|
|
206
240
|
"last_assistant_message": (
|
package/hooks/hooks.json
CHANGED
|
@@ -94,11 +94,6 @@
|
|
|
94
94
|
"type": "command",
|
|
95
95
|
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/parallel-task-blocker.py",
|
|
96
96
|
"timeout": 10
|
|
97
|
-
},
|
|
98
|
-
{
|
|
99
|
-
"type": "command",
|
|
100
|
-
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/agent-execution-intent-gate.py",
|
|
101
|
-
"timeout": 10
|
|
102
97
|
}
|
|
103
98
|
]
|
|
104
99
|
},
|
package/package.json
CHANGED
|
@@ -68,7 +68,6 @@ Use simplified mode when either condition is true:
|
|
|
68
68
|
|
|
69
69
|
This mode is triggered when execution input includes `pipeline_mode: internal_section_refinement_with_final_audit` or equivalent execution-ready orchestration metadata.
|
|
70
70
|
If present, carry forward the scope block (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`) so execution remains artifact-bound.
|
|
71
|
-
Execution launch payload must include `execution_intent: explicit`.
|
|
72
71
|
|
|
73
72
|
1. Spawn exactly 6 refinement agents, one per section in fixed order:
|
|
74
73
|
- `role`
|
|
@@ -194,7 +193,7 @@ Section-refinement orchestration is done only when all are true:
|
|
|
194
193
|
- Gather context before crafting -- do not send an agent in blind
|
|
195
194
|
- Start only after explicit user execution intent; keep prompt authoring/refinement in `/prompt-generator`
|
|
196
195
|
- Default to `section_refinement_with_final_audit` orchestration for execution tasks unless user requests simplified mode
|
|
197
|
-
-
|
|
196
|
+
- Carry scope-block context into execution prompts; native Agent/Task tools have no custom intent metadata
|
|
198
197
|
- If the task is too small for an agent (single file read, quick grep), say so and just do it directly
|
|
199
198
|
- Include obstacle handling: "When encountering obstacles, do not use destructive actions as a shortcut (e.g. --no-verify, discarding unfamiliar files)" -- agents without this guidance may take irreversible shortcuts
|
|
200
199
|
- Frame agent tasks with collaborative language and include permission to express uncertainty — agents produce higher-quality output with collaborative briefing (Anthropic emotion concepts research, 2026)
|
|
@@ -26,7 +26,7 @@ Use this command:
|
|
|
26
26
|
- `target_file_globs`
|
|
27
27
|
- `comparison_basis`
|
|
28
28
|
- `completion_boundary`
|
|
29
|
-
- XML scaffold includes all sections:
|
|
29
|
+
- XML scaffold includes all sections — verified by the Stop hook at runtime; each required section tag must have both an opening and a closing tag:
|
|
30
30
|
- `<role>`
|
|
31
31
|
- `<context>`
|
|
32
32
|
- `<instructions>`
|
|
@@ -115,7 +115,7 @@ If `overall_status` is `fail`:
|
|
|
115
115
|
|
|
116
116
|
- Prompt refinement remains inside `/prompt-generator`.
|
|
117
117
|
- `/agent-prompt` is used only after explicit execution/delegation intent.
|
|
118
|
-
- Execution
|
|
118
|
+
- Execution handoffs that go through `/agent-prompt` carry scope-block context in the execution prompt as needed.
|
|
119
119
|
- Final refined prompt content is treated as artifact text during refinement and audit.
|
|
120
120
|
- Execution steps (when requested) are bound to scope block artifacts.
|
|
121
121
|
|
|
@@ -128,10 +128,8 @@ If `overall_status` is `fail`:
|
|
|
128
128
|
|
|
129
129
|
Validate fail-closed runtime gates:
|
|
130
130
|
|
|
131
|
-
1. **
|
|
132
|
-
-
|
|
133
|
-
- Deny execution when required scope anchors are missing from launch payload.
|
|
134
|
-
2. **Stop leakage/scope/checklist gate**
|
|
131
|
+
1. **Stop leakage/scope/checklist gate**
|
|
132
|
+
- **Section-presence gate (Stop)** — Block responses where the fenced XML artifact is missing any of the five required section tag pairs: `role`, `context`, `instructions`, `constraints`, `output_format`.
|
|
135
133
|
- Block responses that leak raw internal refinement object fields unless debug intent is explicit.
|
|
136
134
|
- Block responses missing deterministic checklist rows when audit output is present.
|
|
137
135
|
- Block responses using ambiguous scope phrasing in scope-bound sections.
|
|
@@ -148,10 +146,10 @@ Validate fail-closed runtime gates:
|
|
|
148
146
|
## Deterministic vs Semantic Boundary
|
|
149
147
|
|
|
150
148
|
- **Deterministic (fail-closed):**
|
|
151
|
-
- Missing
|
|
152
|
-
- Missing required scope anchors
|
|
149
|
+
- Missing required scope anchors (when Stop guard applies)
|
|
153
150
|
- Raw internal object leakage without debug intent
|
|
154
151
|
- Missing required checklist rows in audit output
|
|
152
|
+
- Missing required XML sections (`role`, `context`, `instructions`, `constraints`, `output_format`) in the fenced artifact (opening and closing tags)
|
|
155
153
|
- Ambiguous scope terms in scope-bound text
|
|
156
154
|
- Negative keywords inside fenced XML artifacts
|
|
157
155
|
- Hedging language inside fenced XML artifacts
|
|
@@ -185,6 +185,7 @@ Expand the light self-check with this internal checklist when useful:
|
|
|
185
185
|
- [ ] Emotion-informed framing is present: collaborative language, explicit success criteria, and explicit permission to express uncertainty ("say so if unsure")
|
|
186
186
|
- [ ] Constraints are surfaced upfront (proactive constraint awareness) so the model can incorporate them into its plan, and each non-obvious constraint carries its motivation
|
|
187
187
|
- [ ] Self-correction chaining is considered when the prompt must hold up over time (generate → review → refine)
|
|
188
|
+
- [ ] All five required XML sections (`<role>`, `<context>`, `<instructions>`, `<constraints>`, `<output_format>`) are present with both opening and closing tags in the fenced artifact
|
|
188
189
|
|
|
189
190
|
### 9. Deliver (orchestrator)
|
|
190
191
|
|
|
@@ -196,6 +197,8 @@ Audit: pass 15/15
|
|
|
196
197
|
|
|
197
198
|
(or `fail N/15 — …`), immediately followed by **one** fenced XML block; **send boundary** is immediately after the closing fence so the user receives a copy-ready pair (audit line + artifact) in one assistant message before the conversation continues.
|
|
198
199
|
|
|
200
|
+
**Render-survival:** When the fenced XML uses tag names that **collide with HTML5 elements** (`context`, `section`, `summary`, `details`, `header`, `footer`, `main`, `aside`, `article`, `nav`, `figure`), or when the artifact is **very large**, **write the artifact to a file** and give the user the path together with the usual one-line audit. Add a brief **section inventory** (confirming the five required sections) so the user can trust the file even if the inline fence would render poorly. Details: **TARGET_OUTPUT.md — Structural invariant E**.
|
|
201
|
+
|
|
199
202
|
### 10. Default refinement mode (subagent-internal)
|
|
200
203
|
|
|
201
204
|
For non-trivial requests, run inside the drafting subagent (use **draft-only** when the user explicitly asks for a quick draft / no refinement loop):
|
|
@@ -212,6 +215,8 @@ Required section list is immutable for this pipeline: `role`, `context`, `instru
|
|
|
212
215
|
|
|
213
216
|
**Two-tier validation — tier 2:** The `15` in `Audit: pass 15/15` counts these **compliance** rows (stable ids for hooks). Tier 1 is the **light self-check** in §8—keep the steps separate so models do not merge them.
|
|
214
217
|
|
|
218
|
+
**Runtime Stop hook:** In addition to the 15-row internal audit, the `prompt-workflow-stop-guard` Stop hook enforces **section presence** on prompt-workflow responses: any fenced Markdown XML block must include opening and closing tags for `role`, `context`, `instructions`, `constraints`, and `output_format`. Missing tags trigger a retry before the user sees a passing turn. Pair this with **Structural invariant E** in `TARGET_OUTPUT.md` so users still receive intact XML when chat renderers strip HTML-named tags.
|
|
219
|
+
|
|
215
220
|
| # | Row name |
|
|
216
221
|
|---|----------|
|
|
217
222
|
| 1 | structured_scoped_instructions |
|
|
@@ -263,7 +268,7 @@ When refining prompt text:
|
|
|
263
268
|
|
|
264
269
|
### 16. Optional execution handoff (`/agent-prompt`)
|
|
265
270
|
|
|
266
|
-
Use `/agent-prompt` only after the user explicitly asks to execute.
|
|
271
|
+
Use `/agent-prompt` only after the user explicitly asks to execute. Refinement subagents do not need `/agent-prompt` unless you are performing an execution handoff.
|
|
267
272
|
|
|
268
273
|
### 17. Context-footprint controls
|
|
269
274
|
|
|
@@ -87,6 +87,13 @@ This file is the **target output spec** for eval-driven iteration of the `prompt
|
|
|
87
87
|
- Place residual uncertainty only in `<open_question>` elements (one topic per tag) with a clear decision you need from the executor or user.
|
|
88
88
|
- Use definitive phrasing inside instructions (e.g. “Run tests in `packages/foo` with `pytest tests/`”) so each step reads like an executable checklist.
|
|
89
89
|
|
|
90
|
+
## Structural invariant E — Render-survival for XML sections
|
|
91
|
+
|
|
92
|
+
- **Problem:** Tag names used for prompt XML sections can overlap **HTML5 element names**. Chat renderers may treat those tokens as HTML and hide or alter the content between tags. High-risk examples include: `context`, `section`, `summary`, `details`, `header`, `footer`, `main`, `aside`, `article`, `nav`, `figure`. The raw assistant text may be complete while the **rendered** message looks like sections are missing (notably `<context>`).
|
|
93
|
+
- **Primary mitigation:** When the fenced XML artifact **contains any tag whose local name is on that HTML-collision list**, or when the artifact is **large enough that render truncation is likely**, the orchestrator **must write the full artifact to a file** (default: under `data/prompts/` or a path the user supplied earlier) and **paste the absolute file path** in the chat message. Pair the path with a **short section inventory** confirming all five required sections (`role`, `context`, `instructions`, `constraints`, `output_format`) are present in the file.
|
|
94
|
+
- **Fallback when file write is unavailable:** Escape the **opening angle bracket** of colliding tags (for example `&lt;context>` — user restores `<` when pasting) or use another distinctive wrapper **documented in the same message**, so the user can recover literal XML. State explicitly that the user should restore brackets when copying into another system.
|
|
95
|
+
- **Structural safety net:** Regardless of renderer behavior, the **Stop hook section-presence gate** blocks any prompt-workflow response whose fenced XML is missing any required opening/closing section tag pair. Methodology: [Anthropic — Agent Skills: evaluation and iteration](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/best-practices#evaluation-and-iteration).
|
|
96
|
+
|
|
90
97
|
## XML artifact (minimum sections)
|
|
91
98
|
|
|
92
99
|
Include at least:
|
|
@@ -133,6 +133,43 @@
|
|
|
133
133
|
"Example: 'Ensure all functions have explicit return types' passes; 'Do not leave return types implicit' fails; 'Avoid missing return types' fails",
|
|
134
134
|
"Applies to all sections inside the fenced block: <role>, <context>, <instructions>, <constraints>, <output_format>"
|
|
135
135
|
]
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
"id": 10,
|
|
139
|
+
"name": "required_sections_present_in_artifact",
|
|
140
|
+
"scenario": "Section completeness gate (render-survival)",
|
|
141
|
+
"prompt": "/prompt-generator Write a system prompt for a Python linting agent that auto-fixes code style issues in this repo",
|
|
142
|
+
"files": [],
|
|
143
|
+
"expected_behavior": [
|
|
144
|
+
"Fenced XML block contains opening and closing tags for all five required sections: role, context, instructions, constraints, output_format",
|
|
145
|
+
"Each required section contains substantive content (minimum one sentence each)",
|
|
146
|
+
"The Stop hook section-presence check passes for this output (no missing section tags)",
|
|
147
|
+
"Sections appear in order: role first, output_format last among the five required sections"
|
|
148
|
+
]
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
"id": 11,
|
|
152
|
+
"name": "section_missing_triggers_hook_block",
|
|
153
|
+
"scenario": "Section completeness gate — failure path",
|
|
154
|
+
"prompt": "Synthetic eval: assistant final message is prompt-workflow shaped (overall_status, checklist, scope anchors, runtime signals) with a fenced Markdown XML block whose body omits the entire context section (no context opening/closing tags); observer asserts Stop hook behavior and successful retry.",
|
|
155
|
+
"files": [],
|
|
156
|
+
"expected_behavior": [
|
|
157
|
+
"The Stop hook runs _check_required_xml_sections and returns a block decision naming context as a missing section",
|
|
158
|
+
"The model retry includes all five required sections with both opening and closing tags",
|
|
159
|
+
"The retry output passes the section-presence gate (empty missing list from missing_required_xml_sections)"
|
|
160
|
+
]
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"id": 12,
|
|
164
|
+
"name": "render_survival_file_fallback",
|
|
165
|
+
"scenario": "Render-layer mitigation",
|
|
166
|
+
"prompt": "/prompt-generator Write a comprehensive agent prompt for migrating a large Prisma schema and all related API routes, with step-by-step rollout, rollback, and verification — artifact sized like the migration prompt that triggered chat render stripping.",
|
|
167
|
+
"files": [],
|
|
168
|
+
"expected_behavior": [
|
|
169
|
+
"When the artifact exceeds a size threshold or contains XML section tag names that collide with HTML5 elements (context, section, summary, details, header, footer, main, aside, article, nav, figure), the orchestrator writes the full artifact to a file under data/prompts/ or a user-specified path",
|
|
170
|
+
"The file contains the complete XML with all tags preserved as literal text",
|
|
171
|
+
"The user-facing message states the file path and briefly inventories which required sections the artifact contains"
|
|
172
|
+
]
|
|
136
173
|
}
|
|
137
174
|
]
|
|
138
175
|
}
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""PreToolUse gate for Task/Agent execution intent and scope anchors."""
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import json
|
|
7
|
-
import sys
|
|
8
|
-
|
|
9
|
-
from prompt_workflow_gate_core import (
|
|
10
|
-
has_explicit_execution_intent,
|
|
11
|
-
has_structured_execution_intent,
|
|
12
|
-
missing_scope_anchors,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _deny(reason: str) -> None:
|
|
17
|
-
response = {
|
|
18
|
-
"hookSpecificOutput": {
|
|
19
|
-
"hookEventName": "PreToolUse",
|
|
20
|
-
"permissionDecision": "deny",
|
|
21
|
-
"permissionDecisionReason": reason,
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
print(json.dumps(response))
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def main() -> None:
|
|
28
|
-
try:
|
|
29
|
-
hook_input = json.load(sys.stdin)
|
|
30
|
-
except json.JSONDecodeError:
|
|
31
|
-
sys.exit(0)
|
|
32
|
-
|
|
33
|
-
tool_name = str(hook_input.get("tool_name", ""))
|
|
34
|
-
if tool_name not in {"Task", "Agent"}:
|
|
35
|
-
sys.exit(0)
|
|
36
|
-
|
|
37
|
-
tool_input = hook_input.get("tool_input", {})
|
|
38
|
-
prompt_text = str(tool_input.get("prompt", ""))
|
|
39
|
-
description = str(tool_input.get("description", ""))
|
|
40
|
-
combined_text = f"{description}\n{prompt_text}"
|
|
41
|
-
|
|
42
|
-
if not has_structured_execution_intent(tool_input):
|
|
43
|
-
if not has_explicit_execution_intent(combined_text):
|
|
44
|
-
_deny(
|
|
45
|
-
"BLOCKED: Missing structured execution intent signal for Agent/Task launch. "
|
|
46
|
-
"Provide `tool_input.execution_intent: explicit` or "
|
|
47
|
-
"`tool_input.execution_intent_explicit: true`."
|
|
48
|
-
)
|
|
49
|
-
sys.exit(0)
|
|
50
|
-
|
|
51
|
-
missing_anchors = missing_scope_anchors(combined_text)
|
|
52
|
-
if missing_anchors:
|
|
53
|
-
_deny(
|
|
54
|
-
"BLOCKED: Scope anchors missing for prompt workflow execution: "
|
|
55
|
-
+ ", ".join(missing_anchors)
|
|
56
|
-
)
|
|
57
|
-
sys.exit(0)
|
|
58
|
-
|
|
59
|
-
sys.exit(0)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if __name__ == "__main__":
|
|
63
|
-
main()
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
"""Tests for agent-execution-intent-gate hook."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import subprocess
|
|
5
|
-
import sys
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
SCRIPT_PATH = Path(__file__).parent / "agent-execution-intent-gate.py"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _run_hook(payload: dict) -> subprocess.CompletedProcess[str]:
|
|
13
|
-
return subprocess.run(
|
|
14
|
-
[sys.executable, str(SCRIPT_PATH)],
|
|
15
|
-
input=json.dumps(payload),
|
|
16
|
-
text=True,
|
|
17
|
-
capture_output=True,
|
|
18
|
-
check=False,
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def test_denies_task_without_explicit_intent_marker() -> None:
|
|
23
|
-
payload = {
|
|
24
|
-
"tool_name": "Task",
|
|
25
|
-
"tool_input": {"prompt": "run the workflow", "description": "delegate"},
|
|
26
|
-
}
|
|
27
|
-
result = _run_hook(payload)
|
|
28
|
-
response = json.loads(result.stdout)
|
|
29
|
-
assert response["hookSpecificOutput"]["permissionDecision"] == "deny"
|
|
30
|
-
assert "structured execution intent signal" in response["hookSpecificOutput"]["permissionDecisionReason"]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def test_allows_phrase_marker_with_scope_anchors() -> None:
|
|
34
|
-
payload = {
|
|
35
|
-
"tool_name": "Task",
|
|
36
|
-
"tool_input": {
|
|
37
|
-
"prompt": (
|
|
38
|
-
"execution_intent: explicit\n"
|
|
39
|
-
"target_local_roots\n"
|
|
40
|
-
"target_canonical_roots\n"
|
|
41
|
-
"target_file_globs\n"
|
|
42
|
-
"comparison_basis\n"
|
|
43
|
-
"completion_boundary\n"
|
|
44
|
-
),
|
|
45
|
-
"description": "explicit delegation intent",
|
|
46
|
-
},
|
|
47
|
-
}
|
|
48
|
-
result = _run_hook(payload)
|
|
49
|
-
assert result.stdout.strip() == ""
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def test_denies_when_scope_anchors_missing() -> None:
|
|
53
|
-
payload = {
|
|
54
|
-
"tool_name": "Agent",
|
|
55
|
-
"tool_input": {
|
|
56
|
-
"execution_intent": "explicit",
|
|
57
|
-
"prompt": "target_local_roots only",
|
|
58
|
-
"description": "delegate",
|
|
59
|
-
},
|
|
60
|
-
}
|
|
61
|
-
result = _run_hook(payload)
|
|
62
|
-
response = json.loads(result.stdout)
|
|
63
|
-
assert response["hookSpecificOutput"]["permissionDecision"] == "deny"
|
|
64
|
-
assert "Scope anchors missing" in response["hookSpecificOutput"]["permissionDecisionReason"]
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def test_allows_when_intent_and_scope_anchors_present() -> None:
|
|
68
|
-
payload = {
|
|
69
|
-
"tool_name": "Task",
|
|
70
|
-
"tool_input": {
|
|
71
|
-
"execution_intent_explicit": True,
|
|
72
|
-
"description": "delegate",
|
|
73
|
-
"prompt": (
|
|
74
|
-
"target_local_roots\n"
|
|
75
|
-
"target_canonical_roots\n"
|
|
76
|
-
"target_file_globs\n"
|
|
77
|
-
"comparison_basis\n"
|
|
78
|
-
"completion_boundary\n"
|
|
79
|
-
),
|
|
80
|
-
},
|
|
81
|
-
}
|
|
82
|
-
result = _run_hook(payload)
|
|
83
|
-
assert result.stdout.strip() == ""
|
|
84
|
-
|