claude-dev-env 1.17.0 → 1.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.mjs +2 -1
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +8 -6
- package/hooks/blocking/prompt_workflow_validate.py +218 -0
- package/hooks/blocking/test_prompt_workflow_validate.py +339 -0
- package/hooks/hooks.json +0 -5
- package/package.json +1 -1
- package/skills/prompt-generator/ARCHITECTURE.md +18 -0
- package/skills/prompt-generator/REFERENCE.md +16 -12
- package/skills/prompt-generator/SKILL.md +62 -46
- package/skills/prompt-generator/TARGET_OUTPUT.md +41 -21
- package/skills/prompt-generator/evals/prompt-generator.json +28 -8
- package/hooks/blocking/prompt-workflow-stop-guard.py +0 -217
- package/hooks/blocking/test_prompt_workflow_stop_guard.py +0 -261
|
@@ -13,8 +13,10 @@
|
|
|
13
13
|
"Discovery tool calls (Glob/Grep) execute before any AskUserQuestion",
|
|
14
14
|
"All questions delivered via AskUserQuestion — zero questions in direct chat text",
|
|
15
15
|
"AskUserQuestion contains 2-4 questions, each with 2-4 options, recommended option first",
|
|
16
|
-
"
|
|
17
|
-
"
|
|
16
|
+
"After internal drafting finishes, one assistant turn shows ### Outcome preview with bullets only: what executing the generated prompt will produce, primary inputs or tools, done-when, short sample (TARGET_OUTPUT.md)",
|
|
17
|
+
"That same turn uses AskUserQuestion (2-4 options): recommended option first = user confirms the preview matches their intent and proceeds to the final handoff; two options offer scope or emphasis shifts grounded in discovery; one option collects free-text refinements and triggers another drafting pass (at most three such preview rounds unless the user raises the cap in chat)",
|
|
18
|
+
"Final response: Audit line, one Markdown code fence tagged xml with the full prompt, then ## Outcome digest after the closing fence",
|
|
19
|
+
"No second outer ```xml fence in the digest (samples use four spaces or tilde fences only)",
|
|
18
20
|
"Fenced block contains <role>, <background>, <instructions>, <constraints>, <output_format>",
|
|
19
21
|
"Prompt generation delegated to a subagent (Agent tool call visible in the flow)"
|
|
20
22
|
]
|
|
@@ -34,7 +36,8 @@
|
|
|
34
36
|
"No redundant discovery tool calls for information already in conversation",
|
|
35
37
|
"Handoff prompt is self-contained — a new session can resume without prior context",
|
|
36
38
|
"Prior decisions preserved in the handoff, not lost or paraphrased away",
|
|
37
|
-
"
|
|
39
|
+
"After internal drafting finishes, ### Outcome preview bullets plus AskUserQuestion as in eval id 1 (confirm match as recommended first option; two contextual alternates; free-text refine option; preview loop cap)",
|
|
40
|
+
"Final output: 1-liner audit + fenced XML prompt + ## Outcome digest after the fence"
|
|
38
41
|
]
|
|
39
42
|
},
|
|
40
43
|
{
|
|
@@ -48,7 +51,8 @@
|
|
|
48
51
|
"Ambiguities surfaced as specific options, not open-ended questions",
|
|
49
52
|
"Discovery tool calls verify references from input (shared_utils, config patterns)",
|
|
50
53
|
"ALL requirements from unstructured input captured (timeouts, selectors, config extraction, TDD, code rules, test safety) — none dropped",
|
|
51
|
-
"
|
|
54
|
+
"After internal drafting finishes, ### Outcome preview bullets plus AskUserQuestion as in eval id 1 (confirm match as recommended first option; two contextual alternates; free-text refine option; preview loop cap)",
|
|
55
|
+
"Final output: 1-liner audit + fenced XML prompt + ## Outcome digest after the fence"
|
|
52
56
|
]
|
|
53
57
|
},
|
|
54
58
|
{
|
|
@@ -58,7 +62,8 @@
|
|
|
58
62
|
"prompt": "[Preceded by 80+ turns: failed git push, hook debugging, unrelated Samsung portal discussion, Python tracebacks, Midjourney tangent, 15+ empty Grep results] /prompt-generator Write a system prompt for a code review agent that checks for security vulnerabilities",
|
|
59
63
|
"files": [],
|
|
60
64
|
"expected_behavior": [
|
|
61
|
-
"Output format
|
|
65
|
+
"Output format matches Scenario 1: 1-liner audit + fenced XML prompt + ## Outcome digest after the fence",
|
|
66
|
+
"After internal drafting finishes, ### Outcome preview bullets plus AskUserQuestion as in eval id 1 (confirm match as recommended first option; two contextual alternates; free-text refine option; preview loop cap)",
|
|
62
67
|
"Prompt content about code review and security — zero contamination from prior noise",
|
|
63
68
|
"No references to prior errors, tangents, or unrelated tool calls in the prompt",
|
|
64
69
|
"XML structure complete and well-formed — no truncation from context pressure",
|
|
@@ -74,8 +79,8 @@
|
|
|
74
79
|
"expected_behavior": [
|
|
75
80
|
"No tool_use blocks appear after the first fence marker of the canonical prompt artifact",
|
|
76
81
|
"All Glob/Grep discovery calls precede the AskUserQuestion",
|
|
77
|
-
"
|
|
78
|
-
"Review the last successful Audit + fenced xml pair; blocked retry attempts preserved by
|
|
82
|
+
"AskUserQuestion interactions precede the subagent; Outcome preview AskUserQuestion precedes the final Audit line and xml fence",
|
|
83
|
+
"Review the last successful Audit + fenced xml pair; blocked retry attempts preserved by exported conversation logs do not count as additional delivered artifacts"
|
|
79
84
|
]
|
|
80
85
|
},
|
|
81
86
|
{
|
|
@@ -85,7 +90,7 @@
|
|
|
85
90
|
"prompt": "/prompt-generator Write a detailed agent-harness prompt for a TDD bug-fix workflow that traces a routing error across 5+ files, with state management for multi-window execution and structured test tracking",
|
|
86
91
|
"files": [],
|
|
87
92
|
"expected_behavior": [
|
|
88
|
-
"The canonical prompt artifact has one opening xml fence and one matching closing fence;
|
|
93
|
+
"The canonical prompt artifact has one opening xml fence and one matching closing fence; exported conversation logs are normalized to that same boundary before review",
|
|
89
94
|
"Every XML tag properly opened and closed",
|
|
90
95
|
"No truncation at numbered-list bullets (the Issue #41 failure mode)",
|
|
91
96
|
"No mid-sentence cuts or incomplete sections",
|
|
@@ -182,6 +187,21 @@
|
|
|
182
187
|
"missing_required_xml_sections sees closing tags for role, background, instructions, constraints, output_format when those appear after nested fences",
|
|
183
188
|
"SKILL.md §7 states ordered authoring steps for <illustrations>: four-space-indented sample lines, then tilde fences, then a complete triple-backtick pair when required"
|
|
184
189
|
]
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"id": 14,
|
|
193
|
+
"name": "outcome_preview_gate_and_digest_placement",
|
|
194
|
+
"scenario": "Outcome preview + post-fence digest (refinement contract)",
|
|
195
|
+
"prompt": "/prompt-generator Write a short user-task prompt for triaging GitHub issues by label in this repo",
|
|
196
|
+
"files": [],
|
|
197
|
+
"expected_behavior": [
|
|
198
|
+
"Subagent returns final XML plus preview summary fields for orchestrator use",
|
|
199
|
+
"### Outcome preview markdown block precedes AskUserQuestion; bullets cover executor output, inputs or tools, done when, sample excerpt (~20 lines max)",
|
|
200
|
+
"AskUserQuestion: recommended first option labels accepting the described outcome and proceeding (SKILL.md may phrase this as 'Ship this outcome profile' or equivalent); plus two contextual alternates and a free-text refinement path; at most three preview rounds unless user extends cap in chat",
|
|
201
|
+
"At most three preview refinement loops unless user raises cap in chat",
|
|
202
|
+
"Final handoff order: Audit line, single ```xml fence, ## Outcome digest, then optional hook validation block (defined in SKILL.md Terminology) after digest",
|
|
203
|
+
"extract_fenced_xml_content returns only the XML body (digest uses no second ```xml fence)"
|
|
204
|
+
]
|
|
185
205
|
}
|
|
186
206
|
]
|
|
187
207
|
}
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Stop hook gate for prompt-workflow leakage and deterministic audit coverage.
|
|
3
|
-
|
|
4
|
-
When every workflow gate passes, the fenced ``xml`` artifact body is copied to the
|
|
5
|
-
system clipboard via :mod:`prompt_workflow_clipboard` (tkinter, then pyperclip).
|
|
6
|
-
Set ``PROMPT_WORKFLOW_SKIP_CLIPBOARD=1`` to disable (tests, CI, headless).
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import datetime
|
|
12
|
-
import json
|
|
13
|
-
import sys
|
|
14
|
-
from collections.abc import Callable
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
|
|
17
|
-
from prompt_workflow_clipboard import copy_text_to_system_clipboard
|
|
18
|
-
from prompt_workflow_gate_core import (
|
|
19
|
-
extract_fenced_xml_content,
|
|
20
|
-
find_ambiguous_scope_terms,
|
|
21
|
-
find_negative_keywords_in_fenced_xml,
|
|
22
|
-
has_debug_intent,
|
|
23
|
-
has_checklist_container,
|
|
24
|
-
has_internal_object_leak,
|
|
25
|
-
is_prompt_workflow_response,
|
|
26
|
-
missing_context_control_signals,
|
|
27
|
-
missing_checklist_rows,
|
|
28
|
-
missing_required_xml_sections,
|
|
29
|
-
missing_scope_anchors,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
PROMPT_GATE_LOG_PATH: Path = Path.home() / ".claude" / "logs" / "prompt-gate.log"
|
|
33
|
-
USER_FACING_PREFIX: str = "[prompt-gate]"
|
|
34
|
-
|
|
35
|
-
def _extract_user_context(hook_input: dict) -> str:
|
|
36
|
-
candidates = (
|
|
37
|
-
"last_user_message",
|
|
38
|
-
"user_message",
|
|
39
|
-
"user_prompt",
|
|
40
|
-
"prompt",
|
|
41
|
-
"input",
|
|
42
|
-
)
|
|
43
|
-
for key in candidates:
|
|
44
|
-
value = hook_input.get(key)
|
|
45
|
-
if isinstance(value, str) and value.strip():
|
|
46
|
-
return value
|
|
47
|
-
return ""
|
|
48
|
-
|
|
49
|
-
def _append_diagnostic_to_log(brief_label: str, full_reason: str) -> None:
|
|
50
|
-
try:
|
|
51
|
-
PROMPT_GATE_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
52
|
-
timestamp_iso = datetime.datetime.now().isoformat()
|
|
53
|
-
log_entry = f"{timestamp_iso}\t{brief_label}\t{full_reason}\n"
|
|
54
|
-
with PROMPT_GATE_LOG_PATH.open("a", encoding="utf-8") as log_handle:
|
|
55
|
-
log_handle.write(log_entry)
|
|
56
|
-
except OSError:
|
|
57
|
-
pass
|
|
58
|
-
|
|
59
|
-
def _build_block(brief_label: str, full_reason: str) -> dict:
|
|
60
|
-
_append_diagnostic_to_log(brief_label, full_reason)
|
|
61
|
-
return {
|
|
62
|
-
"decision": "block",
|
|
63
|
-
"reason": full_reason,
|
|
64
|
-
"systemMessage": f"{USER_FACING_PREFIX} {brief_label}",
|
|
65
|
-
"suppressOutput": True,
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
def _check_internal_object_leak(
|
|
69
|
-
assistant_message: str,
|
|
70
|
-
debug_requested: bool,
|
|
71
|
-
) -> dict | None:
|
|
72
|
-
if not has_internal_object_leak(assistant_message) or debug_requested:
|
|
73
|
-
return None
|
|
74
|
-
return _build_block(
|
|
75
|
-
brief_label="retrying: sanitize audit format",
|
|
76
|
-
full_reason=(
|
|
77
|
-
"PROMPT-WORKFLOW GATE: Raw internal refinement object leakage detected. "
|
|
78
|
-
"Return sanitized user-facing output unless explicit debug intent is present."
|
|
79
|
-
),
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
def _check_checklist_container(assistant_message: str) -> dict | None:
|
|
83
|
-
if has_checklist_container(assistant_message):
|
|
84
|
-
return None
|
|
85
|
-
return _build_block(
|
|
86
|
-
brief_label="retrying: add checklist",
|
|
87
|
-
full_reason=(
|
|
88
|
-
"PROMPT-WORKFLOW GATE: Deterministic checklist container missing. "
|
|
89
|
-
"Include `checklist_results` with all required rows."
|
|
90
|
-
),
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
def _check_missing_checklist_rows(assistant_message: str) -> dict | None:
|
|
94
|
-
if not has_checklist_container(assistant_message):
|
|
95
|
-
return None
|
|
96
|
-
missing_rows = missing_checklist_rows(assistant_message)
|
|
97
|
-
if not missing_rows:
|
|
98
|
-
return None
|
|
99
|
-
return _build_block(
|
|
100
|
-
brief_label="retrying: complete checklist",
|
|
101
|
-
full_reason=(
|
|
102
|
-
"PROMPT-WORKFLOW GATE: Deterministic checklist rows missing: "
|
|
103
|
-
+ ", ".join(missing_rows)
|
|
104
|
-
),
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
def _check_missing_scope_anchors(assistant_message: str) -> dict | None:
|
|
108
|
-
missing_anchors = missing_scope_anchors(assistant_message)
|
|
109
|
-
if not missing_anchors:
|
|
110
|
-
return None
|
|
111
|
-
return _build_block(
|
|
112
|
-
brief_label="retrying: add scope anchors",
|
|
113
|
-
full_reason=(
|
|
114
|
-
"PROMPT-WORKFLOW GATE: Required scope anchors missing: "
|
|
115
|
-
+ ", ".join(missing_anchors)
|
|
116
|
-
),
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
def _check_missing_context_signals(assistant_message: str) -> dict | None:
|
|
120
|
-
missing_signals = missing_context_control_signals(assistant_message)
|
|
121
|
-
if not missing_signals:
|
|
122
|
-
return None
|
|
123
|
-
return _build_block(
|
|
124
|
-
brief_label="retrying: add runtime signals",
|
|
125
|
-
full_reason=(
|
|
126
|
-
"PROMPT-WORKFLOW GATE: Runtime context-control preamble missing. "
|
|
127
|
-
"Include the two required lines from prompt-workflow-context-controls "
|
|
128
|
-
"(minimal instruction layer and on-demand skill loading)."
|
|
129
|
-
),
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
def _check_ambiguous_scope(assistant_message: str) -> dict | None:
|
|
133
|
-
ambiguous_terms = find_ambiguous_scope_terms(assistant_message)
|
|
134
|
-
if not ambiguous_terms:
|
|
135
|
-
return None
|
|
136
|
-
return _build_block(
|
|
137
|
-
brief_label="retrying: rephrase scope refs",
|
|
138
|
-
full_reason=(
|
|
139
|
-
"PROMPT-WORKFLOW GATE: Ambiguous scope phrasing detected: "
|
|
140
|
-
+ ", ".join(ambiguous_terms)
|
|
141
|
-
),
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
def _check_negative_keywords_in_artifact(assistant_message: str) -> dict | None:
|
|
145
|
-
violations = find_negative_keywords_in_fenced_xml(assistant_message)
|
|
146
|
-
if not violations:
|
|
147
|
-
return None
|
|
148
|
-
violation_descriptions = [
|
|
149
|
-
f" line {each_violation['line_number']}: \"{each_violation['keyword']}\" in: {each_violation['line_text']}"
|
|
150
|
-
for each_violation in violations
|
|
151
|
-
]
|
|
152
|
-
return _build_block(
|
|
153
|
-
brief_label="retrying: rephrase negative keywords in artifact",
|
|
154
|
-
full_reason=(
|
|
155
|
-
"PROMPT-WORKFLOW GATE: Banned negative keywords found inside fenced XML artifact. "
|
|
156
|
-
"Rephrase as positive directives (what TO do, not what to avoid):\n"
|
|
157
|
-
+ "\n".join(violation_descriptions)
|
|
158
|
-
),
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
def _check_required_xml_sections(assistant_message: str) -> dict | None:
|
|
162
|
-
missing_sections = missing_required_xml_sections(assistant_message)
|
|
163
|
-
if not missing_sections:
|
|
164
|
-
return None
|
|
165
|
-
return _build_block(
|
|
166
|
-
brief_label="retrying: include all required XML sections",
|
|
167
|
-
full_reason=(
|
|
168
|
-
"PROMPT-WORKFLOW GATE: Fenced XML artifact missing required sections: "
|
|
169
|
-
+ ", ".join(missing_sections)
|
|
170
|
-
),
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
def _evaluate_workflow_gates(assistant_message: str) -> dict | None:
|
|
174
|
-
if not is_prompt_workflow_response(assistant_message):
|
|
175
|
-
return None
|
|
176
|
-
workflow_gate_checks: tuple[Callable[[str], dict | None], ...] = (
|
|
177
|
-
_check_required_xml_sections,
|
|
178
|
-
_check_missing_checklist_rows,
|
|
179
|
-
_check_missing_scope_anchors,
|
|
180
|
-
_check_missing_context_signals,
|
|
181
|
-
_check_ambiguous_scope,
|
|
182
|
-
_check_negative_keywords_in_artifact,
|
|
183
|
-
)
|
|
184
|
-
for check in workflow_gate_checks:
|
|
185
|
-
block = check(assistant_message)
|
|
186
|
-
if block is not None:
|
|
187
|
-
return block
|
|
188
|
-
return None
|
|
189
|
-
|
|
190
|
-
def main() -> None:
|
|
191
|
-
try:
|
|
192
|
-
hook_input = json.load(sys.stdin)
|
|
193
|
-
except json.JSONDecodeError:
|
|
194
|
-
sys.exit(0)
|
|
195
|
-
|
|
196
|
-
assistant_message = str(hook_input.get("last_assistant_message", ""))
|
|
197
|
-
if not assistant_message.strip():
|
|
198
|
-
sys.exit(0)
|
|
199
|
-
|
|
200
|
-
user_context = _extract_user_context(hook_input)
|
|
201
|
-
debug_requested = has_debug_intent(user_context)
|
|
202
|
-
|
|
203
|
-
block = _check_internal_object_leak(assistant_message, debug_requested)
|
|
204
|
-
if block is None:
|
|
205
|
-
block = _evaluate_workflow_gates(assistant_message)
|
|
206
|
-
|
|
207
|
-
if block is not None:
|
|
208
|
-
sys.stdout.write(json.dumps(block) + "\n")
|
|
209
|
-
elif is_prompt_workflow_response(assistant_message):
|
|
210
|
-
artifact_text = extract_fenced_xml_content(assistant_message).strip()
|
|
211
|
-
if artifact_text:
|
|
212
|
-
copy_text_to_system_clipboard(artifact_text)
|
|
213
|
-
|
|
214
|
-
sys.exit(0)
|
|
215
|
-
|
|
216
|
-
if __name__ == "__main__":
|
|
217
|
-
main()
|
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
"""Tests for prompt-workflow-stop-guard hook."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import subprocess
|
|
5
|
-
import sys
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
import pytest
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
SCRIPT_PATH = Path(__file__).parent / "prompt-workflow-stop-guard.py"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@pytest.fixture(autouse=True)
|
|
15
|
-
def _disable_prompt_workflow_clipboard_in_subprocess(
|
|
16
|
-
monkeypatch: pytest.MonkeyPatch,
|
|
17
|
-
) -> None:
|
|
18
|
-
"""Subprocess hook inherits env; clipboard would be flaky in CI."""
|
|
19
|
-
monkeypatch.setenv("PROMPT_WORKFLOW_SKIP_CLIPBOARD", "1")
|
|
20
|
-
|
|
21
|
-
def _run_hook(payload: dict) -> subprocess.CompletedProcess[str]:
|
|
22
|
-
return subprocess.run(
|
|
23
|
-
[sys.executable, str(SCRIPT_PATH)],
|
|
24
|
-
input=json.dumps(payload),
|
|
25
|
-
text=True,
|
|
26
|
-
capture_output=True,
|
|
27
|
-
check=False,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def _full_checklist_rows() -> str:
|
|
31
|
-
return (
|
|
32
|
-
"checklist_results:\n"
|
|
33
|
-
"- structured_scoped_instructions\n"
|
|
34
|
-
"- sequential_steps_present\n"
|
|
35
|
-
"- positive_framing\n"
|
|
36
|
-
"- acceptance_criteria_defined\n"
|
|
37
|
-
"- safety_reversibility_language\n"
|
|
38
|
-
"- reversible_action_and_safety_check_guidance\n"
|
|
39
|
-
"- concrete_output_contract\n"
|
|
40
|
-
"- scope_boundary_present\n"
|
|
41
|
-
"- explicit_scope_anchors_present\n"
|
|
42
|
-
"- all_instructions_artifact_bound\n"
|
|
43
|
-
"- scope_terms_explicit_and_anchored\n"
|
|
44
|
-
"- completion_boundary_measurable\n"
|
|
45
|
-
"- citation_grounding_policy_present\n"
|
|
46
|
-
"- source_priority_rules_present\n"
|
|
47
|
-
"- artifact_language_confidence\n"
|
|
48
|
-
)
|
|
49
|
-
|
|
50
|
-
def test_blocks_internal_object_leak_without_debug_intent() -> None:
|
|
51
|
-
payload = {
|
|
52
|
-
"last_assistant_message": '{"pipeline_mode": "internal_section_refinement_with_final_audit"}',
|
|
53
|
-
"last_user_message": "just return the final prompt",
|
|
54
|
-
}
|
|
55
|
-
result = _run_hook(payload)
|
|
56
|
-
response = json.loads(result.stdout)
|
|
57
|
-
assert response["decision"] == "block"
|
|
58
|
-
assert "Raw internal refinement object leakage" in response["reason"]
|
|
59
|
-
|
|
60
|
-
def test_allows_internal_object_with_debug_intent() -> None:
|
|
61
|
-
payload = {
|
|
62
|
-
"last_assistant_message": '{"pipeline_mode": "internal_section_refinement_with_final_audit"}',
|
|
63
|
-
"last_user_message": "debug: show internal pipeline object",
|
|
64
|
-
}
|
|
65
|
-
result = _run_hook(payload)
|
|
66
|
-
assert result.stdout.strip() == ""
|
|
67
|
-
|
|
68
|
-
def test_blocks_missing_checklist_rows() -> None:
|
|
69
|
-
payload = {
|
|
70
|
-
"last_assistant_message": "overall_status: pass\nchecklist_results: structured_scoped_instructions",
|
|
71
|
-
}
|
|
72
|
-
result = _run_hook(payload)
|
|
73
|
-
response = json.loads(result.stdout)
|
|
74
|
-
assert response["decision"] == "block"
|
|
75
|
-
assert "Deterministic checklist rows missing" in response["reason"]
|
|
76
|
-
|
|
77
|
-
def test_allows_prompt_workflow_output_without_checklist_container() -> None:
|
|
78
|
-
payload = {
|
|
79
|
-
"last_assistant_message": (
|
|
80
|
-
"overall_status: pass\n"
|
|
81
|
-
"target_local_roots\n"
|
|
82
|
-
"target_canonical_roots\n"
|
|
83
|
-
"target_file_globs\n"
|
|
84
|
-
"comparison_basis\n"
|
|
85
|
-
"completion_boundary\n"
|
|
86
|
-
"base_minimal_instruction_layer: true\n"
|
|
87
|
-
"on_demand_skill_loading: true\n"
|
|
88
|
-
),
|
|
89
|
-
}
|
|
90
|
-
result = _run_hook(payload)
|
|
91
|
-
assert result.stdout.strip() == ""
|
|
92
|
-
|
|
93
|
-
def test_blocks_missing_context_control_signals() -> None:
|
|
94
|
-
payload = {
|
|
95
|
-
"last_assistant_message": (
|
|
96
|
-
"overall_status: pass\n"
|
|
97
|
-
+ _full_checklist_rows()
|
|
98
|
-
+ "target_local_roots\n"
|
|
99
|
-
+ "target_canonical_roots\n"
|
|
100
|
-
+ "target_file_globs\n"
|
|
101
|
-
+ "comparison_basis\n"
|
|
102
|
-
+ "completion_boundary\n"
|
|
103
|
-
+ "base_minimal_instruction_layer: true\n"
|
|
104
|
-
),
|
|
105
|
-
}
|
|
106
|
-
result = _run_hook(payload)
|
|
107
|
-
response = json.loads(result.stdout)
|
|
108
|
-
assert response["decision"] == "block"
|
|
109
|
-
assert "Runtime context-control preamble missing" in response["reason"]
|
|
110
|
-
assert "on-demand skill loading" in response["reason"]
|
|
111
|
-
|
|
112
|
-
def test_blocks_ambiguous_scope_phrasing() -> None:
|
|
113
|
-
payload = {
|
|
114
|
-
"last_assistant_message": (
|
|
115
|
-
"overall_status: pass\n"
|
|
116
|
-
+ _full_checklist_rows()
|
|
117
|
-
+ "scope block includes target_local_roots target_canonical_roots "
|
|
118
|
-
+ "target_file_globs comparison_basis completion_boundary "
|
|
119
|
-
+ "base_minimal_instruction_layer: true\n"
|
|
120
|
-
+ "on_demand_skill_loading: true\n"
|
|
121
|
-
+ "and applies to this session."
|
|
122
|
-
),
|
|
123
|
-
}
|
|
124
|
-
result = _run_hook(payload)
|
|
125
|
-
response = json.loads(result.stdout)
|
|
126
|
-
assert response["decision"] == "block"
|
|
127
|
-
assert "Ambiguous scope phrasing detected" in response["reason"]
|
|
128
|
-
|
|
129
|
-
def _wrap_five_section_scaffold(inner_body: str) -> str:
|
|
130
|
-
return (
|
|
131
|
-
"<role>Test role sentence one.</role>\n"
|
|
132
|
-
"<background>Test background sentence one.</background>\n"
|
|
133
|
-
f"{inner_body}\n"
|
|
134
|
-
"<constraints>Test constraints sentence one.</constraints>\n"
|
|
135
|
-
"<output_format>Test output format sentence one.</output_format>\n"
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
|
|
140
|
-
return (
|
|
141
|
-
"Audit: pass 15/15\n"
|
|
142
|
-
"```xml\n"
|
|
143
|
-
+ fenced_xml_body
|
|
144
|
-
+ "\n```\n"
|
|
145
|
-
"overall_status: pass\n"
|
|
146
|
-
+ _full_checklist_rows()
|
|
147
|
-
+ "target_local_roots\n"
|
|
148
|
-
"target_canonical_roots\n"
|
|
149
|
-
"target_file_globs\n"
|
|
150
|
-
"comparison_basis\n"
|
|
151
|
-
"completion_boundary\n"
|
|
152
|
-
"base_minimal_instruction_layer: true\n"
|
|
153
|
-
"on_demand_skill_loading: true\n"
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
def test_allows_positive_phrasing_inside_fenced_xml() -> None:
|
|
158
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
159
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
160
|
-
)
|
|
161
|
-
payload = {
|
|
162
|
-
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(fenced_content),
|
|
163
|
-
}
|
|
164
|
-
result = _run_hook(payload)
|
|
165
|
-
assert result.stdout.strip() == ""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
BANNED_KEYWORD_TEST_CASES: list[tuple[str, str]] = [
|
|
169
|
-
("do_not", "<instructions>Do not leave return types implicit.</instructions>"),
|
|
170
|
-
("avoid", "<instructions>Avoid missing return types.</instructions>"),
|
|
171
|
-
("never", "<constraints>Never store credentials in plain text.</constraints>"),
|
|
172
|
-
("without", "<instructions>Deploy without running tests first.</instructions>"),
|
|
173
|
-
("prevent", "<constraints>Prevent unauthorized access to the API.</constraints>"),
|
|
174
|
-
("reject", "<constraints>Reject all unsigned commits.</constraints>"),
|
|
175
|
-
("cannot", "<constraints>The API cannot accept unauthenticated requests.</constraints>"),
|
|
176
|
-
("unless", "<constraints>Skip the build step unless the user explicitly approves.</constraints>"),
|
|
177
|
-
("must_not", "<constraints>The script must not produce duplicates.</constraints>"),
|
|
178
|
-
("must_never", "<constraints>You must never store credentials in environment variables.</constraints>"),
|
|
179
|
-
("instead_of", "<instructions>Use explicit types instead of implicit ones.</instructions>"),
|
|
180
|
-
("rather_than", "<constraints>Prefer explicit types rather than inferred ones.</constraints>"),
|
|
181
|
-
("as_opposed_to", "<instructions>Use Grid as opposed to floats for layout.</instructions>"),
|
|
182
|
-
]
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
@pytest.mark.parametrize(
|
|
186
|
-
("banned_pattern_name", "fenced_xml_content"),
|
|
187
|
-
BANNED_KEYWORD_TEST_CASES,
|
|
188
|
-
ids=[each_case[0] for each_case in BANNED_KEYWORD_TEST_CASES],
|
|
189
|
-
)
|
|
190
|
-
def test_blocks_banned_pattern_inside_fenced_xml(
|
|
191
|
-
banned_pattern_name: str,
|
|
192
|
-
fenced_xml_content: str,
|
|
193
|
-
) -> None:
|
|
194
|
-
payload = {
|
|
195
|
-
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(
|
|
196
|
-
_wrap_five_section_scaffold(fenced_xml_content)
|
|
197
|
-
),
|
|
198
|
-
}
|
|
199
|
-
result = _run_hook(payload)
|
|
200
|
-
response = json.loads(result.stdout)
|
|
201
|
-
assert response["decision"] == "block"
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
def test_permits_negative_keywords_outside_fenced_xml() -> None:
|
|
205
|
-
fenced_inner = _wrap_five_section_scaffold(
|
|
206
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
207
|
-
)
|
|
208
|
-
message = (
|
|
209
|
-
"Audit: pass 15/15\n"
|
|
210
|
-
"Do not skip the audit line.\n"
|
|
211
|
-
"```xml\n"
|
|
212
|
-
+ fenced_inner
|
|
213
|
-
+ "\n```\n"
|
|
214
|
-
"overall_status: pass\n"
|
|
215
|
-
+ _full_checklist_rows()
|
|
216
|
-
+ "target_local_roots\n"
|
|
217
|
-
"target_canonical_roots\n"
|
|
218
|
-
"target_file_globs\n"
|
|
219
|
-
"comparison_basis\n"
|
|
220
|
-
"completion_boundary\n"
|
|
221
|
-
"base_minimal_instruction_layer: true\n"
|
|
222
|
-
"on_demand_skill_loading: true\n"
|
|
223
|
-
)
|
|
224
|
-
payload = {"last_assistant_message": message}
|
|
225
|
-
result = _run_hook(payload)
|
|
226
|
-
assert result.stdout.strip() == ""
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
def test_blocks_when_fenced_xml_missing_background_section() -> None:
|
|
230
|
-
fenced_body = (
|
|
231
|
-
"<role>Test role sentence one.</role>\n"
|
|
232
|
-
"<instructions>Test instructions sentence one.</instructions>\n"
|
|
233
|
-
"<constraints>Test constraints sentence one.</constraints>\n"
|
|
234
|
-
"<output_format>Test output format sentence one.</output_format>\n"
|
|
235
|
-
)
|
|
236
|
-
payload = {
|
|
237
|
-
"last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(fenced_body),
|
|
238
|
-
}
|
|
239
|
-
result = _run_hook(payload)
|
|
240
|
-
response = json.loads(result.stdout)
|
|
241
|
-
assert response["decision"] == "block"
|
|
242
|
-
assert "background" in response["reason"]
|
|
243
|
-
assert "include all required XML sections" in response["systemMessage"]
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
def test_allows_fully_structured_prompt_workflow_output() -> None:
|
|
247
|
-
payload = {
|
|
248
|
-
"last_assistant_message": (
|
|
249
|
-
"overall_status: pass\n"
|
|
250
|
-
+ _full_checklist_rows()
|
|
251
|
-
+ "target_local_roots\n"
|
|
252
|
-
+ "target_canonical_roots\n"
|
|
253
|
-
+ "target_file_globs\n"
|
|
254
|
-
+ "comparison_basis\n"
|
|
255
|
-
+ "completion_boundary\n"
|
|
256
|
-
+ "base_minimal_instruction_layer: true\n"
|
|
257
|
-
+ "on_demand_skill_loading: true\n"
|
|
258
|
-
),
|
|
259
|
-
}
|
|
260
|
-
result = _run_hook(payload)
|
|
261
|
-
assert result.stdout.strip() == ""
|