claude-dev-env 1.7.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/deep-research.md +170 -0
- package/bin/install.mjs +140 -105
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +68 -0
- package/hooks/blocking/agent-execution-intent-gate.py +83 -0
- package/hooks/blocking/prompt-workflow-stop-guard.py +131 -0
- package/hooks/blocking/prompt_workflow_gate_core.py +161 -0
- package/hooks/blocking/test_agent_execution_intent_gate.py +106 -0
- package/hooks/blocking/test_context_control_policy_files.py +27 -0
- package/hooks/blocking/test_prompt_workflow_gate_core.py +68 -0
- package/hooks/blocking/test_prompt_workflow_stop_guard.py +144 -0
- package/hooks/hooks.json +10 -0
- package/package.json +1 -6
- package/rules/prompt-workflow-context-controls.md +48 -0
- package/skills/agent-prompt/SKILL.md +200 -0
- package/skills/deep-research/SKILL.md +80 -0
- package/skills/dream/SKILL.md +118 -0
- package/skills/prompt-generator/REFERENCE.md +150 -0
- package/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md +174 -0
- package/skills/prompt-generator/SKILL.md +333 -0
- package/skills/research-mode/SKILL.md +53 -0
- package/skills/session-log/SKILL.md +237 -0
- package/skills/session-tidy/SKILL.md +181 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Stop hook gate for prompt-workflow leakage and deterministic audit coverage."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from prompt_workflow_gate_core import (
|
|
10
|
+
find_ambiguous_scope_terms,
|
|
11
|
+
has_debug_intent,
|
|
12
|
+
has_checklist_container,
|
|
13
|
+
has_internal_object_leak,
|
|
14
|
+
is_prompt_workflow_response,
|
|
15
|
+
missing_context_control_signals,
|
|
16
|
+
missing_checklist_rows,
|
|
17
|
+
missing_scope_anchors,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _extract_user_context(hook_input: dict) -> str:
    """Return the first non-blank user text found in *hook_input*.

    The known user-text keys are probed in a fixed priority order. Values that
    are not strings, or that contain only whitespace, are skipped. An empty
    string is returned when no key yields usable text.
    """
    lookup_order = (
        "last_user_message",
        "user_message",
        "user_prompt",
        "prompt",
        "input",
    )
    probed = (hook_input.get(field_name) for field_name in lookup_order)
    usable = (text for text in probed if isinstance(text, str) and text.strip())
    return next(usable, "")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _build_block(reason: str) -> dict:
    """Build the hook response payload that carries a block decision and its *reason*."""
    payload = dict(decision="block")
    payload["reason"] = reason
    return payload
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _block_and_exit(reason: str) -> None:
    """Print a block decision carrying *reason* as JSON on stdout, then exit with status 0."""
    print(json.dumps(_build_block(reason)))
    sys.exit(0)


def main() -> None:
    """Run the stop-hook checks against the JSON payload read from stdin.

    The process always exits with status 0. When a check fails, a JSON object
    of the form ``{"decision": "block", "reason": ...}`` is printed to stdout
    first; when every check passes nothing is printed.

    Checks run in this order and stop at the first failure:

    1. Internal refinement-object leakage, unless the user text shows debug intent.
    2. For messages recognised as prompt-workflow responses only:
       checklist container, checklist rows, scope anchors,
       context-control signals, ambiguous scope terms.

    Input that is not valid JSON, is not a JSON object, or has a blank
    ``last_assistant_message`` is passed through without any check.
    """
    try:
        hook_input = json.load(sys.stdin)
    except json.JSONDecodeError:
        sys.exit(0)

    # Valid JSON is not necessarily an object (it may be a list, string, number,
    # ...). Such payloads have no fields to inspect, so let them through
    # instead of crashing on `.get`.
    if not isinstance(hook_input, dict):
        sys.exit(0)

    assistant_message = str(hook_input.get("last_assistant_message", ""))
    if not assistant_message.strip():
        sys.exit(0)

    user_context = _extract_user_context(hook_input)
    debug_requested = has_debug_intent(user_context)

    if has_internal_object_leak(assistant_message) and not debug_requested:
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Raw internal refinement object leakage detected. "
            "Return sanitized user-facing output unless explicit debug intent is present."
        )

    # The remaining checks apply only to prompt-workflow responses.
    if not is_prompt_workflow_response(assistant_message):
        sys.exit(0)

    if not has_checklist_container(assistant_message):
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Deterministic checklist container missing. "
            "Include `checklist_results` with all required rows."
        )

    missing_rows = missing_checklist_rows(assistant_message)
    if missing_rows:
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Deterministic checklist rows missing: "
            + ", ".join(missing_rows)
        )

    missing_anchors = missing_scope_anchors(assistant_message)
    if missing_anchors:
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Required scope anchors missing: "
            + ", ".join(missing_anchors)
        )

    missing_context_signals = missing_context_control_signals(assistant_message)
    if missing_context_signals:
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Runtime context-control signals missing: "
            + ", ".join(missing_context_signals)
        )

    ambiguous_terms = find_ambiguous_scope_terms(assistant_message)
    if ambiguous_terms:
        _block_and_exit(
            "PROMPT-WORKFLOW GATE: Ambiguous scope phrasing detected: "
            + ", ".join(ambiguous_terms)
        )

    sys.exit(0)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Run the gate only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Shared deterministic checks for prompt workflow hooks."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
# Anchor names that missing_scope_anchors() looks for as case-sensitive
# substrings; any name not found is reported as missing.
REQUIRED_SCOPE_ANCHORS: tuple[str, ...] = (
    "target_local_roots",
    "target_canonical_roots",
    "target_file_globs",
    "comparison_basis",
    "completion_boundary",
)

# Row names that missing_checklist_rows() looks for as case-sensitive
# substrings; any name not found is reported as missing.
REQUIRED_CHECKLIST_ROWS: tuple[str, ...] = (
    "structured_scoped_instructions",
    "sequential_steps_present",
    "positive_framing",
    "acceptance_criteria_defined",
    "safety_reversibility_language",
    "no_destructive_shortcuts_guidance",
    "concrete_output_contract",
    "scope_boundary_present",
    "explicit_scope_anchors_present",
    "all_instructions_artifact_bound",
    "no_ambiguous_scope_terms",
    "completion_boundary_measurable",
    "citation_grounding_policy_present",
    "source_priority_rules_present",
)

# Literal `key: true` lines that missing_context_control_signals() searches
# for in the lower-cased text.
REQUIRED_CONTEXT_CONTROL_SIGNALS: tuple[str, ...] = (
    "base_minimal_instruction_layer: true",
    "on_demand_skill_loading: true",
)

# Phrases that find_ambiguous_scope_terms() matches on word boundaries in the
# lower-cased text, and only when the text mentions "scope".
AMBIGUOUS_SCOPE_TERMS: tuple[str, ...] = (
    "this session",
    "current files",
    "here",
    "above",
    "as needed",
)

# JSON fragments that has_internal_object_leak() searches for
# (case-insensitive substring match).
INTERNAL_OBJECT_MARKERS: tuple[str, ...] = (
    '"pipeline_mode": "internal_section_refinement_with_final_audit"',
    '"scope_block": {',
    '"required_sections": [',
    '"section_output_contract": {',
    '"merge_output_contract": {',
    '"audit_output_contract": {',
)

# Free-text markers that has_explicit_execution_intent() searches for
# (case-insensitive substring match).
EXPLICIT_EXECUTION_MARKERS: tuple[str, ...] = (
    "/agent-prompt",
    "execution_intent: explicit",
    "execution_intent_explicit: true",
    "explicit execution intent",
    "explicit delegation intent",
)

# is_prompt_workflow_response() returns True when at least two of these occur
# in the lower-cased text.
PROMPT_WORKFLOW_RESPONSE_MARKERS: tuple[str, ...] = (
    "checklist_results",
    "overall_status",
    "scope anchors",
    "target_local_roots",
    "target_canonical_roots",
    "target_file_globs",
    "comparison_basis",
    "completion_boundary",
)

# Substrings that has_debug_intent() searches for (case-insensitive).
DEBUG_INTENT_MARKERS: tuple[str, ...] = (
    "debug",
    "show internal",
    "raw internal object",
    "pipeline object",
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _contains_any_marker(text: str, markers: Iterable[str]) -> bool:
    """Report whether at least one of *markers* occurs in *text*, ignoring case."""
    haystack = text.lower()
    for candidate in markers:
        if candidate.lower() in haystack:
            return True
    return False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def has_explicit_execution_intent(text: str) -> bool:
    """Return True when *text* contains any explicit-execution marker (case-insensitive)."""
    marker_found = _contains_any_marker(text, EXPLICIT_EXECUTION_MARKERS)
    return marker_found
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def has_structured_execution_intent(tool_input: object) -> bool:
    """Return True when *tool_input* declares execution intent in a structured field.

    Fields are consulted in this order, and the first one holding a value of a
    recognised type decides the result; later fields are then ignored:

    1. ``execution_intent_explicit``: used only when it is a bool.
    2. ``execution_intent``: a bool is returned as-is; a string is accepted
       when, stripped and lower-cased, it is one of
       ``explicit``, ``execute``, ``delegation``, ``delegate``.
    3. ``metadata["execution_intent"]``: same rules as (2), when ``metadata``
       is a dict.

    Anything that is not a dict, or a dict with none of these fields set to a
    bool or string, yields False.
    """
    if not isinstance(tool_input, dict):
        return False

    explicit_flag = tool_input.get("execution_intent_explicit")
    if isinstance(explicit_flag, bool):
        return explicit_flag

    # One accepted set for both locations, so the nested metadata field and the
    # top-level field cannot drift apart.
    accepted_intents = {"explicit", "execute", "delegation", "delegate"}

    candidates = [tool_input.get("execution_intent")]
    metadata = tool_input.get("metadata")
    if isinstance(metadata, dict):
        candidates.append(metadata.get("execution_intent"))

    for candidate in candidates:
        if isinstance(candidate, str):
            # An unrecognised string is a definite "no"; it does not fall
            # through to the next candidate.
            return candidate.strip().lower() in accepted_intents
        if isinstance(candidate, bool):
            return candidate

    return False
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def has_debug_intent(text: str) -> bool:
    """Return True when *text* contains any debug-intent marker (case-insensitive)."""
    debug_marker_found = _contains_any_marker(text, DEBUG_INTENT_MARKERS)
    return debug_marker_found
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def has_internal_object_leak(text: str) -> bool:
    """Return True when *text* contains any internal-object marker (case-insensitive)."""
    leak_marker_found = _contains_any_marker(text, INTERNAL_OBJECT_MARKERS)
    return leak_marker_found
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def missing_scope_anchors(text: str) -> list[str]:
    """List the required scope anchors that do not occur in *text*.

    Matching is a case-sensitive substring test. The result keeps the order of
    REQUIRED_SCOPE_ANCHORS and is empty when every anchor is present.
    """
    absent: list[str] = []
    for anchor in REQUIRED_SCOPE_ANCHORS:
        if anchor in text:
            continue
        absent.append(anchor)
    return absent
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def find_ambiguous_scope_terms(text: str) -> list[str]:
    """Return the ambiguous scope terms that appear in *text*.

    Text that never mentions "scope" (case-insensitive) is not inspected and
    yields an empty list. Otherwise each term in AMBIGUOUS_SCOPE_TERMS is
    matched on word boundaries against the lower-cased text, and the matches
    are returned in the order of that tuple.
    """
    # Lower-case once and reuse it for both the "scope" check and the search.
    lowered = text.lower()
    if "scope" not in lowered:
        return []
    return [
        term
        for term in AMBIGUOUS_SCOPE_TERMS
        if re.search(rf"\b{re.escape(term)}\b", lowered)
    ]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def has_checklist_container(text: str) -> bool:
    """Return True when *text* contains a checklist container label, ignoring case."""
    normalized = text.lower()
    container_labels = ("checklist_results", "checklist:")
    return any(label in normalized for label in container_labels)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def missing_checklist_rows(text: str) -> list[str]:
    """List the required checklist rows that do not occur in *text*.

    Matching is a case-sensitive substring test. The result keeps the order of
    REQUIRED_CHECKLIST_ROWS and is empty when every row is present.
    """
    return list(filter(lambda row: row not in text, REQUIRED_CHECKLIST_ROWS))
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def is_prompt_workflow_response(text: str) -> bool:
    """Return True when at least two prompt-workflow markers occur in the lower-cased *text*."""
    normalized = text.lower()
    hit_count = sum(
        1 for marker in PROMPT_WORKFLOW_RESPONSE_MARKERS if marker in normalized
    )
    return hit_count >= 2
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def missing_context_control_signals(text: str) -> list[str]:
    """List the required context-control signals absent from *text*.

    Signals are matched as substrings of the lower-cased text. The result keeps
    the order of REQUIRED_CONTEXT_CONTROL_SIGNALS and is empty when every
    signal is present.
    """
    # Lower-case once instead of once per signal.
    lowered = text.lower()
    return [
        signal for signal in REQUIRED_CONTEXT_CONTROL_SIGNALS if signal not in lowered
    ]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Tests for agent-execution-intent-gate hook."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Hook script under test, located in the same directory as this test module.
SCRIPT_PATH = Path(__file__).parent / "agent-execution-intent-gate.py"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _run_hook(payload: dict) -> subprocess.CompletedProcess[str]:
    """Run the hook script with *payload* as JSON on stdin and capture its output.

    The child process gets a copy of the current environment with
    ``PROMPT_WORKFLOW_ALLOW_TEXT_INTENT_FALLBACK`` removed. The fallback test
    in this module sets that variable to opt in, so removing it here keeps the
    other tests independent of the caller's shell environment.

    Returns the completed process; a non-zero exit status does not raise.
    """
    hermetic_env = {
        name: value
        for name, value in os.environ.items()
        if name != "PROMPT_WORKFLOW_ALLOW_TEXT_INTENT_FALLBACK"
    }
    return subprocess.run(
        [sys.executable, str(SCRIPT_PATH)],
        input=json.dumps(payload),
        text=True,
        capture_output=True,
        check=False,
        env=hermetic_env,
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_denies_task_without_explicit_intent_marker() -> None:
    """A Task call with no intent fields is denied, citing the missing structured signal."""
    tool_call = {
        "tool_name": "Task",
        "tool_input": {"prompt": "run the workflow", "description": "delegate"},
    }
    completed = _run_hook(tool_call)
    hook_output = json.loads(completed.stdout)["hookSpecificOutput"]
    assert hook_output["permissionDecision"] == "deny"
    assert "structured execution intent signal" in hook_output["permissionDecisionReason"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_denies_phrase_marker_without_structured_intent_contract() -> None:
    """Intent phrases inside prompt/description text alone do not satisfy the gate."""
    tool_input = {
        "prompt": "execution_intent: explicit and delegate now",
        "description": "explicit delegation intent",
    }
    completed = _run_hook({"tool_name": "Task", "tool_input": tool_input})
    hook_output = json.loads(completed.stdout)["hookSpecificOutput"]
    assert hook_output["permissionDecision"] == "deny"
    assert "Missing structured execution intent signal" in hook_output["permissionDecisionReason"]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_denies_when_scope_anchors_missing() -> None:
    """Structured intent is present, but a prompt naming only one anchor is still denied."""
    tool_input = {
        "execution_intent": "explicit",
        "prompt": "target_local_roots only",
        "description": "delegate",
    }
    completed = _run_hook({"tool_name": "Agent", "tool_input": tool_input})
    hook_output = json.loads(completed.stdout)["hookSpecificOutput"]
    assert hook_output["permissionDecision"] == "deny"
    assert "Scope anchors missing" in hook_output["permissionDecisionReason"]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_allows_when_intent_and_scope_anchors_present() -> None:
    """With the boolean intent flag and all five anchors, the hook prints nothing."""
    anchored_prompt = (
        "target_local_roots\n"
        "target_canonical_roots\n"
        "target_file_globs\n"
        "comparison_basis\n"
        "completion_boundary\n"
    )
    tool_input = {
        "execution_intent_explicit": True,
        "description": "delegate",
        "prompt": anchored_prompt,
    }
    completed = _run_hook({"tool_name": "Task", "tool_input": tool_input})
    assert completed.stdout.strip() == ""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_text_intent_fallback_is_logged_and_allowed_when_enabled() -> None:
    """With the fallback env var set, text-only intent is allowed and noted on stderr."""
    text_intent_prompt = (
        "execution_intent: explicit\n"
        "target_local_roots\n"
        "target_canonical_roots\n"
        "target_file_globs\n"
        "comparison_basis\n"
        "completion_boundary\n"
    )
    tool_call = {
        "tool_name": "Task",
        "tool_input": {
            "description": "delegate now",
            "prompt": text_intent_prompt,
        },
    }
    # Opt in to the compatibility fallback for this single invocation.
    fallback_env = dict(os.environ)
    fallback_env["PROMPT_WORKFLOW_ALLOW_TEXT_INTENT_FALLBACK"] = "1"
    completed = subprocess.run(
        [sys.executable, str(SCRIPT_PATH)],
        input=json.dumps(tool_call),
        text=True,
        capture_output=True,
        check=False,
        env=fallback_env,
    )
    assert completed.stdout.strip() == ""
    assert "compatibility text-intent fallback used" in completed.stderr
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Validation fixtures for context-control policy artifacts."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Third ancestor of this file's resolved path: the directory that contains
# both `rules/` and `hooks/`.
ROOT = Path(__file__).resolve().parents[2]
# Policy documents whose contents the tests below check.
RULE_PATH = ROOT / "rules" / "prompt-workflow-context-controls.md"
HOOK_SPEC_PATH = ROOT / "hooks" / "HOOK_SPECS_PROMPT_WORKFLOW.md"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_context_control_rule_exists_with_required_sections() -> None:
    """The context-control rule file is readable and contains every required phrase."""
    rule_text = RULE_PATH.read_text(encoding="utf-8")
    required_phrases = (
        "Base Minimal Instruction Layer",
        "On-Demand Skill Loading",
        "Compaction and Caching Strategy",
        "Runtime Enforcement Signals",
        "base_minimal_instruction_layer: true",
        "on_demand_skill_loading: true",
    )
    for phrase in required_phrases:
        assert phrase in rule_text
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_hook_spec_exists_with_required_gates() -> None:
    """The hook spec file is readable and contains every required gate heading or phrase."""
    spec_text = HOOK_SPEC_PATH.read_text(encoding="utf-8")
    required_phrases = (
        "Execution Intent (PreToolUse Task/Agent)",
        "Leakage + Checklist + Scope (Stop)",
        "Required Deterministic Checklist Rows",
        "structured execution intent contract",
        "Runtime Context-Control Signals",
    )
    for phrase in required_phrases:
        assert phrase in spec_text
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Unit tests for shared prompt workflow gate logic."""
|
|
2
|
+
|
|
3
|
+
from prompt_workflow_gate_core import (
|
|
4
|
+
find_ambiguous_scope_terms,
|
|
5
|
+
has_checklist_container,
|
|
6
|
+
has_explicit_execution_intent,
|
|
7
|
+
has_structured_execution_intent,
|
|
8
|
+
has_internal_object_leak,
|
|
9
|
+
is_prompt_workflow_response,
|
|
10
|
+
missing_context_control_signals,
|
|
11
|
+
missing_checklist_rows,
|
|
12
|
+
missing_scope_anchors,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_execution_intent_marker_detected() -> None:
    """The literal `execution_intent: explicit` text counts as an explicit-intent marker."""
    sample = "execution_intent: explicit"
    assert has_explicit_execution_intent(sample)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_structured_execution_intent_detected_from_contract_field() -> None:
    """A string `execution_intent` field set to "explicit" is recognised."""
    tool_input = {"execution_intent": "explicit"}
    assert has_structured_execution_intent(tool_input)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_structured_execution_intent_detected_from_boolean_flag() -> None:
    """A True `execution_intent_explicit` flag is recognised."""
    tool_input = {"execution_intent_explicit": True}
    assert has_structured_execution_intent(tool_input)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_internal_object_leak_detected() -> None:
    """A message containing the internal pipeline_mode fragment is reported as a leak."""
    leaked_payload = '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
    leak_detected = has_internal_object_leak(leaked_payload)
    assert leak_detected
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_missing_scope_anchors_returns_expected_rows() -> None:
    """Text naming a single anchor leaves the other anchors reported as missing."""
    absent = missing_scope_anchors("target_local_roots only.")
    for expected_anchor in ("target_canonical_roots", "completion_boundary"):
        assert expected_anchor in absent
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_missing_checklist_rows_detected() -> None:
    """A checklist listing only one row has the remaining rows reported as missing."""
    partial_checklist = "checklist_results: structured_scoped_instructions only"
    absent_rows = missing_checklist_rows(partial_checklist)
    assert "completion_boundary_measurable" in absent_rows
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_checklist_container_detection() -> None:
    """A `checklist_results:` heading is recognised as a checklist container."""
    sample = "checklist_results:\n- structured_scoped_instructions"
    assert has_checklist_container(sample)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_prompt_workflow_response_detection() -> None:
    """A message carrying several workflow markers is classified as a workflow response."""
    marker_lines = [
        "overall_status: pass\n",
        "target_local_roots: /repo\n",
        "comparison_basis: current behavior vs deterministic guarantees\n",
    ]
    assert is_prompt_workflow_response("".join(marker_lines))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_missing_context_control_signals_detected() -> None:
    """Supplying only one of the two signals leaves the other reported as missing."""
    only_first_signal = "base_minimal_instruction_layer: true"
    absent_signals = missing_context_control_signals(only_first_signal)
    assert "on_demand_skill_loading: true" in absent_signals
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_ambiguous_scope_terms_detected() -> None:
    """Both ambiguous phrases are found in text that mentions scope."""
    flagged = find_ambiguous_scope_terms("Scope applies to this session and current files.")
    for expected_term in ("this session", "current files"):
        assert expected_term in flagged
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Tests for prompt-workflow-stop-guard hook."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Hook script under test, located in the same directory as this test module.
SCRIPT_PATH = Path(__file__).parent / "prompt-workflow-stop-guard.py"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _run_hook(payload: dict) -> subprocess.CompletedProcess[str]:
    """Run the stop-guard script with *payload* as JSON on stdin and capture its output.

    Returns the completed process; a non-zero exit status does not raise.
    """
    command = [sys.executable, str(SCRIPT_PATH)]
    stdin_text = json.dumps(payload)
    completed = subprocess.run(
        command,
        input=stdin_text,
        text=True,
        capture_output=True,
        check=False,
    )
    return completed
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _full_checklist_rows() -> str:
    """Return a `checklist_results:` header followed by one `- row` line per checklist row."""
    row_names = (
        "structured_scoped_instructions",
        "sequential_steps_present",
        "positive_framing",
        "acceptance_criteria_defined",
        "safety_reversibility_language",
        "no_destructive_shortcuts_guidance",
        "concrete_output_contract",
        "scope_boundary_present",
        "explicit_scope_anchors_present",
        "all_instructions_artifact_bound",
        "no_ambiguous_scope_terms",
        "completion_boundary_measurable",
        "citation_grounding_policy_present",
        "source_priority_rules_present",
    )
    bullet_lines = "".join(f"- {row_name}\n" for row_name in row_names)
    return "checklist_results:\n" + bullet_lines
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_blocks_internal_object_leak_without_debug_intent() -> None:
    """An internal object in the reply is blocked when the user did not ask for debug output."""
    hook_payload = {
        "last_assistant_message": '{"pipeline_mode": "internal_section_refinement_with_final_audit"}',
        "last_user_message": "just return the final prompt",
    }
    completed = _run_hook(hook_payload)
    verdict = json.loads(completed.stdout)
    assert verdict["decision"] == "block"
    assert "Raw internal refinement object leakage" in verdict["reason"]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_allows_internal_object_with_debug_intent() -> None:
    """The same internal object passes when the user message shows debug intent."""
    hook_payload = {
        "last_assistant_message": '{"pipeline_mode": "internal_section_refinement_with_final_audit"}',
        "last_user_message": "debug: show internal pipeline object",
    }
    completed = _run_hook(hook_payload)
    assert completed.stdout.strip() == ""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_blocks_missing_checklist_rows() -> None:
    """A workflow reply whose checklist lists only one row is blocked for missing rows."""
    partial_reply = "overall_status: pass\nchecklist_results: structured_scoped_instructions"
    completed = _run_hook({"last_assistant_message": partial_reply})
    verdict = json.loads(completed.stdout)
    assert verdict["decision"] == "block"
    assert "Deterministic checklist rows missing" in verdict["reason"]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_blocks_missing_checklist_container_for_prompt_workflow_output() -> None:
    """A workflow reply with anchors and signals but no checklist container is blocked."""
    reply_without_checklist = (
        "overall_status: pass\n"
        "target_local_roots\n"
        "target_canonical_roots\n"
        "target_file_globs\n"
        "comparison_basis\n"
        "completion_boundary\n"
        "base_minimal_instruction_layer: true\n"
        "on_demand_skill_loading: true\n"
    )
    completed = _run_hook({"last_assistant_message": reply_without_checklist})
    verdict = json.loads(completed.stdout)
    assert verdict["decision"] == "block"
    assert "Deterministic checklist container missing" in verdict["reason"]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_blocks_missing_context_control_signals() -> None:
    """A reply lacking `on_demand_skill_loading: true` is blocked and names that signal."""
    reply_parts = [
        "overall_status: pass\n",
        _full_checklist_rows(),
        "target_local_roots\n",
        "target_canonical_roots\n",
        "target_file_globs\n",
        "comparison_basis\n",
        "completion_boundary\n",
        "base_minimal_instruction_layer: true\n",
    ]
    completed = _run_hook({"last_assistant_message": "".join(reply_parts)})
    verdict = json.loads(completed.stdout)
    assert verdict["decision"] == "block"
    block_reason = verdict["reason"]
    assert "Runtime context-control signals missing" in block_reason
    assert "on_demand_skill_loading: true" in block_reason
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def test_blocks_ambiguous_scope_phrasing() -> None:
    """An otherwise complete reply that says "this session" is blocked as ambiguous."""
    reply_parts = [
        "overall_status: pass\n",
        _full_checklist_rows(),
        "scope block includes target_local_roots target_canonical_roots ",
        "target_file_globs comparison_basis completion_boundary ",
        "base_minimal_instruction_layer: true\n",
        "on_demand_skill_loading: true\n",
        "and applies to this session.",
    ]
    completed = _run_hook({"last_assistant_message": "".join(reply_parts)})
    verdict = json.loads(completed.stdout)
    assert verdict["decision"] == "block"
    assert "Ambiguous scope phrasing detected" in verdict["reason"]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_allows_fully_structured_prompt_workflow_output() -> None:
    """A reply with every checklist row, anchor and signal produces no hook output."""
    reply_parts = [
        "overall_status: pass\n",
        _full_checklist_rows(),
        "target_local_roots\n",
        "target_canonical_roots\n",
        "target_file_globs\n",
        "comparison_basis\n",
        "completion_boundary\n",
        "base_minimal_instruction_layer: true\n",
        "on_demand_skill_loading: true\n",
    ]
    completed = _run_hook({"last_assistant_message": "".join(reply_parts)})
    assert completed.stdout.strip() == ""
|
package/hooks/hooks.json
CHANGED
|
@@ -94,6 +94,11 @@
|
|
|
94
94
|
"type": "command",
|
|
95
95
|
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/parallel-task-blocker.py",
|
|
96
96
|
"timeout": 10
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"type": "command",
|
|
100
|
+
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/agent-execution-intent-gate.py",
|
|
101
|
+
"timeout": 10
|
|
97
102
|
}
|
|
98
103
|
]
|
|
99
104
|
},
|
|
@@ -150,6 +155,11 @@
|
|
|
150
155
|
"type": "command",
|
|
151
156
|
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/hedging-language-blocker.py",
|
|
152
157
|
"timeout": 10
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"type": "command",
|
|
161
|
+
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/prompt-workflow-stop-guard.py",
|
|
162
|
+
"timeout": 10
|
|
153
163
|
}
|
|
154
164
|
]
|
|
155
165
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-dev-env",
|
|
3
|
-
"version": "1.7.0",
|
|
3
|
+
"version": "1.8.1",
|
|
4
4
|
"description": "Claude Code development standards — rules, hooks, agents, commands, and skills",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -15,11 +15,6 @@
|
|
|
15
15
|
"skills/",
|
|
16
16
|
"hooks/"
|
|
17
17
|
],
|
|
18
|
-
"dependencies": {
|
|
19
|
-
"claude-journal": "^1.3.0",
|
|
20
|
-
"claude-deep-research": "^1.0.0",
|
|
21
|
-
"claude-prompt-tools": "^1.0.0"
|
|
22
|
-
},
|
|
23
18
|
"keywords": [
|
|
24
19
|
"claude-code",
|
|
25
20
|
"plugin",
|