claude-dev-env 1.17.5 → 1.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/blocking/content-search-to-zoekt-redirector.py +6 -2
- package/hooks/blocking/content_search_zoekt_block_payload.py +9 -5
- package/hooks/blocking/content_search_zoekt_redirect_guidance.py +7 -1
- package/hooks/blocking/test_content_search_to_zoekt_redirector_integration.py +8 -1
- package/hooks/blocking/test_content_search_to_zoekt_redirector_unit.py +7 -2
- package/hooks/hooks.json +15 -0
- package/package.json +1 -1
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +0 -64
- package/hooks/blocking/prompt_workflow_clipboard.py +0 -63
- package/hooks/blocking/prompt_workflow_gate_config.py +0 -113
- package/hooks/blocking/prompt_workflow_gate_core.py +0 -289
- package/hooks/blocking/prompt_workflow_validate.py +0 -218
- package/hooks/blocking/test_prompt_workflow_clipboard.py +0 -54
- package/hooks/blocking/test_prompt_workflow_gate_core.py +0 -195
- package/hooks/blocking/test_prompt_workflow_validate.py +0 -339
- package/rules/prompt-workflow-context-controls.md +0 -48
- package/skills/agent-prompt/SKILL.md +0 -199
- package/skills/prompt-generator/ARCHITECTURE.md +0 -18
- package/skills/prompt-generator/REFERENCE.md +0 -254
- package/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md +0 -177
- package/skills/prompt-generator/SKILL.md +0 -354
- package/skills/prompt-generator/TARGET_OUTPUT.md +0 -133
- package/skills/prompt-generator/evals/prompt-generator.json +0 -207
- package/skills/prompt-generator/templates/skill-from-ground-up.md +0 -104
- package/skills/prompt-generator/templates/skill-refinement-package.md +0 -109
|
@@ -1,339 +0,0 @@
|
|
|
1
|
-
"""Tests for prompt_workflow_validate module (shared validator + CLI entry point)."""
|
|
2
|
-
|
|
3
|
-
import subprocess
|
|
4
|
-
import sys
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
|
|
9
|
-
from prompt_workflow_validate import ValidationResult, validate_prompt_workflow
|
|
10
|
-
|
|
11
|
-
VALIDATOR_MODULE_PATH = Path(__file__).parent / "prompt_workflow_validate.py"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _full_checklist_rows() -> str:
|
|
15
|
-
return (
|
|
16
|
-
"checklist_results:\n"
|
|
17
|
-
"- structured_scoped_instructions\n"
|
|
18
|
-
"- sequential_steps_present\n"
|
|
19
|
-
"- positive_framing\n"
|
|
20
|
-
"- acceptance_criteria_defined\n"
|
|
21
|
-
"- safety_reversibility_language\n"
|
|
22
|
-
"- reversible_action_and_safety_check_guidance\n"
|
|
23
|
-
"- concrete_output_contract\n"
|
|
24
|
-
"- scope_boundary_present\n"
|
|
25
|
-
"- explicit_scope_anchors_present\n"
|
|
26
|
-
"- all_instructions_artifact_bound\n"
|
|
27
|
-
"- scope_terms_explicit_and_anchored\n"
|
|
28
|
-
"- completion_boundary_measurable\n"
|
|
29
|
-
"- citation_grounding_policy_present\n"
|
|
30
|
-
"- source_priority_rules_present\n"
|
|
31
|
-
"- artifact_language_confidence\n"
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def _wrap_five_section_scaffold(inner_body: str) -> str:
|
|
36
|
-
has_instructions = "<instructions>" in inner_body
|
|
37
|
-
has_constraints = "<constraints>" in inner_body
|
|
38
|
-
instructions_section = (
|
|
39
|
-
"" if has_instructions else "<instructions>Test instructions sentence one.</instructions>\n"
|
|
40
|
-
)
|
|
41
|
-
constraints_section = (
|
|
42
|
-
"" if has_constraints else "<constraints>Test constraints sentence one.</constraints>\n"
|
|
43
|
-
)
|
|
44
|
-
return (
|
|
45
|
-
"<role>Test role sentence one.</role>\n"
|
|
46
|
-
"<background>Test background sentence one.</background>\n"
|
|
47
|
-
f"{instructions_section}"
|
|
48
|
-
f"{inner_body}\n"
|
|
49
|
-
f"{constraints_section}"
|
|
50
|
-
"<output_format>Test output format sentence one.</output_format>\n"
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
|
|
55
|
-
return (
|
|
56
|
-
"Audit: pass 15/15\n"
|
|
57
|
-
"```xml\n" + fenced_xml_body + "\n```\n"
|
|
58
|
-
"overall_status: pass\n" + _full_checklist_rows() + "target_local_roots\n"
|
|
59
|
-
"target_canonical_roots\n"
|
|
60
|
-
"target_file_globs\n"
|
|
61
|
-
"comparison_basis\n"
|
|
62
|
-
"completion_boundary\n"
|
|
63
|
-
"base_minimal_instruction_layer: true\n"
|
|
64
|
-
"on_demand_skill_loading: true\n"
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class TestValidatePromptWorkflowFunction:
|
|
69
|
-
"""Tests that exercise the shared validate_prompt_workflow function directly."""
|
|
70
|
-
|
|
71
|
-
def test_allowed_complete_message_with_fenced_xml(self) -> None:
|
|
72
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
73
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
74
|
-
)
|
|
75
|
-
message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
|
|
76
|
-
validation_result = validate_prompt_workflow(message)
|
|
77
|
-
assert validation_result.allowed is True
|
|
78
|
-
assert validation_result.reasons == ()
|
|
79
|
-
|
|
80
|
-
def test_blocked_missing_context_control_lines(self) -> None:
|
|
81
|
-
message = (
|
|
82
|
-
"overall_status: pass\n"
|
|
83
|
-
+ _full_checklist_rows()
|
|
84
|
-
+ "target_local_roots\n"
|
|
85
|
-
+ "target_canonical_roots\n"
|
|
86
|
-
+ "target_file_globs\n"
|
|
87
|
-
+ "comparison_basis\n"
|
|
88
|
-
+ "completion_boundary\n"
|
|
89
|
-
)
|
|
90
|
-
validation_result = validate_prompt_workflow(message)
|
|
91
|
-
assert validation_result.allowed is False
|
|
92
|
-
assert "missing_context_signals" in validation_result.reason_codes
|
|
93
|
-
assert any(
|
|
94
|
-
"context-control" in each_message
|
|
95
|
-
for each_message in validation_result.reason_messages
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
def test_allowed_empty_message(self) -> None:
|
|
99
|
-
validation_result = validate_prompt_workflow("")
|
|
100
|
-
assert validation_result.allowed is True
|
|
101
|
-
|
|
102
|
-
def test_allowed_non_workflow_message(self) -> None:
|
|
103
|
-
validation_result = validate_prompt_workflow("Just a regular response.")
|
|
104
|
-
assert validation_result.allowed is True
|
|
105
|
-
|
|
106
|
-
def test_blocked_internal_object_leak(self) -> None:
|
|
107
|
-
leak_message = (
|
|
108
|
-
'{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
|
|
109
|
-
)
|
|
110
|
-
validation_result = validate_prompt_workflow(leak_message)
|
|
111
|
-
assert validation_result.allowed is False
|
|
112
|
-
assert "internal_object_leak" in validation_result.reason_codes
|
|
113
|
-
|
|
114
|
-
def test_allowed_internal_object_with_debug_context(self) -> None:
|
|
115
|
-
leak_message = (
|
|
116
|
-
'{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
|
|
117
|
-
)
|
|
118
|
-
validation_result = validate_prompt_workflow(
|
|
119
|
-
leak_message,
|
|
120
|
-
user_context="debug: show internal pipeline object",
|
|
121
|
-
)
|
|
122
|
-
assert validation_result.allowed is True
|
|
123
|
-
|
|
124
|
-
def test_blocked_missing_checklist_rows(self) -> None:
|
|
125
|
-
message = (
|
|
126
|
-
"overall_status: pass\n"
|
|
127
|
-
"checklist_results: structured_scoped_instructions\n"
|
|
128
|
-
"target_local_roots\n"
|
|
129
|
-
"target_canonical_roots\n"
|
|
130
|
-
"target_file_globs\n"
|
|
131
|
-
"comparison_basis\n"
|
|
132
|
-
"completion_boundary\n"
|
|
133
|
-
)
|
|
134
|
-
validation_result = validate_prompt_workflow(message)
|
|
135
|
-
assert validation_result.allowed is False
|
|
136
|
-
assert "missing_checklist_rows" in validation_result.reason_codes
|
|
137
|
-
|
|
138
|
-
def test_blocked_negative_keywords_in_fenced_xml(self) -> None:
|
|
139
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
140
|
-
"<instructions>Do not leave return types implicit.</instructions>"
|
|
141
|
-
)
|
|
142
|
-
message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
|
|
143
|
-
validation_result = validate_prompt_workflow(message)
|
|
144
|
-
assert validation_result.allowed is False
|
|
145
|
-
assert "negative_keywords_in_artifact" in validation_result.reason_codes
|
|
146
|
-
|
|
147
|
-
def test_blocked_ambiguous_scope(self) -> None:
|
|
148
|
-
message = (
|
|
149
|
-
"overall_status: pass\n"
|
|
150
|
-
+ _full_checklist_rows()
|
|
151
|
-
+ "scope block includes target_local_roots target_canonical_roots "
|
|
152
|
-
+ "target_file_globs comparison_basis completion_boundary "
|
|
153
|
-
+ "base_minimal_instruction_layer: true\n"
|
|
154
|
-
+ "on_demand_skill_loading: true\n"
|
|
155
|
-
+ "and applies to this session."
|
|
156
|
-
)
|
|
157
|
-
validation_result = validate_prompt_workflow(message)
|
|
158
|
-
assert validation_result.allowed is False
|
|
159
|
-
assert "ambiguous_scope" in validation_result.reason_codes
|
|
160
|
-
|
|
161
|
-
def test_reason_messages_property(self) -> None:
|
|
162
|
-
message = (
|
|
163
|
-
"overall_status: pass\n"
|
|
164
|
-
+ _full_checklist_rows()
|
|
165
|
-
+ "target_local_roots\n"
|
|
166
|
-
+ "target_canonical_roots\n"
|
|
167
|
-
+ "target_file_globs\n"
|
|
168
|
-
+ "comparison_basis\n"
|
|
169
|
-
+ "completion_boundary\n"
|
|
170
|
-
)
|
|
171
|
-
validation_result = validate_prompt_workflow(message)
|
|
172
|
-
assert len(validation_result.reason_messages) == 1
|
|
173
|
-
assert len(validation_result.reason_codes) == 1
|
|
174
|
-
|
|
175
|
-
def test_blocked_missing_scope_anchors(self) -> None:
|
|
176
|
-
message = (
|
|
177
|
-
"overall_status: pass\n"
|
|
178
|
-
+ _full_checklist_rows()
|
|
179
|
-
+ "base_minimal_instruction_layer: true\n"
|
|
180
|
-
+ "on_demand_skill_loading: true\n"
|
|
181
|
-
)
|
|
182
|
-
validation_result = validate_prompt_workflow(message)
|
|
183
|
-
assert validation_result.allowed is False
|
|
184
|
-
assert "missing_scope_anchors" in validation_result.reason_codes
|
|
185
|
-
|
|
186
|
-
def test_blocked_missing_xml_sections_in_fenced_artifact(self) -> None:
|
|
187
|
-
fenced_body = (
|
|
188
|
-
"<role>Test role sentence one.</role>\n"
|
|
189
|
-
"<instructions>Test instructions sentence one.</instructions>\n"
|
|
190
|
-
"<constraints>Test constraints sentence one.</constraints>\n"
|
|
191
|
-
"<output_format>Test output format sentence one.</output_format>\n"
|
|
192
|
-
)
|
|
193
|
-
message = _build_prompt_workflow_message_with_fenced_xml(fenced_body)
|
|
194
|
-
validation_result = validate_prompt_workflow(message)
|
|
195
|
-
assert validation_result.allowed is False
|
|
196
|
-
assert "missing_xml_sections" in validation_result.reason_codes
|
|
197
|
-
assert any(
|
|
198
|
-
"background" in each_message
|
|
199
|
-
for each_message in validation_result.reason_messages
|
|
200
|
-
)
|
|
201
|
-
|
|
202
|
-
def test_allows_positive_phrasing_inside_fenced_xml(self) -> None:
|
|
203
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
204
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
205
|
-
)
|
|
206
|
-
message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
|
|
207
|
-
validation_result = validate_prompt_workflow(message)
|
|
208
|
-
assert validation_result.allowed is True
|
|
209
|
-
|
|
210
|
-
def test_permits_negative_keywords_outside_fenced_xml(self) -> None:
|
|
211
|
-
fenced_inner = _wrap_five_section_scaffold(
|
|
212
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
213
|
-
)
|
|
214
|
-
message = (
|
|
215
|
-
"Audit: pass 15/15\n"
|
|
216
|
-
"Do not skip the audit line.\n"
|
|
217
|
-
"```xml\n" + fenced_inner + "\n```\n"
|
|
218
|
-
"overall_status: pass\n" + _full_checklist_rows() + "target_local_roots\n"
|
|
219
|
-
"target_canonical_roots\n"
|
|
220
|
-
"target_file_globs\n"
|
|
221
|
-
"comparison_basis\n"
|
|
222
|
-
"completion_boundary\n"
|
|
223
|
-
"base_minimal_instruction_layer: true\n"
|
|
224
|
-
"on_demand_skill_loading: true\n"
|
|
225
|
-
)
|
|
226
|
-
validation_result = validate_prompt_workflow(message)
|
|
227
|
-
assert validation_result.allowed is True
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
@pytest.mark.parametrize(
|
|
231
|
-
("banned_pattern_name", "fenced_xml_content"),
|
|
232
|
-
[
|
|
233
|
-
("do_not", "<instructions>Do not leave return types implicit.</instructions>"),
|
|
234
|
-
("avoid", "<instructions>Avoid missing return types.</instructions>"),
|
|
235
|
-
("never", "<constraints>Never store credentials in plain text.</constraints>"),
|
|
236
|
-
("without", "<instructions>Deploy without running tests first.</instructions>"),
|
|
237
|
-
("prevent", "<constraints>Prevent unauthorized access to the API.</constraints>"),
|
|
238
|
-
("reject", "<constraints>Reject all unsigned commits.</constraints>"),
|
|
239
|
-
("cannot", "<constraints>The API cannot accept unauthenticated requests.</constraints>"),
|
|
240
|
-
("unless", "<constraints>Skip the build step unless the user explicitly approves.</constraints>"),
|
|
241
|
-
("must_not", "<constraints>The script must not produce duplicates.</constraints>"),
|
|
242
|
-
("must_never", "<constraints>You must never store credentials in environment variables.</constraints>"),
|
|
243
|
-
("instead_of", "<instructions>Use explicit types instead of implicit ones.</instructions>"),
|
|
244
|
-
("rather_than", "<constraints>Prefer explicit types rather than inferred ones.</constraints>"),
|
|
245
|
-
("as_opposed_to", "<instructions>Use Grid as opposed to floats for layout.</instructions>"),
|
|
246
|
-
],
|
|
247
|
-
)
|
|
248
|
-
def test_blocks_banned_pattern_inside_fenced_xml(
|
|
249
|
-
banned_pattern_name: str,
|
|
250
|
-
fenced_xml_content: str,
|
|
251
|
-
) -> None:
|
|
252
|
-
message = _build_prompt_workflow_message_with_fenced_xml(
|
|
253
|
-
_wrap_five_section_scaffold(fenced_xml_content)
|
|
254
|
-
)
|
|
255
|
-
validation_result = validate_prompt_workflow(message)
|
|
256
|
-
assert validation_result.allowed is False
|
|
257
|
-
assert "negative_keywords_in_artifact" in validation_result.reason_codes
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
class TestValidatorCli:
|
|
261
|
-
"""Tests that exercise the CLI entry point via subprocess."""
|
|
262
|
-
|
|
263
|
-
def test_cli_exits_zero_for_valid_content(self, tmp_path: Path) -> None:
|
|
264
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
265
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
266
|
-
)
|
|
267
|
-
draft_file = tmp_path / "draft.xml"
|
|
268
|
-
draft_file.write_text(
|
|
269
|
-
_build_prompt_workflow_message_with_fenced_xml(fenced_content),
|
|
270
|
-
encoding="utf-8",
|
|
271
|
-
)
|
|
272
|
-
completed_process = subprocess.run(
|
|
273
|
-
[sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
|
|
274
|
-
capture_output=True,
|
|
275
|
-
text=True,
|
|
276
|
-
check=False,
|
|
277
|
-
)
|
|
278
|
-
assert completed_process.returncode == 0
|
|
279
|
-
assert completed_process.stderr.strip() == ""
|
|
280
|
-
|
|
281
|
-
def test_cli_exits_two_with_bracketed_reason_code_on_stderr(
|
|
282
|
-
self,
|
|
283
|
-
tmp_path: Path,
|
|
284
|
-
) -> None:
|
|
285
|
-
message = (
|
|
286
|
-
"overall_status: pass\n"
|
|
287
|
-
+ _full_checklist_rows()
|
|
288
|
-
+ "target_local_roots\n"
|
|
289
|
-
+ "target_canonical_roots\n"
|
|
290
|
-
+ "target_file_globs\n"
|
|
291
|
-
+ "comparison_basis\n"
|
|
292
|
-
+ "completion_boundary\n"
|
|
293
|
-
)
|
|
294
|
-
draft_file = tmp_path / "draft.xml"
|
|
295
|
-
draft_file.write_text(message, encoding="utf-8")
|
|
296
|
-
completed_process = subprocess.run(
|
|
297
|
-
[sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
|
|
298
|
-
capture_output=True,
|
|
299
|
-
text=True,
|
|
300
|
-
check=False,
|
|
301
|
-
)
|
|
302
|
-
assert completed_process.returncode == 2
|
|
303
|
-
assert "[missing_context_signals]" in completed_process.stderr
|
|
304
|
-
|
|
305
|
-
def test_cli_stderr_format_uses_reason_code_prefix(
|
|
306
|
-
self,
|
|
307
|
-
tmp_path: Path,
|
|
308
|
-
) -> None:
|
|
309
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
310
|
-
"<instructions>Do not leave return types implicit.</instructions>"
|
|
311
|
-
)
|
|
312
|
-
draft_file = tmp_path / "draft.xml"
|
|
313
|
-
draft_file.write_text(
|
|
314
|
-
_build_prompt_workflow_message_with_fenced_xml(fenced_content),
|
|
315
|
-
encoding="utf-8",
|
|
316
|
-
)
|
|
317
|
-
completed_process = subprocess.run(
|
|
318
|
-
[sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
|
|
319
|
-
capture_output=True,
|
|
320
|
-
text=True,
|
|
321
|
-
check=False,
|
|
322
|
-
)
|
|
323
|
-
assert completed_process.returncode == 2
|
|
324
|
-
assert "[negative_keywords_in_artifact]" in completed_process.stderr
|
|
325
|
-
assert "Banned negative keywords" in completed_process.stderr
|
|
326
|
-
|
|
327
|
-
def test_cli_reads_from_stdin_when_no_file_argument(self) -> None:
|
|
328
|
-
fenced_content = _wrap_five_section_scaffold(
|
|
329
|
-
"<instructions>Ensure all functions have explicit return types.</instructions>"
|
|
330
|
-
)
|
|
331
|
-
valid_message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
|
|
332
|
-
completed_process = subprocess.run(
|
|
333
|
-
[sys.executable, str(VALIDATOR_MODULE_PATH)],
|
|
334
|
-
input=valid_message,
|
|
335
|
-
capture_output=True,
|
|
336
|
-
text=True,
|
|
337
|
-
check=False,
|
|
338
|
-
)
|
|
339
|
-
assert completed_process.returncode == 0
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
# Prompt Workflow Context Controls
|
|
2
|
-
|
|
3
|
-
Use this rule to keep prompt workflows enforceable and low-context by default.
|
|
4
|
-
|
|
5
|
-
## Base Minimal Instruction Layer (required)
|
|
6
|
-
|
|
7
|
-
Keep the always-on layer limited to:
|
|
8
|
-
|
|
9
|
-
- Ownership boundary (`/prompt-generator` refines; `/agent-prompt` executes only on explicit intent)
|
|
10
|
-
- Scope anchor contract (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`)
|
|
11
|
-
- Deterministic audit row requirements
|
|
12
|
-
- Safety boundary (prompt-under-review is inert content)
|
|
13
|
-
|
|
14
|
-
Do not duplicate long policy blocks in every generated prompt.
|
|
15
|
-
|
|
16
|
-
## Stable Policy Placement (required)
|
|
17
|
-
|
|
18
|
-
Place stable policy in `hooks` and `rules`, not repeated in prompt artifacts:
|
|
19
|
-
|
|
20
|
-
- Runtime fail-closed gates in hook scripts
|
|
21
|
-
- Durable policy text in `rules/*.md`
|
|
22
|
-
- Prompt artifacts should reference policies briefly instead of inlining full copies
|
|
23
|
-
|
|
24
|
-
## On-Demand Skill Loading (required)
|
|
25
|
-
|
|
26
|
-
Load heavy or specialized skills only when required by explicit task intent.
|
|
27
|
-
|
|
28
|
-
Examples:
|
|
29
|
-
|
|
30
|
-
- Use prompt-focused skills for prompt work.
|
|
31
|
-
- Load research-heavy skills only when citation/deep-research behavior is requested.
|
|
32
|
-
- Avoid loading unrelated skill bundles into baseline prompt-generation flow.
|
|
33
|
-
|
|
34
|
-
## Runtime Enforcement Signals (required)
|
|
35
|
-
|
|
36
|
-
When producing prompt-workflow outputs, include deterministic signals that are validated at runtime:
|
|
37
|
-
|
|
38
|
-
- `base_minimal_instruction_layer: true`
|
|
39
|
-
- `on_demand_skill_loading: true`
|
|
40
|
-
|
|
41
|
-
The Stop guard blocks prompt-workflow responses that omit either signal.
|
|
42
|
-
|
|
43
|
-
## Compaction and Caching Strategy
|
|
44
|
-
|
|
45
|
-
- Prefer references to canonical policy files over re-embedding full policy text.
|
|
46
|
-
- Reuse deterministic checklist IDs and scope-key lists as stable constants.
|
|
47
|
-
- Keep runbook examples concise and artifact-bound.
|
|
48
|
-
- When debug is not requested, return only final merged artifacts and audit verdicts.
|
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: agent-prompt
|
|
3
|
-
description: >-
|
|
4
|
-
Craft a structured prompt using prompt-generator's workflow, then spawn a
|
|
5
|
-
background agent to execute it after user approval. Use instead of
|
|
6
|
-
/prompt-generator when the user wants execution, not just the prompt.
|
|
7
|
-
Triggers on /agent-prompt, "launch an agent for this", "spawn agent to do X",
|
|
8
|
-
"delegate this", "run this in background", or any task that benefits from
|
|
9
|
-
agent delegation with prompt quality.
|
|
10
|
-
---
|
|
11
|
-
|
|
12
|
-
@packages/claude-dev-env/skills/prompt-generator/SKILL.md
|
|
13
|
-
@packages/claude-dev-env/skills/prompt-generator/REFERENCE.md
|
|
14
|
-
|
|
15
|
-
# Agent Prompt
|
|
16
|
-
|
|
17
|
-
Craft a structured agent prompt, get approval, spawn a background agent.
|
|
18
|
-
|
|
19
|
-
The prompt-generator skill above defines the prompt-crafting workflow. This skill extends it: instead of delivering the prompt as a fenced block, it presents the prompt for approval and spawns a background agent.
|
|
20
|
-
|
|
21
|
-
## When this skill applies
|
|
22
|
-
|
|
23
|
-
Trigger only when the user explicitly wants to delegate or execute a task with an agent.
|
|
24
|
-
|
|
25
|
-
`/prompt-generator` is the default owner for prompt authoring and refinement. This skill starts after explicit execution intent.
|
|
26
|
-
|
|
27
|
-
When invoked with arguments (e.g. `/agent-prompt fix the auth bug via TDD`), treat the arguments as the task to build a prompt for and execute.
|
|
28
|
-
|
|
29
|
-
## Workflow
|
|
30
|
-
|
|
31
|
-
### Steps 1-8: Craft the prompt
|
|
32
|
-
|
|
33
|
-
Follow the prompt-generator workflow steps 1 through 8 exactly as written. Classify the prompt type, set degree of freedom, collect missing facts, build the prompt with XML tags and role, control format and style, add examples if needed, and self-check against the rubric.
|
|
34
|
-
|
|
35
|
-
After steps 1-8, continue directly to step 9 for context gathering and embed the gathered values into the drafted prompt; deliverables are handled through the orchestration flow below.
|
|
36
|
-
|
|
37
|
-
### Step 9: Gather context before crafting
|
|
38
|
-
|
|
39
|
-
The agent starts with zero conversation history. Before building the prompt, use Read, Glob, Grep, and other research tools to gather the concrete values the agent will need -- file paths, function signatures, existing patterns, branch names. Embed these directly in the prompt instead of telling the agent to "find" them.
|
|
40
|
-
|
|
41
|
-
The agent-spawn-protocol rule requires this: if any context question has the answer "I don't know", investigate first, then delegate with complete context.
|
|
42
|
-
|
|
43
|
-
Proactive context gathering enables agents to plan effectively from the start. Anthropic's emotion concepts research (2026) found that agents produce higher-quality output when they understand constraints, available tools, and system boundaries upfront — they incorporate these into their approach naturally, leading to better first attempts and more accurate results.
|
|
44
|
-
|
|
45
|
-
### Step 10: Determine agent configuration
|
|
46
|
-
|
|
47
|
-
Map the task to agent parameters:
|
|
48
|
-
|
|
49
|
-
| Task type | subagent_type | mode |
|
|
50
|
-
|---|---|---|
|
|
51
|
-
| Codebase exploration, search, research | Explore | default |
|
|
52
|
-
| Code implementation, bug fix, refactoring | general-purpose | auto |
|
|
53
|
-
| Read-only audit, analysis, review | general-purpose | default |
|
|
54
|
-
| Architecture, multi-step planning | Plan | plan |
|
|
55
|
-
|
|
56
|
-
Always set `run_in_background: true`.
|
|
57
|
-
|
|
58
|
-
Generate a descriptive `name` (3-5 words, kebab-case) so the user can track progress and send follow-up messages via `SendMessage({to: name})`.
|
|
59
|
-
|
|
60
|
-
### Step 10A: Section-refinement orchestration mode (default for execution tasks)
|
|
61
|
-
|
|
62
|
-
Execution behavior: run this deterministic orchestration for delegated prompt work after explicit launch intent.
|
|
63
|
-
Prompt authoring and prompt refinement ownership remain in `/prompt-generator`.
|
|
64
|
-
|
|
65
|
-
Use simplified mode when either condition is true:
|
|
66
|
-
- The user explicitly requests single-agent execution
|
|
67
|
-
- The task is genuinely too small for orchestration (for example, one quick read/search)
|
|
68
|
-
|
|
69
|
-
Section-refinement orchestration mode is triggered when execution input includes `pipeline_mode: internal_section_refinement_with_final_audit` or equivalent execution-ready orchestration metadata.
|
|
70
|
-
If present, carry forward the scope block (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`) so execution remains artifact-bound.
|
|
71
|
-
|
|
72
|
-
1. Spawn exactly 6 refinement agents, one per section in fixed order:
|
|
73
|
-
- `role`
|
|
74
|
-
- `context`
|
|
75
|
-
- `instructions`
|
|
76
|
-
- `constraints`
|
|
77
|
-
- `output_format`
|
|
78
|
-
- `examples`
|
|
79
|
-
2. Enforce section-only scope in each sub-prompt:
|
|
80
|
-
- "Edit `<SECTION_NAME>` and preserve all other sections unchanged."
|
|
81
|
-
3. Require section output contract from each agent:
|
|
82
|
-
- `improved_block`
|
|
83
|
-
- `rationale`
|
|
84
|
-
- `concise_diff`
|
|
85
|
-
4. Merge outputs into one canonical prompt after all 6 refiners finish.
|
|
86
|
-
5. Run one final audit agent against the merged prompt and checklist.
|
|
87
|
-
6. If audit fails, apply targeted fixes and re-run audit with capped retries (`max_retries: 2` unless user overrides).
|
|
88
|
-
|
|
89
|
-
Run all stages in this exact order.
|
|
90
|
-
|
|
91
|
-
### Step 11: Present for approval (must reflect default orchestration)
|
|
92
|
-
|
|
93
|
-
Use AskUserQuestion with one question. The question text must summarize:
|
|
94
|
-
- agent config (type, mode, name)
|
|
95
|
-
- orchestration mode (`section_refinement_with_final_audit` by default)
|
|
96
|
-
- retry cap for audit loop
|
|
97
|
-
|
|
98
|
-
Each option should use the `preview` field to show the full crafted prompt.
|
|
99
|
-
|
|
100
|
-
Options:
|
|
101
|
-
1. "Launch it" (recommended) -- preview shows the crafted prompt
|
|
102
|
-
2. "Edit first" -- preview shows the prompt with a note that user can provide changes
|
|
103
|
-
3. "Cancel" -- no preview
|
|
104
|
-
|
|
105
|
-
### Step 12: Spawn
|
|
106
|
-
|
|
107
|
-
On **"Launch it"**: spawn the Agent tool with the crafted prompt and configuration. Report the agent name so the user knows what's running.
|
|
108
|
-
|
|
109
|
-
On **"Edit first"**: present the prompt in conversation text. After the user provides changes, return to step 11 with the updated prompt.
|
|
110
|
-
|
|
111
|
-
On **"Cancel"**: acknowledge and stop.
|
|
112
|
-
|
|
113
|
-
## Prompt adjustments for agent execution
|
|
114
|
-
|
|
115
|
-
When building the prompt in step 4 of the prompt-generator workflow, these adjustments ensure the agent can work independently:
|
|
116
|
-
|
|
117
|
-
**Context completeness** -- include file paths, line numbers, function names, branch state, and anything you learned during step 9. The agent cannot see this conversation.
|
|
118
|
-
Bind execution steps to the scope block artifacts passed from refinement output whenever available.
|
|
119
|
-
Keep runtime context compact: include only actionable facts required for execution.
|
|
120
|
-
|
|
121
|
-
**Acceptance criteria** -- state what "done" looks like. For code: include the test command. For research: specify the output format and save location.
|
|
122
|
-
|
|
123
|
-
**Scope boundary** -- include "Make requested changes and keep surrounding code stable" or equivalent. Agents with explicit scope constraints stay aligned to task intent.
|
|
124
|
-
|
|
125
|
-
**Constraints from this project** -- if the project has CODE_RULES.md, TDD requirements, or naming conventions, include the relevant subset in the prompt so the agent follows them.
|
|
126
|
-
|
|
127
|
-
**Emotion-informed briefing** -- Anthropic's emotion concepts research (2026) found that briefing style causally affects output quality. Frame tasks collaboratively ("work on this together", "help figure out"). Include permission to express uncertainty ("flag anything you're unsure about", "use [PLACEHOLDER] for unverified specifics"). Provide motivation behind constraints ("this ordering ensures tests define behavior before implementation exists"). Share system context proactively (what hooks enforce, what tools are available, what the fallback is) so the agent can incorporate constraints into its plan from the start.
|
|
128
|
-
|
|
129
|
-
**Anti-test-fixation** -- For code tasks, include guidance against test-specific solutions. Anthropic: "Implement a solution that works correctly for all valid inputs, not just the test cases. Tests are there to verify correctness, not to define the solution. If the task is unreasonable or infeasible, or if any of the tests are incorrect, please inform me rather than working around them."
|
|
130
|
-
|
|
131
|
-
**Commit-and-execute** -- For multi-step agent work, include decision commitment guidance. Anthropic: "When deciding how to approach a problem, choose an approach and commit to it. Avoid revisiting decisions unless you encounter new information that directly contradicts your reasoning."
|
|
132
|
-
|
|
133
|
-
**Temp file cleanup** -- If the agent may create scratch files during iteration, include cleanup instructions. Anthropic: "If you create any temporary new files, scripts, or helper files for iteration, clean up these files by removing them at the end of the task."
|
|
134
|
-
|
|
135
|
-
## Final audit-agent stage requirements (for default section-refinement mode)
|
|
136
|
-
|
|
137
|
-
After merge, run one dedicated audit agent that validates the full prompt against:
|
|
138
|
-
|
|
139
|
-
- Prompt-generator rubric requirements (`packages/claude-dev-env/skills/prompt-generator/SKILL.md`)
|
|
140
|
-
- The deterministic checklist from the handoff artifact
|
|
141
|
-
- Embedded research-mode evidence constraints below
|
|
142
|
-
|
|
143
|
-
Required audit output shape:
|
|
144
|
-
|
|
145
|
-
```json
|
|
146
|
-
{
|
|
147
|
-
"overall_status": "pass|fail",
|
|
148
|
-
"checklist_results": [
|
|
149
|
-
{
|
|
150
|
-
"check_id": "structured_scoped_instructions",
|
|
151
|
-
"status": "pass|fail",
|
|
152
|
-
"evidence_quote": "word-for-word quote",
|
|
153
|
-
"source_ref": "path-or-url",
|
|
154
|
-
"fix_if_fail": "targeted correction"
|
|
155
|
-
}
|
|
156
|
-
],
|
|
157
|
-
"corrective_edits": ["..."],
|
|
158
|
-
"retry_count": 0
|
|
159
|
-
}
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
### Embedded research-mode policy text (audit behavior)
|
|
163
|
-
|
|
164
|
-
The audit agent must enforce these constraints as policy text in the audit prompt (do not rely on a global mode switch):
|
|
165
|
-
|
|
166
|
-
- "Every recommendation, claim, or piece of advice must cite a specific source."
|
|
167
|
-
- "Ground your response in word-for-word quotes, not paraphrased summaries."
|
|
168
|
-
- "If you don't have a credible source for a claim, say 'I don't know'."
|
|
169
|
-
- Source priority:
|
|
170
|
-
1. Official vendor/creator docs for external tools
|
|
171
|
-
2. Local project files for local behavior
|
|
172
|
-
3. Academic or named expert sources
|
|
173
|
-
4. Reputable external sources with URLs
|
|
174
|
-
5. Blogs/community posts (lowest)
|
|
175
|
-
|
|
176
|
-
Policy source: `packages/claude-dev-env/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md`
|
|
177
|
-
|
|
178
|
-
## Section-refinement acceptance criteria
|
|
179
|
-
|
|
180
|
-
Section-refinement orchestration is done only when all are true:
|
|
181
|
-
|
|
182
|
-
- All 6 section agents ran, each scoped to exactly one section
|
|
183
|
-
- Merge produced one canonical prompt containing all six sections
|
|
184
|
-
- Final audit returned `overall_status: pass`
|
|
185
|
-
- Any non-pass audit was resolved through targeted revisions within retry cap
|
|
186
|
-
- AskUserQuestion approval gate was honored before launch
|
|
187
|
-
- Final user artifact includes one complete pasteable prompt block
|
|
188
|
-
|
|
189
|
-
## Constraints
|
|
190
|
-
|
|
191
|
-
- Present every launch for approval via AskUserQuestion before spawning
|
|
192
|
-
- Always run agents in background
|
|
193
|
-
- Gather context before crafting -- do not send an agent in blind
|
|
194
|
-
- Start only after explicit user execution intent; keep prompt authoring/refinement in `/prompt-generator`
|
|
195
|
-
- Default to `section_refinement_with_final_audit` orchestration for execution tasks unless user requests simplified mode
|
|
196
|
-
- Carry scope-block context into execution prompts; native Agent/Task tools have no custom intent metadata
|
|
197
|
-
- If the task is too small for an agent (single file read, quick grep), say so and just do it directly
|
|
198
|
-
- Include obstacle handling: "When encountering obstacles, do not use destructive actions as a shortcut (e.g. --no-verify, discarding unfamiliar files)" -- agents without this guidance may take irreversible shortcuts
|
|
199
|
-
- Frame agent tasks with collaborative language and include permission to express uncertainty — agents produce higher-quality output with collaborative briefing (Anthropic emotion concepts research, 2026)
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
# prompt-generator — file map
|
|
2
|
-
|
|
3
|
-
Baseline inventory of the files in the prompt-generator skill package and the `hooks/blocking` modules that support it.
|
|
4
|
-
|
|
5
|
-
## Baseline inventory
|
|
6
|
-
|
|
7
|
-
| Path | Role |
|
|
8
|
-
| --- | --- |
|
|
9
|
-
| `SKILL.md` | Orchestrator rules, subagent contract, compliance audit |
|
|
10
|
-
| `TARGET_OUTPUT.md` | User-visible output contract for evals and hooks |
|
|
11
|
-
| `REFERENCE.md` | Tiered sources, harness patterns, debug schema |
|
|
12
|
-
| `REFINEMENT_PIPELINE_RUNBOOK.md` | Evidence-grounding runbook |
|
|
13
|
-
| `evals/prompt-generator.json` | Scenario eval rows |
|
|
14
|
-
| `templates/skill-from-ground-up.md` | Net-new skill checkpoint template |
|
|
15
|
-
| `templates/skill-refinement-package.md` | Existing-skill refinement template |
|
|
16
|
-
| `hooks/blocking/prompt_workflow_validate.py` | Validator CLI (file-based loop) |
|
|
17
|
-
| `hooks/blocking/prompt_workflow_gate_core.py` | Fence extraction, markers |
|
|
18
|
-
| `hooks/blocking/prompt_workflow_clipboard.py` | Clipboard copy for artifacts |
|