claude-dev-env 1.17.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,339 +0,0 @@
1
- """Tests for prompt_workflow_validate module (shared validator + CLI entry point)."""
2
-
3
- import subprocess
4
- import sys
5
- from pathlib import Path
6
-
7
- import pytest
8
-
9
- from prompt_workflow_validate import ValidationResult, validate_prompt_workflow
10
-
11
# Path to the validator script, resolved as a sibling of this test module.
VALIDATOR_MODULE_PATH = Path(__file__).with_name("prompt_workflow_validate.py")
12
-
13
-
14
def _full_checklist_rows() -> str:
    """Return the ``checklist_results:`` header followed by all 15 checklist rows.

    Each row is rendered as a ``- <check_id>`` bullet on its own line, and the
    returned text ends with a trailing newline.
    """
    checklist_ids = (
        "structured_scoped_instructions",
        "sequential_steps_present",
        "positive_framing",
        "acceptance_criteria_defined",
        "safety_reversibility_language",
        "reversible_action_and_safety_check_guidance",
        "concrete_output_contract",
        "scope_boundary_present",
        "explicit_scope_anchors_present",
        "all_instructions_artifact_bound",
        "scope_terms_explicit_and_anchored",
        "completion_boundary_measurable",
        "citation_grounding_policy_present",
        "source_priority_rules_present",
        "artifact_language_confidence",
    )
    bullet_rows = [f"- {check_id}\n" for check_id in checklist_ids]
    return "checklist_results:\n" + "".join(bullet_rows)
33
-
34
-
35
def _wrap_five_section_scaffold(inner_body: str) -> str:
    """Embed ``inner_body`` in a five-section XML scaffold.

    The scaffold is ``role``, ``background``, ``instructions``, ``constraints``
    and ``output_format``. A placeholder ``<instructions>`` or ``<constraints>``
    section is emitted only when ``inner_body`` does not already contain the
    corresponding opening tag. ``inner_body`` is placed between the
    instructions and constraints slots.
    """
    scaffold_parts = [
        "<role>Test role sentence one.</role>\n",
        "<background>Test background sentence one.</background>\n",
    ]
    if "<instructions>" not in inner_body:
        scaffold_parts.append(
            "<instructions>Test instructions sentence one.</instructions>\n"
        )
    scaffold_parts.append(inner_body + "\n")
    if "<constraints>" not in inner_body:
        scaffold_parts.append(
            "<constraints>Test constraints sentence one.</constraints>\n"
        )
    scaffold_parts.append(
        "<output_format>Test output format sentence one.</output_format>\n"
    )
    return "".join(scaffold_parts)
52
-
53
-
54
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
    """Assemble a full prompt-workflow message around ``fenced_xml_body``.

    The message consists of, in order: the audit line, the body inside an
    ``xml`` code fence, the ``overall_status`` line, the full checklist rows,
    the five scope-anchor keys, and the two context-control signal lines.
    """
    audit_and_fence = f"Audit: pass 15/15\n```xml\n{fenced_xml_body}\n```\n"
    scope_anchor_lines = "".join(
        f"{anchor_key}\n"
        for anchor_key in (
            "target_local_roots",
            "target_canonical_roots",
            "target_file_globs",
            "comparison_basis",
            "completion_boundary",
        )
    )
    context_signal_lines = (
        "base_minimal_instruction_layer: true\n"
        "on_demand_skill_loading: true\n"
    )
    return "".join(
        [
            audit_and_fence,
            "overall_status: pass\n",
            _full_checklist_rows(),
            scope_anchor_lines,
            context_signal_lines,
        ]
    )
66
-
67
-
68
class TestValidatePromptWorkflowFunction:
    """Direct tests of the shared ``validate_prompt_workflow`` function."""

    # The five scope-anchor keys, one per line, as they appear in workflow messages.
    _SCOPE_ANCHOR_LINES = (
        "target_local_roots\n"
        "target_canonical_roots\n"
        "target_file_globs\n"
        "comparison_basis\n"
        "completion_boundary\n"
    )
    # The two context-control signal lines.
    _CONTEXT_SIGNAL_LINES = (
        "base_minimal_instruction_layer: true\n"
        "on_demand_skill_loading: true\n"
    )
    # Instruction section phrased positively (expected to be allowed).
    _POSITIVE_INSTRUCTIONS = (
        "<instructions>Ensure all functions have explicit return types.</instructions>"
    )
    # Instruction section containing a banned negative keyword.
    _NEGATIVE_INSTRUCTIONS = (
        "<instructions>Do not leave return types implicit.</instructions>"
    )
    # Serialized internal pipeline object used by the leak tests.
    _INTERNAL_PIPELINE_OBJECT = (
        '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
    )

    def test_allowed_complete_message_with_fenced_xml(self) -> None:
        """A complete message with a positive fenced artifact passes with no reasons."""
        artifact = _wrap_five_section_scaffold(self._POSITIVE_INSTRUCTIONS)
        outcome = validate_prompt_workflow(
            _build_prompt_workflow_message_with_fenced_xml(artifact)
        )
        assert outcome.allowed is True
        assert outcome.reasons == ()

    def test_blocked_missing_context_control_lines(self) -> None:
        """Omitting both context-control lines is reported as missing_context_signals."""
        workflow_text = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + self._SCOPE_ANCHOR_LINES
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert outcome.allowed is False
        assert "missing_context_signals" in outcome.reason_codes
        assert any(
            "context-control" in reason_text
            for reason_text in outcome.reason_messages
        )

    def test_allowed_empty_message(self) -> None:
        """An empty message is allowed."""
        outcome = validate_prompt_workflow("")
        assert outcome.allowed is True

    def test_allowed_non_workflow_message(self) -> None:
        """Plain text with no workflow markers is allowed."""
        outcome = validate_prompt_workflow("Just a regular response.")
        assert outcome.allowed is True

    def test_blocked_internal_object_leak(self) -> None:
        """An internal pipeline object in the message is reported as a leak."""
        outcome = validate_prompt_workflow(self._INTERNAL_PIPELINE_OBJECT)
        assert outcome.allowed is False
        assert "internal_object_leak" in outcome.reason_codes

    def test_allowed_internal_object_with_debug_context(self) -> None:
        """The same internal object is allowed when the user context asks for debug output."""
        outcome = validate_prompt_workflow(
            self._INTERNAL_PIPELINE_OBJECT,
            user_context="debug: show internal pipeline object",
        )
        assert outcome.allowed is True

    def test_blocked_missing_checklist_rows(self) -> None:
        """A checklist naming a single row is reported as missing_checklist_rows."""
        workflow_text = (
            "overall_status: pass\n"
            "checklist_results: structured_scoped_instructions\n"
            + self._SCOPE_ANCHOR_LINES
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert outcome.allowed is False
        assert "missing_checklist_rows" in outcome.reason_codes

    def test_blocked_negative_keywords_in_fenced_xml(self) -> None:
        """Negative phrasing inside the fenced artifact is blocked."""
        artifact = _wrap_five_section_scaffold(self._NEGATIVE_INSTRUCTIONS)
        outcome = validate_prompt_workflow(
            _build_prompt_workflow_message_with_fenced_xml(artifact)
        )
        assert outcome.allowed is False
        assert "negative_keywords_in_artifact" in outcome.reason_codes

    def test_blocked_ambiguous_scope(self) -> None:
        """A message ending in 'applies to this session.' is reported as ambiguous_scope."""
        workflow_text = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + "scope block includes target_local_roots target_canonical_roots "
            + "target_file_globs comparison_basis completion_boundary "
            + self._CONTEXT_SIGNAL_LINES
            + "and applies to this session."
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert outcome.allowed is False
        assert "ambiguous_scope" in outcome.reason_codes

    def test_reason_messages_property(self) -> None:
        """A single failure yields exactly one reason message and one reason code."""
        workflow_text = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + self._SCOPE_ANCHOR_LINES
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert len(outcome.reason_messages) == 1
        assert len(outcome.reason_codes) == 1

    def test_blocked_missing_scope_anchors(self) -> None:
        """Omitting every scope-anchor key is reported as missing_scope_anchors."""
        workflow_text = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + self._CONTEXT_SIGNAL_LINES
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert outcome.allowed is False
        assert "missing_scope_anchors" in outcome.reason_codes

    def test_blocked_missing_xml_sections_in_fenced_artifact(self) -> None:
        """A fenced artifact without <background> is blocked and the message names it."""
        artifact_without_background = "".join(
            [
                "<role>Test role sentence one.</role>\n",
                "<instructions>Test instructions sentence one.</instructions>\n",
                "<constraints>Test constraints sentence one.</constraints>\n",
                "<output_format>Test output format sentence one.</output_format>\n",
            ]
        )
        outcome = validate_prompt_workflow(
            _build_prompt_workflow_message_with_fenced_xml(artifact_without_background)
        )
        assert outcome.allowed is False
        assert "missing_xml_sections" in outcome.reason_codes
        assert any(
            "background" in reason_text
            for reason_text in outcome.reason_messages
        )

    def test_allows_positive_phrasing_inside_fenced_xml(self) -> None:
        """Positive phrasing inside the fenced artifact is allowed."""
        artifact = _wrap_five_section_scaffold(self._POSITIVE_INSTRUCTIONS)
        outcome = validate_prompt_workflow(
            _build_prompt_workflow_message_with_fenced_xml(artifact)
        )
        assert outcome.allowed is True

    def test_permits_negative_keywords_outside_fenced_xml(self) -> None:
        """Negative phrasing outside the xml fence does not block the message."""
        artifact = _wrap_five_section_scaffold(self._POSITIVE_INSTRUCTIONS)
        workflow_text = "".join(
            [
                "Audit: pass 15/15\n",
                "Do not skip the audit line.\n",
                "```xml\n",
                artifact,
                "\n```\n",
                "overall_status: pass\n",
                _full_checklist_rows(),
                self._SCOPE_ANCHOR_LINES,
                self._CONTEXT_SIGNAL_LINES,
            ]
        )
        outcome = validate_prompt_workflow(workflow_text)
        assert outcome.allowed is True
228
-
229
-
230
# (pattern label, fenced XML section containing that banned phrasing)
_BANNED_PATTERN_CASES = [
    ("do_not", "<instructions>Do not leave return types implicit.</instructions>"),
    ("avoid", "<instructions>Avoid missing return types.</instructions>"),
    ("never", "<constraints>Never store credentials in plain text.</constraints>"),
    ("without", "<instructions>Deploy without running tests first.</instructions>"),
    ("prevent", "<constraints>Prevent unauthorized access to the API.</constraints>"),
    ("reject", "<constraints>Reject all unsigned commits.</constraints>"),
    ("cannot", "<constraints>The API cannot accept unauthenticated requests.</constraints>"),
    ("unless", "<constraints>Skip the build step unless the user explicitly approves.</constraints>"),
    ("must_not", "<constraints>The script must not produce duplicates.</constraints>"),
    ("must_never", "<constraints>You must never store credentials in environment variables.</constraints>"),
    ("instead_of", "<instructions>Use explicit types instead of implicit ones.</instructions>"),
    ("rather_than", "<constraints>Prefer explicit types rather than inferred ones.</constraints>"),
    ("as_opposed_to", "<instructions>Use Grid as opposed to floats for layout.</instructions>"),
]


@pytest.mark.parametrize(
    ("banned_pattern_name", "fenced_xml_content"),
    _BANNED_PATTERN_CASES,
)
def test_blocks_banned_pattern_inside_fenced_xml(
    banned_pattern_name: str,
    fenced_xml_content: str,
) -> None:
    """Each banned phrasing inside the fenced artifact must block the message.

    ``banned_pattern_name`` is not read by the body; it only labels the case.
    """
    scaffolded_artifact = _wrap_five_section_scaffold(fenced_xml_content)
    outcome = validate_prompt_workflow(
        _build_prompt_workflow_message_with_fenced_xml(scaffolded_artifact)
    )
    assert outcome.allowed is False
    assert "negative_keywords_in_artifact" in outcome.reason_codes
258
-
259
-
260
class TestValidatorCli:
    """Tests that exercise the CLI entry point via subprocess.

    The assertions below pin two exit codes: 0 for content that validates and
    2 for content that is blocked, with ``[reason_code]`` written to stderr.
    """

    # Upper bound for a single validator run. Without it a hung child process
    # (for example one waiting on stdin) would stall the whole test suite.
    _CLI_TIMEOUT_SECONDS = 60

    _POSITIVE_INSTRUCTIONS = (
        "<instructions>Ensure all functions have explicit return types.</instructions>"
    )

    def _run_validator_cli(self, *cli_arguments, stdin_text=None):
        """Run the validator script in a child interpreter and return the result.

        Args:
            *cli_arguments: Extra command-line arguments (e.g. a draft file path).
            stdin_text: Text piped to the child's stdin; ``None`` leaves stdin
                untouched, matching a plain ``subprocess.run`` call.

        Returns:
            The ``subprocess.CompletedProcess`` with text-mode stdout/stderr.

        Raises:
            subprocess.TimeoutExpired: If the child exceeds the timeout.
        """
        return subprocess.run(
            [sys.executable, str(VALIDATOR_MODULE_PATH), *cli_arguments],
            input=stdin_text,
            capture_output=True,
            text=True,
            check=False,
            timeout=self._CLI_TIMEOUT_SECONDS,
        )

    def test_cli_exits_zero_for_valid_content(self, tmp_path: Path) -> None:
        """A valid draft file exits 0 and writes nothing to stderr."""
        fenced_content = _wrap_five_section_scaffold(self._POSITIVE_INSTRUCTIONS)
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(
            _build_prompt_workflow_message_with_fenced_xml(fenced_content),
            encoding="utf-8",
        )
        completed_process = self._run_validator_cli(str(draft_file))
        assert completed_process.returncode == 0
        assert completed_process.stderr.strip() == ""

    def test_cli_exits_two_with_bracketed_reason_code_on_stderr(
        self,
        tmp_path: Path,
    ) -> None:
        """A draft missing the context-control lines exits 2 with the bracketed code."""
        message = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + "target_local_roots\n"
            + "target_canonical_roots\n"
            + "target_file_globs\n"
            + "comparison_basis\n"
            + "completion_boundary\n"
        )
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(message, encoding="utf-8")
        completed_process = self._run_validator_cli(str(draft_file))
        assert completed_process.returncode == 2
        assert "[missing_context_signals]" in completed_process.stderr

    def test_cli_stderr_format_uses_reason_code_prefix(
        self,
        tmp_path: Path,
    ) -> None:
        """A blocked draft prints the bracketed reason code and its message."""
        fenced_content = _wrap_five_section_scaffold(
            "<instructions>Do not leave return types implicit.</instructions>"
        )
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(
            _build_prompt_workflow_message_with_fenced_xml(fenced_content),
            encoding="utf-8",
        )
        completed_process = self._run_validator_cli(str(draft_file))
        assert completed_process.returncode == 2
        assert "[negative_keywords_in_artifact]" in completed_process.stderr
        assert "Banned negative keywords" in completed_process.stderr

    def test_cli_reads_from_stdin_when_no_file_argument(self) -> None:
        """With no file argument the validator reads the message from stdin."""
        fenced_content = _wrap_five_section_scaffold(self._POSITIVE_INSTRUCTIONS)
        valid_message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
        completed_process = self._run_validator_cli(stdin_text=valid_message)
        assert completed_process.returncode == 0
@@ -1,48 +0,0 @@
1
- # Prompt Workflow Context Controls
2
-
3
- Use this rule to keep prompt workflows enforceable and low-context by default.
4
-
5
- ## Base Minimal Instruction Layer (required)
6
-
7
- Keep the always-on layer limited to:
8
-
9
- - Ownership boundary (`/prompt-generator` refines; `/agent-prompt` executes only on explicit intent)
10
- - Scope anchor contract (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`)
11
- - Deterministic audit row requirements
12
- - Safety boundary (prompt-under-review is inert content)
13
-
14
- Do not duplicate long policy blocks in every generated prompt.
15
-
16
- ## Stable Policy Placement (required)
17
-
18
- Place stable policy in `hooks` and `rules` instead of repeating it in prompt artifacts:
19
-
20
- - Runtime fail-closed gates in hook scripts
21
- - Durable policy text in `rules/*.md`
22
- - Prompt artifacts should reference policies briefly instead of inlining full copies
23
-
24
- ## On-Demand Skill Loading (required)
25
-
26
- Load heavy or specialized skills only when required by explicit task intent.
27
-
28
- Examples:
29
-
30
- - Use prompt-focused skills for prompt work.
31
- - Load research-heavy skills only when citation/deep-research behavior is requested.
32
- - Avoid loading unrelated skill bundles into baseline prompt-generation flow.
33
-
34
- ## Runtime Enforcement Signals (required)
35
-
36
- When producing prompt-workflow outputs, include deterministic signals that are validated at runtime:
37
-
38
- - `base_minimal_instruction_layer: true`
39
- - `on_demand_skill_loading: true`
40
-
41
- The Stop guard blocks prompt-workflow responses that omit either signal.
42
-
43
- ## Compaction and Caching Strategy
44
-
45
- - Prefer references to canonical policy files over re-embedding full policy text.
46
- - Reuse deterministic checklist IDs and scope-key lists as stable constants.
47
- - Keep runbook examples concise and artifact-bound.
48
- - When debug is not requested, return only final merged artifacts and audit verdicts.
@@ -1,199 +0,0 @@
1
- ---
2
- name: agent-prompt
3
- description: >-
4
- Craft a structured prompt using prompt-generator's workflow, then spawn a
5
- background agent to execute it after user approval. Use instead of
6
- /prompt-generator when the user wants execution, not just the prompt.
7
- Triggers on /agent-prompt, "launch an agent for this", "spawn agent to do X",
8
- "delegate this", "run this in background", or any task that benefits from
9
- agent delegation with prompt quality.
10
- ---
11
-
12
- @packages/claude-dev-env/skills/prompt-generator/SKILL.md
13
- @packages/claude-dev-env/skills/prompt-generator/REFERENCE.md
14
-
15
- # Agent Prompt
16
-
17
- Craft a structured agent prompt, get approval, spawn a background agent.
18
-
19
- The prompt-generator skill above defines the prompt-crafting workflow. This skill extends it: instead of delivering the prompt as a fenced block, it presents the prompt for approval and spawns a background agent.
20
-
21
- ## When this skill applies
22
-
23
- Trigger only when the user explicitly wants to delegate or execute a task with an agent.
24
-
25
- `/prompt-generator` is the default owner for prompt authoring and refinement. This skill starts after explicit execution intent.
26
-
27
- When invoked with arguments (e.g. `/agent-prompt fix the auth bug via TDD`), treat the arguments as the task to build a prompt for and execute.
28
-
29
- ## Workflow
30
-
31
- ### Steps 1-8: Craft the prompt
32
-
33
- Follow the prompt-generator workflow steps 1 through 8 exactly as written. Classify the prompt type, set degree of freedom, collect missing facts, build the prompt with XML tags and role, control format and style, add examples if needed, and self-check against the rubric.
34
-
35
- After steps 1-8, continue directly to step 9 for context gathering; deliverables are handled through the orchestration flow below.
36
-
37
- ### Step 9: Gather context before crafting
38
-
39
- The agent starts with zero conversation history. Before building the prompt, use Read, Glob, Grep, and other research tools to gather the concrete values the agent will need -- file paths, function signatures, existing patterns, branch names. Embed these directly in the prompt instead of telling the agent to "find" them.
40
-
41
- The agent-spawn-protocol rule requires this: if any context question has the answer "I don't know", investigate first, then delegate with complete context.
42
-
43
- Proactive context gathering enables agents to plan effectively from the start. Anthropic's emotion concepts research (2026) found that agents produce higher-quality output when they understand constraints, available tools, and system boundaries upfront — they incorporate these into their approach naturally, leading to better first attempts and more accurate results.
44
-
45
- ### Step 10: Determine agent configuration
46
-
47
- Map the task to agent parameters:
48
-
49
- | Task type | subagent_type | mode |
50
- |---|---|---|
51
- | Codebase exploration, search, research | Explore | default |
52
- | Code implementation, bug fix, refactoring | general-purpose | auto |
53
- | Read-only audit, analysis, review | general-purpose | default |
54
- | Architecture, multi-step planning | Plan | plan |
55
-
56
- Always set `run_in_background: true`.
57
-
58
- Generate a descriptive `name` (3-5 words, kebab-case) so the user can track progress and send follow-up messages via `SendMessage({to: name})`.
59
-
60
- ### Step 10A: Section-refinement orchestration mode (default for execution tasks)
61
-
62
- Execution behavior: run this deterministic orchestration for delegated prompt work after explicit launch intent.
63
- Prompt authoring and prompt refinement ownership remain in `/prompt-generator`.
64
-
65
- Use simplified mode when either condition is true:
66
- - The user explicitly requests single-agent execution
67
- - The task is genuinely too small for orchestration (for example, one quick read/search)
68
-
69
- This mode is triggered when execution input includes `pipeline_mode: internal_section_refinement_with_final_audit` or equivalent execution-ready orchestration metadata.
70
- If present, carry forward the scope block (`target_local_roots`, `target_canonical_roots`, `target_file_globs`, `comparison_basis`, `completion_boundary`) so execution remains artifact-bound.
71
-
72
- 1. Spawn exactly 6 refinement agents, one per section in fixed order:
73
- - `role`
74
- - `context`
75
- - `instructions`
76
- - `constraints`
77
- - `output_format`
78
- - `examples`
79
- 2. Enforce section-only scope in each sub-prompt:
80
- - "Edit `<SECTION_NAME>` and preserve all other sections unchanged."
81
- 3. Require section output contract from each agent:
82
- - `improved_block`
83
- - `rationale`
84
- - `concise_diff`
85
- 4. Merge outputs into one canonical prompt after all 6 refiners finish.
86
- 5. Run one final audit agent against the merged prompt and checklist.
87
- 6. If audit fails, apply targeted fixes and re-run audit with capped retries (`max_retries: 2` unless user overrides).
88
-
89
- Run all stages in this exact order.
90
-
91
- ### Step 11: Present for approval (must reflect default orchestration)
92
-
93
- Use AskUserQuestion with one question. The question text must summarize:
94
- - agent config (type, mode, name)
95
- - orchestration mode (`section_refinement_with_final_audit` by default)
96
- - retry cap for audit loop
97
-
98
- Each option should use the `preview` field to show the full crafted prompt.
99
-
100
- Options:
101
- 1. "Launch it" (recommended) -- preview shows the crafted prompt
102
- 2. "Edit first" -- preview shows the prompt with a note that user can provide changes
103
- 3. "Cancel" -- no preview
104
-
105
- ### Step 12: Spawn
106
-
107
- On **"Launch it"**: spawn the Agent tool with the crafted prompt and configuration. Report the agent name so the user knows what's running.
108
-
109
- On **"Edit first"**: present the prompt in conversation text. After the user provides changes, return to step 11 with the updated prompt.
110
-
111
- On **"Cancel"**: acknowledge and stop.
112
-
113
- ## Prompt adjustments for agent execution
114
-
115
- When building the prompt (step 4 of the prompt-generator workflow), apply these adjustments so the agent can work independently:
116
-
117
- **Context completeness** -- include file paths, line numbers, function names, branch state, and anything you learned during step 9. The agent cannot see this conversation.
118
- Bind execution steps to the scope block artifacts passed from refinement output whenever available.
119
- Keep runtime context compact: include only actionable facts required for execution.
120
-
121
- **Acceptance criteria** -- state what "done" looks like. For code: include the test command. For research: specify the output format and save location.
122
-
123
- **Scope boundary** -- include "Make requested changes and keep surrounding code stable" or equivalent. Agents with explicit scope constraints stay aligned to task intent.
124
-
125
- **Constraints from this project** -- if the project has CODE_RULES.md, TDD requirements, or naming conventions, include the relevant subset in the prompt so the agent follows them.
126
-
127
- **Emotion-informed briefing** -- Anthropic's emotion concepts research (2026) found that briefing style causally affects output quality. Frame tasks collaboratively ("work on this together", "help figure out"). Include permission to express uncertainty ("flag anything you're unsure about", "use [PLACEHOLDER] for unverified specifics"). Provide motivation behind constraints ("this ordering ensures tests define behavior before implementation exists"). Share system context proactively (what hooks enforce, what tools are available, what the fallback is) so the agent can incorporate constraints into its plan from the start.
128
-
129
- **Anti-test-fixation** -- For code tasks, include guidance against test-specific solutions. Anthropic: "Implement a solution that works correctly for all valid inputs, not just the test cases. Tests are there to verify correctness, not to define the solution. If the task is unreasonable or infeasible, or if any of the tests are incorrect, please inform me rather than working around them."
130
-
131
- **Commit-and-execute** -- For multi-step agent work, include decision commitment guidance. Anthropic: "When deciding how to approach a problem, choose an approach and commit to it. Avoid revisiting decisions unless you encounter new information that directly contradicts your reasoning."
132
-
133
- **Temp file cleanup** -- If the agent may create scratch files during iteration, include cleanup instructions. Anthropic: "If you create any temporary new files, scripts, or helper files for iteration, clean up these files by removing them at the end of the task."
134
-
135
- ## Final audit-agent stage requirements (for default section-refinement mode)
136
-
137
- After merge, run one dedicated audit agent that validates the full prompt against:
138
-
139
- - Prompt-generator rubric requirements (`packages/claude-dev-env/skills/prompt-generator/SKILL.md`)
140
- - The deterministic checklist from the handoff artifact
141
- - Embedded research-mode evidence constraints below
142
-
143
- Required audit output shape:
144
-
145
- ```json
146
- {
147
- "overall_status": "pass|fail",
148
- "checklist_results": [
149
- {
150
- "check_id": "structured_scoped_instructions",
151
- "status": "pass|fail",
152
- "evidence_quote": "word-for-word quote",
153
- "source_ref": "path-or-url",
154
- "fix_if_fail": "targeted correction"
155
- }
156
- ],
157
- "corrective_edits": ["..."],
158
- "retry_count": 0
159
- }
160
- ```
161
-
162
- ### Embedded research-mode policy text (audit behavior)
163
-
164
- The audit agent must enforce these constraints as policy text in the audit prompt (do not rely on a global mode switch):
165
-
166
- - "Every recommendation, claim, or piece of advice must cite a specific source."
167
- - "Ground your response in word-for-word quotes, not paraphrased summaries."
168
- - "If you don't have a credible source for a claim, say 'I don't know'."
169
- - Source priority:
170
- 1. Official vendor/creator docs for external tools
171
- 2. Local project files for local behavior
172
- 3. Academic or named expert sources
173
- 4. Reputable external sources with URLs
174
- 5. Blogs/community posts (lowest)
175
-
176
- Policy source: `packages/claude-dev-env/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md`
177
-
178
- ## Section-refinement acceptance criteria
179
-
180
- Section-refinement orchestration is done only when all are true:
181
-
182
- - All 6 section agents ran, each scoped to exactly one section
183
- - Merge produced one canonical prompt containing all six sections
184
- - Final audit returned `overall_status: pass`
185
- - Any non-pass audit was resolved through targeted revisions within the retry cap
186
- - AskUserQuestion approval gate was honored before launch
187
- - Final user artifact includes one complete pasteable prompt block
188
-
189
- ## Constraints
190
-
191
- - Present every launch for approval via AskUserQuestion before spawning
192
- - Always run agents in background
193
- - Gather context before crafting -- do not send an agent in blind
194
- - Start only after explicit user execution intent; keep prompt authoring/refinement in `/prompt-generator`
195
- - Default to `section_refinement_with_final_audit` orchestration for execution tasks unless user requests simplified mode
196
- - Carry scope-block context into execution prompts, because native Agent/Task tools have no custom intent metadata
197
- - If the task is too small for an agent (single file read, quick grep), say so and just do it directly
198
- - Include obstacle handling: "When encountering obstacles, do not use destructive actions as a shortcut (e.g. --no-verify, discarding unfamiliar files)" -- agents without this guidance may take irreversible shortcuts
199
- - Frame agent tasks with collaborative language and include permission to express uncertainty — agents produce higher-quality output with collaborative briefing (Anthropic emotion concepts research, 2026)
@@ -1,18 +0,0 @@
1
- # prompt-generator — file map
2
-
3
- Baseline inventory of files in the prompt-generator skill package.
4
-
5
- ## Baseline inventory
6
-
7
- | Path | Role |
8
- | --- | --- |
9
- | `SKILL.md` | Orchestrator rules, subagent contract, compliance audit |
10
- | `TARGET_OUTPUT.md` | User-visible output contract for evals and hooks |
11
- | `REFERENCE.md` | Tiered sources, harness patterns, debug schema |
12
- | `REFINEMENT_PIPELINE_RUNBOOK.md` | Evidence-grounding runbook |
13
- | `evals/prompt-generator.json` | Scenario eval rows |
14
- | `templates/skill-from-ground-up.md` | Net-new skill checkpoint template |
15
- | `templates/skill-refinement-package.md` | Existing-skill refinement template |
16
- | `hooks/blocking/prompt_workflow_validate.py` | Validator CLI (file-based loop) |
17
- | `hooks/blocking/prompt_workflow_gate_core.py` | Fence extraction, markers |
18
- | `hooks/blocking/prompt_workflow_clipboard.py` | Clipboard copy for artifacts |