claude-dev-env 1.17.1 → 1.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,339 @@
1
+ """Tests for prompt_workflow_validate module (shared validator + CLI entry point)."""
2
+
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+
9
+ from prompt_workflow_validate import ValidationResult, validate_prompt_workflow
10
+
11
+ VALIDATOR_MODULE_PATH = Path(__file__).parent / "prompt_workflow_validate.py"
12
+
13
+
14
+ def _full_checklist_rows() -> str:
15
+ return (
16
+ "checklist_results:\n"
17
+ "- structured_scoped_instructions\n"
18
+ "- sequential_steps_present\n"
19
+ "- positive_framing\n"
20
+ "- acceptance_criteria_defined\n"
21
+ "- safety_reversibility_language\n"
22
+ "- reversible_action_and_safety_check_guidance\n"
23
+ "- concrete_output_contract\n"
24
+ "- scope_boundary_present\n"
25
+ "- explicit_scope_anchors_present\n"
26
+ "- all_instructions_artifact_bound\n"
27
+ "- scope_terms_explicit_and_anchored\n"
28
+ "- completion_boundary_measurable\n"
29
+ "- citation_grounding_policy_present\n"
30
+ "- source_priority_rules_present\n"
31
+ "- artifact_language_confidence\n"
32
+ )
33
+
34
+
35
+ def _wrap_five_section_scaffold(inner_body: str) -> str:
36
+ has_instructions = "<instructions>" in inner_body
37
+ has_constraints = "<constraints>" in inner_body
38
+ instructions_section = (
39
+ "" if has_instructions else "<instructions>Test instructions sentence one.</instructions>\n"
40
+ )
41
+ constraints_section = (
42
+ "" if has_constraints else "<constraints>Test constraints sentence one.</constraints>\n"
43
+ )
44
+ return (
45
+ "<role>Test role sentence one.</role>\n"
46
+ "<background>Test background sentence one.</background>\n"
47
+ f"{instructions_section}"
48
+ f"{inner_body}\n"
49
+ f"{constraints_section}"
50
+ "<output_format>Test output format sentence one.</output_format>\n"
51
+ )
52
+
53
+
54
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
    """Assemble a complete prompt-workflow message around one fenced XML body.

    The message consists of, in order: the audit line, an ``xml`` code fence
    holding ``fenced_xml_body``, the ``overall_status`` line, every checklist
    row from ``_full_checklist_rows()``, the five scope-anchor lines, and the
    ``base_minimal_instruction_layer`` / ``on_demand_skill_loading`` lines.

    Args:
        fenced_xml_body: Text placed verbatim between the opening and closing
            fence markers.

    Returns:
        The assembled message, ending with a newline.
    """
    message_parts = [
        "Audit: pass 15/15\n",
        "```xml\n",
        fenced_xml_body,
        "\n```\n",
        "overall_status: pass\n",
        _full_checklist_rows(),
        "target_local_roots\n",
        "target_canonical_roots\n",
        "target_file_globs\n",
        "comparison_basis\n",
        "completion_boundary\n",
        "base_minimal_instruction_layer: true\n",
        "on_demand_skill_loading: true\n",
    ]
    return "".join(message_parts)
66
+
67
+
68
class TestValidatePromptWorkflowFunction:
    """Direct, in-process tests of the shared ``validate_prompt_workflow`` function."""

    @staticmethod
    def _message_with_scope_anchors_only() -> str:
        """Build a message with status, checklist rows and scope anchors only.

        The ``base_minimal_instruction_layer`` and ``on_demand_skill_loading``
        lines are deliberately left out.
        """
        return "".join(
            [
                "overall_status: pass\n",
                _full_checklist_rows(),
                "target_local_roots\n",
                "target_canonical_roots\n",
                "target_file_globs\n",
                "comparison_basis\n",
                "completion_boundary\n",
            ]
        )

    def test_allowed_complete_message_with_fenced_xml(self) -> None:
        """A complete message with a positively phrased artifact is allowed with no reasons."""
        scaffolded_xml = _wrap_five_section_scaffold(
            "<instructions>Ensure all functions have explicit return types.</instructions>"
        )
        workflow_message = _build_prompt_workflow_message_with_fenced_xml(scaffolded_xml)
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is True
        assert outcome.reasons == ()

    def test_blocked_missing_context_control_lines(self) -> None:
        """Omitting the two context-control lines yields ``missing_context_signals``."""
        outcome = validate_prompt_workflow(self._message_with_scope_anchors_only())
        assert outcome.allowed is False
        assert "missing_context_signals" in outcome.reason_codes
        messages_naming_context_control = [
            each_message
            for each_message in outcome.reason_messages
            if "context-control" in each_message
        ]
        assert messages_naming_context_control

    def test_allowed_empty_message(self) -> None:
        """An empty message is allowed."""
        outcome = validate_prompt_workflow("")
        assert outcome.allowed is True

    def test_allowed_non_workflow_message(self) -> None:
        """Ordinary text with no workflow content is allowed."""
        outcome = validate_prompt_workflow("Just a regular response.")
        assert outcome.allowed is True

    def test_blocked_internal_object_leak(self) -> None:
        """A raw internal pipeline object in the message is reported as a leak."""
        leaked_object_text = (
            '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
        )
        outcome = validate_prompt_workflow(leaked_object_text)
        assert outcome.allowed is False
        assert "internal_object_leak" in outcome.reason_codes

    def test_allowed_internal_object_with_debug_context(self) -> None:
        """The same internal object is allowed when the user context asks for debug output."""
        leaked_object_text = (
            '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
        )
        outcome = validate_prompt_workflow(
            leaked_object_text,
            user_context="debug: show internal pipeline object",
        )
        assert outcome.allowed is True

    def test_blocked_missing_checklist_rows(self) -> None:
        """A message listing a single checklist row yields ``missing_checklist_rows``."""
        workflow_message = "".join(
            [
                "overall_status: pass\n",
                "checklist_results: structured_scoped_instructions\n",
                "target_local_roots\n",
                "target_canonical_roots\n",
                "target_file_globs\n",
                "comparison_basis\n",
                "completion_boundary\n",
            ]
        )
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is False
        assert "missing_checklist_rows" in outcome.reason_codes

    def test_blocked_negative_keywords_in_fenced_xml(self) -> None:
        """A "Do not ..." sentence inside the fenced artifact is blocked."""
        scaffolded_xml = _wrap_five_section_scaffold(
            "<instructions>Do not leave return types implicit.</instructions>"
        )
        workflow_message = _build_prompt_workflow_message_with_fenced_xml(scaffolded_xml)
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is False
        assert "negative_keywords_in_artifact" in outcome.reason_codes

    def test_blocked_ambiguous_scope(self) -> None:
        """Scope anchors run together with "this session" wording yield ``ambiguous_scope``."""
        workflow_message = "".join(
            [
                "overall_status: pass\n",
                _full_checklist_rows(),
                "scope block includes target_local_roots target_canonical_roots ",
                "target_file_globs comparison_basis completion_boundary ",
                "base_minimal_instruction_layer: true\n",
                "on_demand_skill_loading: true\n",
                "and applies to this session.",
            ]
        )
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is False
        assert "ambiguous_scope" in outcome.reason_codes

    def test_reason_messages_property(self) -> None:
        """A message with one failure exposes exactly one reason message and one reason code."""
        outcome = validate_prompt_workflow(self._message_with_scope_anchors_only())
        assert len(outcome.reason_messages) == 1
        assert len(outcome.reason_codes) == 1

    def test_blocked_missing_scope_anchors(self) -> None:
        """Leaving out the five scope-anchor lines yields ``missing_scope_anchors``."""
        workflow_message = "".join(
            [
                "overall_status: pass\n",
                _full_checklist_rows(),
                "base_minimal_instruction_layer: true\n",
                "on_demand_skill_loading: true\n",
            ]
        )
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is False
        assert "missing_scope_anchors" in outcome.reason_codes

    def test_blocked_missing_xml_sections_in_fenced_artifact(self) -> None:
        """An artifact without ``<background>`` is blocked and the message names that section."""
        artifact_without_background = "".join(
            [
                "<role>Test role sentence one.</role>\n",
                "<instructions>Test instructions sentence one.</instructions>\n",
                "<constraints>Test constraints sentence one.</constraints>\n",
                "<output_format>Test output format sentence one.</output_format>\n",
            ]
        )
        workflow_message = _build_prompt_workflow_message_with_fenced_xml(
            artifact_without_background
        )
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is False
        assert "missing_xml_sections" in outcome.reason_codes
        messages_naming_background = [
            each_message
            for each_message in outcome.reason_messages
            if "background" in each_message
        ]
        assert messages_naming_background

    def test_allows_positive_phrasing_inside_fenced_xml(self) -> None:
        """A positively phrased instruction inside the fenced artifact is allowed."""
        scaffolded_xml = _wrap_five_section_scaffold(
            "<instructions>Ensure all functions have explicit return types.</instructions>"
        )
        workflow_message = _build_prompt_workflow_message_with_fenced_xml(scaffolded_xml)
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is True

    def test_permits_negative_keywords_outside_fenced_xml(self) -> None:
        """A "Do not ..." sentence placed before the fence does not block the message."""
        scaffolded_xml = _wrap_five_section_scaffold(
            "<instructions>Ensure all functions have explicit return types.</instructions>"
        )
        workflow_message = "".join(
            [
                "Audit: pass 15/15\n",
                "Do not skip the audit line.\n",
                "```xml\n",
                scaffolded_xml,
                "\n```\n",
                "overall_status: pass\n",
                _full_checklist_rows(),
                "target_local_roots\n",
                "target_canonical_roots\n",
                "target_file_globs\n",
                "comparison_basis\n",
                "completion_boundary\n",
                "base_minimal_instruction_layer: true\n",
                "on_demand_skill_loading: true\n",
            ]
        )
        outcome = validate_prompt_workflow(workflow_message)
        assert outcome.allowed is True
228
+
229
+
230
@pytest.mark.parametrize(
    ("banned_pattern_name", "fenced_xml_content"),
    [
        ("do_not", "<instructions>Do not leave return types implicit.</instructions>"),
        ("avoid", "<instructions>Avoid missing return types.</instructions>"),
        ("never", "<constraints>Never store credentials in plain text.</constraints>"),
        ("without", "<instructions>Deploy without running tests first.</instructions>"),
        ("prevent", "<constraints>Prevent unauthorized access to the API.</constraints>"),
        ("reject", "<constraints>Reject all unsigned commits.</constraints>"),
        ("cannot", "<constraints>The API cannot accept unauthenticated requests.</constraints>"),
        ("unless", "<constraints>Skip the build step unless the user explicitly approves.</constraints>"),
        ("must_not", "<constraints>The script must not produce duplicates.</constraints>"),
        ("must_never", "<constraints>You must never store credentials in environment variables.</constraints>"),
        ("instead_of", "<instructions>Use explicit types instead of implicit ones.</instructions>"),
        ("rather_than", "<constraints>Prefer explicit types rather than inferred ones.</constraints>"),
        ("as_opposed_to", "<instructions>Use Grid as opposed to floats for layout.</instructions>"),
    ],
)
def test_blocks_banned_pattern_inside_fenced_xml(
    banned_pattern_name: str,
    fenced_xml_content: str,
) -> None:
    """Each banned phrasing inside the fenced artifact blocks the message.

    ``banned_pattern_name`` only labels the case; the body does not read it.
    """
    scaffolded_xml = _wrap_five_section_scaffold(fenced_xml_content)
    workflow_message = _build_prompt_workflow_message_with_fenced_xml(scaffolded_xml)
    outcome = validate_prompt_workflow(workflow_message)
    assert outcome.allowed is False
    assert "negative_keywords_in_artifact" in outcome.reason_codes
258
+
259
+
260
class TestValidatorCli:
    """Tests that exercise the CLI entry point via subprocess."""

    # Upper bound for a single validator run. Without it, a child process that
    # blocks (for example, waiting on inherited stdin) would stall the whole
    # test session; with it, the test fails fast with subprocess.TimeoutExpired.
    CLI_TIMEOUT_SECONDS = 30

    def test_cli_exits_zero_for_valid_content(self, tmp_path: Path) -> None:
        """A valid draft file produces exit code 0 and nothing on stderr."""
        fenced_content = _wrap_five_section_scaffold(
            "<instructions>Ensure all functions have explicit return types.</instructions>"
        )
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(
            _build_prompt_workflow_message_with_fenced_xml(fenced_content),
            encoding="utf-8",
        )
        completed_process = subprocess.run(
            [sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
            capture_output=True,
            text=True,
            check=False,
            timeout=self.CLI_TIMEOUT_SECONDS,
        )
        assert completed_process.returncode == 0
        assert completed_process.stderr.strip() == ""

    def test_cli_exits_two_with_bracketed_reason_code_on_stderr(
        self,
        tmp_path: Path,
    ) -> None:
        """A draft missing the context-control lines exits 2 with a bracketed reason code."""
        message = (
            "overall_status: pass\n"
            + _full_checklist_rows()
            + "target_local_roots\n"
            + "target_canonical_roots\n"
            + "target_file_globs\n"
            + "comparison_basis\n"
            + "completion_boundary\n"
        )
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(message, encoding="utf-8")
        completed_process = subprocess.run(
            [sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
            capture_output=True,
            text=True,
            check=False,
            timeout=self.CLI_TIMEOUT_SECONDS,
        )
        assert completed_process.returncode == 2
        assert "[missing_context_signals]" in completed_process.stderr

    def test_cli_stderr_format_uses_reason_code_prefix(
        self,
        tmp_path: Path,
    ) -> None:
        """A banned phrase in the artifact exits 2 and stderr carries the code and its message."""
        fenced_content = _wrap_five_section_scaffold(
            "<instructions>Do not leave return types implicit.</instructions>"
        )
        draft_file = tmp_path / "draft.xml"
        draft_file.write_text(
            _build_prompt_workflow_message_with_fenced_xml(fenced_content),
            encoding="utf-8",
        )
        completed_process = subprocess.run(
            [sys.executable, str(VALIDATOR_MODULE_PATH), str(draft_file)],
            capture_output=True,
            text=True,
            check=False,
            timeout=self.CLI_TIMEOUT_SECONDS,
        )
        assert completed_process.returncode == 2
        assert "[negative_keywords_in_artifact]" in completed_process.stderr
        assert "Banned negative keywords" in completed_process.stderr

    def test_cli_reads_from_stdin_when_no_file_argument(self) -> None:
        """With no file argument, a valid message piped on stdin exits 0."""
        fenced_content = _wrap_five_section_scaffold(
            "<instructions>Ensure all functions have explicit return types.</instructions>"
        )
        valid_message = _build_prompt_workflow_message_with_fenced_xml(fenced_content)
        completed_process = subprocess.run(
            [sys.executable, str(VALIDATOR_MODULE_PATH)],
            input=valid_message,
            capture_output=True,
            text=True,
            check=False,
            timeout=self.CLI_TIMEOUT_SECONDS,
        )
        assert completed_process.returncode == 0
package/hooks/hooks.json CHANGED
@@ -150,11 +150,6 @@
150
150
  "type": "command",
151
151
  "command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/hedging-language-blocker.py",
152
152
  "timeout": 10
153
- },
154
- {
155
- "type": "command",
156
- "command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/blocking/prompt-workflow-stop-guard.py",
157
- "timeout": 10
158
153
  }
159
154
  ]
160
155
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-dev-env",
3
- "version": "1.17.1",
3
+ "version": "1.17.5",
4
4
  "description": "Claude Code development standards — rules, hooks, agents, commands, and skills",
5
5
  "type": "module",
6
6
  "bin": {
@@ -22,6 +22,9 @@
22
22
  "tdd",
23
23
  "code-quality"
24
24
  ],
25
+ "dependencies": {
26
+ "@jl-cmd/prompt-generator": "^1.0.0"
27
+ },
25
28
  "license": "MIT",
26
29
  "repository": {
27
30
  "type": "git",
@@ -13,5 +13,6 @@ Baseline inventory of files in the prompt-generator skill package.
13
13
  | `evals/prompt-generator.json` | Scenario eval rows |
14
14
  | `templates/skill-from-ground-up.md` | Net-new skill checkpoint template |
15
15
  | `templates/skill-refinement-package.md` | Existing-skill refinement template |
16
- | `hooks/blocking/prompt-workflow-stop-guard.py` | Stop gate + clipboard |
16
+ | `hooks/blocking/prompt_workflow_validate.py` | Validator CLI (file-based loop) |
17
17
  | `hooks/blocking/prompt_workflow_gate_core.py` | Fence extraction, markers |
18
+ | `hooks/blocking/prompt_workflow_clipboard.py` | Clipboard copy for artifacts |
@@ -7,8 +7,8 @@ When authoring or refining prompts, ground decisions in these sources. If guidan
7
7
  ### Tier 1: Anthropic (primary authority for Claude)
8
8
 
9
9
  - https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/overview -- overview, links to all sub-guides
10
- - https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices -- the single living reference for Claude's latest models. Covers general principles, XML tags, prefill deprecation, tool use, thinking, agentic systems, overeagerness, evidence-grounding and citing sources before strong claims.
11
- - https://transformer-circuits.pub/2026/emotions/index.html -- emotion concepts research (April 2026): 171 internal activation patterns that causally influence behavior. Key prompt-engineering takeaways: clear criteria and escape routes improve output quality, collaborative framing activates engagement, positive task framing correlates with better results, inviting transparency produces more reliable output. Cross-model caveat: studied on Sonnet 4.5; patterns align with best practices independently.
10
+ - https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices -- the single living reference for Claude's latest models.
11
+ - https://transformer-circuits.pub/2026/emotions/index.html -- emotion concepts research (April 2026). Key takeaways: clear criteria and escape routes, collaborative framing, positive task framing, inviting transparency. Full catalog: `packages/claude-dev-env/docs/emotion-informed-prompt-design.md`.
12
12
  - https://www.anthropic.com/research/emotion-concepts-function -- blog summary of the above paper.
13
13
  - https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking -- adaptive thinking reference; replaces manual budget_tokens with effort-based control.
14
14
  - https://claude.com/blog/harnessing-claudes-intelligence -- harness evolution: primitives Claude already knows, what to stop doing in the harness, deliberate boundaries (context economics, caching, typed tools). Local inventory: `docs/references/anthropic-harnessing-claudes-intelligence-technique-inventory.md`.
@@ -37,7 +37,11 @@ When authoring or refining prompts, ground decisions in these sources. If guidan
37
37
 
38
38
  ### Conflict resolution rule
39
39
 
40
- If sources disagree on a technique, apply in order: Anthropic documentation first (it describes the actual model behavior), then OpenAI/Google/Microsoft (large-scale research with cross-model relevance), then community sources (patterns and intuition, not authoritative on model internals). When Tier 3 contradicts Tier 1, Tier 1 wins without exception.
40
+ If sources disagree, apply tier order: Anthropic first, then OpenAI/Google/Microsoft, then community. Tier 1 wins when conflicting with lower tiers.
41
+
42
+ ### Outcome preview gate and digest — contract summary (`prompt-generator`)
43
+
44
+ See SKILL.md §§107-115 (Phases 4-5) and `TARGET_OUTPUT.md` for the full contract. **Clipboard safety:** `extract_fenced_xml_content` concatenates every ` ```xml ` block—follow §7 sample formatting so clipboard copy stays the lone artifact body.
41
45
 
42
46
  ### Outcome preview gate and digest (`prompt-generator`)
43
47
 
@@ -74,7 +78,7 @@ Jump from concept to the platform specs the post names:
74
78
 
75
79
  ### Prompt caching (Hook 6)
76
80
 
77
- The [Messages API](https://platform.claude.com/docs/en/build-with-claude/working-with-messages) is stateless—re-supply prior actions, tool definitions, and instructions each turn. Maximize [prompt caching](https://platform.claude.com/docs/en/build-with-claude/prompt-caching) hits: **stable prefix first, dynamic tail last**; **append** new content via **messages** instead of rewriting the cached prompt; **avoid mid-session model switches** (caches are model-specific—use a **subagent** for a cheaper model); **treat the tool list as part of the cached prefix** and avoid churn; use **tool search** so dynamic discovery **appends** without invalidating the prefix; for multi-turn agents, **advance breakpoints** toward the latest message (**auto-caching**). Cached input tokens are priced at **10% of base input** per [pricing](https://platform.claude.com/docs/en/about-claude/pricing).
81
+ The Messages API is stateless. Maximize [prompt caching](https://platform.claude.com/docs/en/build-with-claude/prompt-caching): **stable prefix first, dynamic tail last**; **append** via messages; **avoid mid-session model switches** (use a subagent for cheaper models); **treat tool list as cached prefix**; use **tool search** to append without invalidation; **advance breakpoints** toward the latest message. Cached tokens cost **10% of base input**.
78
82
 
79
83
  ### Typed tools vs bash strings (Hook 7)
80
84
 
@@ -179,12 +183,6 @@ Search for this information in a structured way. As you gather data, develop sev
179
183
  </research_approach>
180
184
  ```
181
185
 
182
- Key elements:
183
- - Define clear **success criteria** for the research question
184
- - Encourage **source verification** across multiple sources
185
- - Track **competing hypotheses** with confidence levels
186
- - **Self-critique** approach and plan regularly
187
-
188
186
  ## Evaluation loop
189
187
 
190
188
  For prompt drafts that must hold up over time:
@@ -209,7 +207,7 @@ When deciding how to approach a problem, choose an approach and commit to it. Av
209
207
 
210
208
  ## Debug JSON schema (prompt-generator pipeline)
211
209
 
212
- Use **only** when the user explicitly requests debug output (for example `show debug`, `full audit table`, `raw internal object`). Default assistant turns complete the normal handoff first: **audit line** + one `xml` fence + **`## Outcome digest`** + optional hook validation block (defined in SKILL.md Terminology; see also `TARGET_OUTPUT.md`); this JSON object is an optional appendix **after** that handoff (and after any hook validation block).
210
+ Use **only** when the user explicitly requests debug output (for example `show debug`, `full audit table`, `raw internal object`). Default assistant turns complete the normal handoff first: one `xml` fence + **`## Outcome digest`** (see also `TARGET_OUTPUT.md`); this JSON object is an optional appendix **after** that handoff.
213
211
 
214
212
  Shape (field names stable for internal audit helpers and Stop-hook leak detection):
215
213