claude-dev-env 1.11.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ from pathlib import Path
11
11
 
12
12
  from prompt_workflow_gate_core import (
13
13
  find_ambiguous_scope_terms,
14
+ find_negative_keywords_in_fenced_xml,
14
15
  has_debug_intent,
15
16
  has_checklist_container,
16
17
  has_internal_object_leak,
@@ -82,6 +83,8 @@ def _check_checklist_container(assistant_message: str) -> dict | None:
82
83
  )
83
84
 
84
85
  def _check_missing_checklist_rows(assistant_message: str) -> dict | None:
86
+ if not has_checklist_container(assistant_message):
87
+ return None
85
88
  missing_rows = missing_checklist_rows(assistant_message)
86
89
  if not missing_rows:
87
90
  return None
@@ -130,15 +133,32 @@ def _check_ambiguous_scope(assistant_message: str) -> dict | None:
130
133
  ),
131
134
  )
132
135
 
136
def _check_negative_keywords_in_artifact(assistant_message: str) -> dict | None:
    """Gate on negative phrasing inside the fenced XML artifact.

    Asks ``find_negative_keywords_in_fenced_xml`` for violations. With none,
    the gate passes and ``None`` is returned; otherwise the result of
    ``_build_block`` is returned, with a reason that lists one entry per
    violation (line number, matched keyword, offending line text).
    """
    found = find_negative_keywords_in_fenced_xml(assistant_message)
    if found:
        reason_header = (
            "PROMPT-WORKFLOW GATE: Banned negative keywords found inside fenced XML artifact. "
            "Rephrase as positive directives (what TO do, not what to avoid):\n"
        )
        entries = []
        for item in found:
            entries.append(
                f" line {item['line_number']}: \"{item['keyword']}\" in: {item['line_text']}"
            )
        return _build_block(
            brief_label="retrying: rephrase negative keywords in artifact",
            full_reason=reason_header + "\n".join(entries),
        )
    return None
152
+
133
153
  def _evaluate_workflow_gates(assistant_message: str) -> dict | None:
134
154
  if not is_prompt_workflow_response(assistant_message):
135
155
  return None
136
156
  workflow_gate_checks: tuple[Callable[[str], dict | None], ...] = (
137
- _check_checklist_container,
138
157
  _check_missing_checklist_rows,
139
158
  _check_missing_scope_anchors,
140
159
  _check_missing_context_signals,
141
160
  _check_ambiguous_scope,
161
+ _check_negative_keywords_in_artifact,
142
162
  )
143
163
  for check in workflow_gate_checks:
144
164
  block = check(assistant_message)
@@ -29,6 +29,7 @@ REQUIRED_CHECKLIST_ROWS: tuple[str, ...] = (
29
29
  "completion_boundary_measurable",
30
30
  "citation_grounding_policy_present",
31
31
  "source_priority_rules_present",
32
+ "artifact_language_confidence",
32
33
  )
33
34
 
34
35
  REQUIRED_CONTEXT_CONTROL_SIGNALS: tuple[str, ...] = (
@@ -80,6 +81,79 @@ DEBUG_INTENT_MARKERS: tuple[str, ...] = (
80
81
  )
81
82
 
82
83
 
84
+ NEGATIVE_KEYWORDS_IN_ARTIFACT: tuple[str, ...] = (
85
+ "no",
86
+ "not",
87
+ "don't",
88
+ "do not",
89
+ "never",
90
+ "avoid",
91
+ "without",
92
+ "refrain",
93
+ "stop",
94
+ "prevent",
95
+ "exclude",
96
+ "prohibit",
97
+ "forbid",
98
+ "reject",
99
+ "cannot",
100
+ "unless",
101
+ )
102
+
103
+ NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT: tuple[str, ...] = (
104
+ r"instead of\s+\w+",
105
+ r"rather than\s+\w+",
106
+ r"as opposed to\s+\w+",
107
+ )
108
+
109
+ COMPILED_NEGATIVE_KEYWORD_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
110
+ re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
111
+ for keyword in NEGATIVE_KEYWORDS_IN_ARTIFACT
112
+ )
113
+
114
+ COMPILED_NEGATIVE_INDIRECT_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
115
+ re.compile(pattern, re.IGNORECASE)
116
+ for pattern in NEGATIVE_INDIRECT_PATTERNS_IN_ARTIFACT
117
+ )
118
+
119
+ FENCED_XML_BLOCK_PATTERN: re.Pattern[str] = re.compile(
120
+ r"```xml\s*\n(.*?)```", re.DOTALL
121
+ )
122
+
123
+
124
+ def extract_fenced_xml_content(text: str) -> str:
125
+ all_matches = FENCED_XML_BLOCK_PATTERN.findall(text)
126
+ return "\n".join(all_matches)
127
+
128
+
129
+ def find_negative_keywords_in_fenced_xml(
130
+ text: str,
131
+ ) -> list[dict[str, str | int]]:
132
+ fenced_content = extract_fenced_xml_content(text)
133
+ if not fenced_content:
134
+ return []
135
+ fenced_lines = fenced_content.splitlines()
136
+ all_violations: list[dict[str, str | int]] = []
137
+ for line_index, each_line in enumerate(fenced_lines):
138
+ for each_pattern in COMPILED_NEGATIVE_KEYWORD_PATTERNS:
139
+ each_match = each_pattern.search(each_line)
140
+ if each_match:
141
+ all_violations.append({
142
+ "keyword": each_match.group(),
143
+ "line_number": line_index + 1,
144
+ "line_text": each_line.strip(),
145
+ })
146
+ for each_pattern in COMPILED_NEGATIVE_INDIRECT_PATTERNS:
147
+ each_match = each_pattern.search(each_line)
148
+ if each_match:
149
+ all_violations.append({
150
+ "keyword": each_match.group(),
151
+ "line_number": line_index + 1,
152
+ "line_text": each_line.strip(),
153
+ })
154
+ return all_violations
155
+
156
+
83
157
  def _contains_any_marker(text: str, markers: Iterable[str]) -> bool:
84
158
  lower_text = text.lower()
85
159
  return any(marker.lower() in lower_text for marker in markers)
@@ -5,6 +5,8 @@ import subprocess
5
5
  import sys
6
6
  from pathlib import Path
7
7
 
8
+ import pytest
9
+
8
10
 
9
11
  SCRIPT_PATH = Path(__file__).parent / "prompt-workflow-stop-guard.py"
10
12
 
@@ -34,6 +36,7 @@ def _full_checklist_rows() -> str:
34
36
  "- completion_boundary_measurable\n"
35
37
  "- citation_grounding_policy_present\n"
36
38
  "- source_priority_rules_present\n"
39
+ "- artifact_language_confidence\n"
37
40
  )
38
41
 
39
42
  def test_blocks_internal_object_leak_without_debug_intent() -> None:
@@ -63,7 +66,7 @@ def test_blocks_missing_checklist_rows() -> None:
63
66
  assert response["decision"] == "block"
64
67
  assert "Deterministic checklist rows missing" in response["reason"]
65
68
 
66
- def test_blocks_missing_checklist_container_for_prompt_workflow_output() -> None:
69
+ def test_allows_prompt_workflow_output_without_checklist_container() -> None:
67
70
  payload = {
68
71
  "last_assistant_message": (
69
72
  "overall_status: pass\n"
@@ -77,9 +80,7 @@ def test_blocks_missing_checklist_container_for_prompt_workflow_output() -> None
77
80
  ),
78
81
  }
79
82
  result = _run_hook(payload)
80
- response = json.loads(result.stdout)
81
- assert response["decision"] == "block"
82
- assert "Deterministic checklist container missing" in response["reason"]
83
+ assert result.stdout.strip() == ""
83
84
 
84
85
  def test_blocks_missing_context_control_signals() -> None:
85
86
  payload = {
@@ -117,6 +118,89 @@ def test_blocks_ambiguous_scope_phrasing() -> None:
117
118
  assert response["decision"] == "block"
118
119
  assert "Ambiguous scope phrasing detected" in response["reason"]
119
120
 
121
def _build_prompt_workflow_message_with_fenced_xml(fenced_xml_body: str) -> str:
    """Build an assistant message that carries *fenced_xml_body* in an xml fence.

    The fence sits between an ``Audit: pass 15/15`` line and the status,
    checklist-row, scope-anchor and context-signal lines that follow it.
    """
    message_parts = [
        "Audit: pass 15/15\n",
        "```xml\n",
        fenced_xml_body,
        "\n```\n",
        "overall_status: pass\n",
        _full_checklist_rows(),
        "target_local_roots\n",
        "target_canonical_roots\n",
        "target_file_globs\n",
        "comparison_basis\n",
        "completion_boundary\n",
        "base_minimal_instruction_layer: true\n",
        "on_demand_skill_loading: true\n",
    ]
    return "".join(message_parts)
137
+
138
+
139
def test_allows_positive_phrasing_inside_fenced_xml() -> None:
    """An affirmatively phrased artifact makes the hook print nothing."""
    message = _build_prompt_workflow_message_with_fenced_xml(
        "<instructions>Ensure all functions have explicit return types.</instructions>"
    )
    hook_result = _run_hook({"last_assistant_message": message})
    assert hook_result.stdout.strip() == ""
146
+
147
+
148
# (case id, fenced XML body) pairs; every body contains one banned negative
# keyword or indirect negative pattern and must be blocked by the hook.
BANNED_KEYWORD_TEST_CASES: list[tuple[str, str]] = [
    ("do_not", "<instructions>Do not leave return types implicit.</instructions>"),
    ("avoid", "<instructions>Avoid missing return types.</instructions>"),
    ("never", "<constraints>Never store credentials in plain text.</constraints>"),
    ("without", "<instructions>Deploy without running tests first.</instructions>"),
    ("prevent", "<constraints>Prevent unauthorized access to the API.</constraints>"),
    ("reject", "<constraints>Reject all unsigned commits.</constraints>"),
    ("cannot", "<constraints>The API cannot accept unauthenticated requests.</constraints>"),
    ("unless", "<constraints>Skip the build step unless the user explicitly approves.</constraints>"),
    ("must_not", "<constraints>The script must not produce duplicates.</constraints>"),
    ("must_never", "<constraints>You must never store credentials in environment variables.</constraints>"),
    ("instead_of", "<instructions>Use explicit types instead of implicit ones.</instructions>"),
    ("rather_than", "<constraints>Prefer explicit types rather than inferred ones.</constraints>"),
    ("as_opposed_to", "<instructions>Use Grid as opposed to floats for layout.</instructions>"),
]


@pytest.mark.parametrize(
    ("banned_pattern_name", "fenced_xml_content"),
    BANNED_KEYWORD_TEST_CASES,
    ids=[each_case[0] for each_case in BANNED_KEYWORD_TEST_CASES],
)
def test_blocks_banned_pattern_inside_fenced_xml(
    banned_pattern_name: str,
    fenced_xml_content: str,
) -> None:
    """Each banned pattern inside the xml fence is blocked by the negative-keyword gate."""
    payload = {
        "last_assistant_message": _build_prompt_workflow_message_with_fenced_xml(fenced_xml_content),
    }
    result = _run_hook(payload)
    response = json.loads(result.stdout)
    assert response["decision"] == "block", banned_pattern_name
    # A bare "block" could come from any gate; pin the reason so the test
    # fails if a different gate is the one rejecting the message.
    assert (
        "Banned negative keywords found inside fenced XML artifact" in response["reason"]
    ), banned_pattern_name
180
+
181
+
182
def test_permits_negative_keywords_outside_fenced_xml() -> None:
    """Negative wording outside the xml fence does not trigger a block."""
    message_parts = [
        "Audit: pass 15/15\n",
        "Do not skip the audit line.\n",
        "```xml\n",
        "<instructions>Ensure all functions have explicit return types.</instructions>\n",
        "```\n",
        "overall_status: pass\n",
        _full_checklist_rows(),
        "target_local_roots\n",
        "target_canonical_roots\n",
        "target_file_globs\n",
        "comparison_basis\n",
        "completion_boundary\n",
        "base_minimal_instruction_layer: true\n",
        "on_demand_skill_loading: true\n",
    ]
    hook_result = _run_hook({"last_assistant_message": "".join(message_parts)})
    assert hook_result.stdout.strip() == ""
202
+
203
+
120
204
  def test_allows_fully_structured_prompt_workflow_output() -> None:
121
205
  payload = {
122
206
  "last_assistant_message": (
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-dev-env",
3
- "version": "1.11.0",
3
+ "version": "1.12.1",
4
4
  "description": "Claude Code development standards — rules, hooks, agents, commands, and skills",
5
5
  "type": "module",
6
6
  "bin": {
@@ -84,6 +84,7 @@ Audit report must include all check IDs:
84
84
  - `completion_boundary_measurable`
85
85
  - `citation_grounding_policy_present`
86
86
  - `source_priority_rules_present`
87
+ - `artifact_language_confidence`
87
88
 
88
89
  ## Citation and Grounding Validation
89
90
 
@@ -134,6 +135,8 @@ Validate fail-closed runtime gates:
134
135
  - Block responses that leak raw internal refinement object fields unless debug intent is explicit.
135
136
  - Block responses missing deterministic checklist rows when audit output is present.
136
137
  - Block responses using ambiguous scope phrasing in scope-bound sections.
138
+ - Block responses containing negative keywords (no, not, don't, never, avoid, etc.) inside fenced XML artifacts.
139
+ - Block responses containing hedging language (might be, possibly, I think, etc.) inside fenced XML artifacts.
137
140
 
138
141
  ## Context-Footprint Controls
139
142
 
@@ -150,6 +153,8 @@ Validate fail-closed runtime gates:
150
153
  - Raw internal object leakage without debug intent
151
154
  - Missing required checklist rows in audit output
152
155
  - Ambiguous scope terms in scope-bound text
156
+ - Negative keywords inside fenced XML artifacts
157
+ - Hedging language inside fenced XML artifacts
153
158
  - **Semantic-only (auditor layer):**
154
159
  - Overall quality/readability of scope wording beyond banned-term checks
155
160
  - Whether instruction binding quality is "good enough" beyond explicit anchor presence
@@ -23,13 +23,13 @@ description: >-
23
23
 
24
24
  **Eval contract:** The user-visible behavior this skill must satisfy is defined in `packages/claude-dev-env/skills/prompt-generator/TARGET_OUTPUT.md`. Automated evals live in `packages/claude-dev-env/skills/prompt-generator/evals/prompt-generator.json`.
25
25
 
26
- **Terminology:** **Prompt artifact** — the full XML inside the single user-facing `xml` fence (the paste-ready handoff). **Scope block** — the five-key contract in §3A that grounds instructions. **Default refinement pipeline** — §10: base draft → section refine → merge → 14-row compliance audit → capped fixes (subagent-internal unless draft-only). **Light self-check** — §8: fast pre-return sanity pass (shape, tools, scope, patterns); *not* the compliance audit. **Compliance audit (14-row)** — §11: hook-keyed rows that set the `Audit: pass|fail` numerator. **Execution handoff** — `/agent-prompt` after explicit user intent to run work.
26
+ **Terminology:** **Prompt artifact** — the full XML inside the single user-facing `xml` fence (the paste-ready handoff). **Scope block** — the five-key contract in §3A that grounds instructions. **Default refinement pipeline** — §10: base draft → section refine → merge → 15-row compliance audit → capped fixes (subagent-internal unless draft-only). **Light self-check** — §8: fast pre-return sanity pass (shape, tools, scope, patterns); *not* the compliance audit. **Compliance audit (15-row)** — §11: hook-keyed rows that set the `Audit: pass|fail` numerator. **Execution handoff** — `/agent-prompt` after explicit user intent to run work.
27
27
 
28
28
  **Hook-survival invariant (read first):** The fenced XML artifact is the primary deliverable and MUST survive Stop-hook retries. If a Stop hook rejects the response, only the surrounding audit summary and runtime signal scaffolding may change between retries—the XML inside the fence MUST be re-emitted in full on every retry. Recovery pattern: re-emit the complete fenced XML first, then adjust the audit line. Trimming, summarizing, or deferring the prompt artifact to satisfy a hook gate is forbidden.
29
29
 
30
30
  **Turn shape:** Each orchestrator turn is either **AskUserQuestion** only (then wait for answers), or **`Audit: …` + exactly one `xml` fenced block** (then **send boundary**)—per `TARGET_OUTPUT.md`. Do not substitute free-form question paragraphs for AskUserQuestion; do not append commentary after the closing fence on the default path.
31
31
 
32
- **Happy path:** (1) Choose scenario **1–4** from the router table. (2) Run discovery when that scenario calls for repo tools. (3) Collect answers through **AskUserQuestion** (one form per round, **2–4** options per field, recommended first). (4) Subagent produces XML, runs **light self-check**, then **14-row compliance audit** + refinement loop. (5) Orchestrator prints **`Audit: pass 14/14`** or **`Audit: fail N/14 — [reason]`** and the **complete fenced XML**. (6) **Send boundary:** end the message immediately after the closing fence. (7) If the user names a debug phrase, append the full table / JSON per `TARGET_OUTPUT.md`.
32
+ **Happy path:** (1) Choose scenario **1–4** from the router table. (2) Run discovery when that scenario calls for repo tools. (3) Collect answers through **AskUserQuestion** (one form per round, **2–4** options per field, recommended first). (4) Subagent produces XML, runs **light self-check**, then **15-row compliance audit** + refinement loop. (5) Orchestrator prints **`Audit: pass 15/15`** or **`Audit: fail N/15 — [reason]`** and the **complete fenced XML**. (6) **Send boundary:** end the message immediately after the closing fence. (7) If the user names a debug phrase, append the full table / JSON per `TARGET_OUTPUT.md`.
33
33
 
34
34
  **Clarity bar:** Ship concrete, outcome-first copy everywhere (AskUserQuestion fields, audit line, XML body): name *what* to do, *where* it applies, and *how* to verify done—per [Be clear and direct](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#be-clear-and-direct) and [Control the format of responses](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#control-the-format-of-responses). This skill **authors** prompts; downstream execution stays out of the default path until `/agent-prompt`.
35
35
 
@@ -39,7 +39,7 @@ description: >-
39
39
 
40
40
  **Hook-survival invariant:** Treat the fenced XML as the immutable payload for the user. On every Stop-hook retry, print the **same full** XML between the opening and closing fences; adjust only the one-line audit prefix (or other non-fence scaffolding) if a hook requires a format tweak. Re-emit the **entire** XML body before tweaking surrounding text—never shorten the artifact to pass a gate.
41
41
 
42
- **Orchestrator vs subagent:** The **orchestrator** runs ordered discovery, issues **AskUserQuestion**, and owns the **final** user-visible line: audit + fence. The **subagent** owns base draft, per-section refinement, merge, and the **14-row compliance audit**, returning **only** final XML plus pass/fail counts (no user-facing table)—unless the user asked for **draft-only** / **no refinement**, in which case you may draft inline with the same output shape. Keep hook retries internal; expose at most one short line such as `Retrying: scope anchor missing` before the successful audit + fence.
42
+ **Orchestrator vs subagent:** The **orchestrator** runs ordered discovery, issues **AskUserQuestion**, and owns the **final** user-visible line: audit + fence. The **subagent** owns base draft, per-section refinement, merge, and the **15-row compliance audit**, returning **only** final XML plus pass/fail counts (no user-facing table)—unless the user asked for **draft-only** / **no refinement**, in which case you may draft inline with the same output shape. Keep hook retries internal; expose at most one short line such as `Retrying: scope anchor missing` before the successful audit + fence.
43
43
 
44
44
  **Interaction shape:** Route clarifications through **AskUserQuestion** only. Close each successful artifact turn with **audit line + one fenced XML block**; keep implementation plans **inside** that XML for the downstream consumer, not as a chat to-do list.
45
45
 
@@ -49,7 +49,7 @@ Match `TARGET_OUTPUT.md`. Summary:
49
49
 
50
50
  1. **Questions:** Use **AskUserQuestion** for every clarification (one multi-field form per round); keep normal assistant text free of standalone question paragraphs.
51
51
  2. **Options:** Supply **2–4** options per question, **recommended option first**; label discovery-sourced choices **`[discovered]`**.
52
- 3. **Final message (exactly):** Line 1 = `Audit: pass 14/14` or `Audit: fail N/14 — [short reason]`; immediately after, output **one** Markdown code fence whose language tag is `xml` and whose body is the **complete** prompt; **send boundary** = right after that fence closes—the visible message is exactly those two consecutive blocks, copy-ready together, before any later user message.
52
+ 3. **Final message (exactly):** Line 1 = `Audit: pass 15/15` or `Audit: fail N/15 — [short reason]`; immediately after, output **one** Markdown code fence whose language tag is `xml` and whose body is the **complete** prompt; **send boundary** = right after that fence closes—the visible message is exactly those two consecutive blocks, copy-ready together, before any later user message.
53
53
  4. **Full audit table / JSON debug object:** Append only after the user uses an explicit debug phrase such as `show debug`, `full audit table`, or `raw internal object`.
54
54
  5. **Commit-and-execute:** Pick a drafting approach, run it to completion, ship the XML; change plans only when **new** facts from the user or tools contradict the earlier scope.
55
55
 
@@ -94,9 +94,9 @@ Issue **one** AskUserQuestion with all fields populated from discovery and the u
94
94
  Spawn a **subagent** (Agent tool) with:
95
95
 
96
96
  - Scenario id (1–4), user goal, discovery summary, AskUserQuestion answers
97
- - Instruction: produce **one** well-formed XML prompt (required sections) + run the internal refinement loop and **14-row compliance audit**; return **only** the final XML string and a pass/fail + fail count for that audit (no user-facing table)
97
+ - Instruction: produce **one** well-formed XML prompt (required sections) + run the internal refinement loop and **15-row compliance audit**; return **only** the final XML string and a pass/fail + fail count for that audit (no user-facing table)
98
98
 
99
- The orchestrator then prints **`Audit: pass 14/14`** or **`Audit: fail N/14 — [reason]`** immediately followed by the fenced XML. Keep subagent reasoning in the Agent transcript; the user-facing turn contains **only** audit + artifact.
99
+ The orchestrator then prints **`Audit: pass 15/15`** or **`Audit: fail N/15 — [reason]`** immediately followed by the fenced XML. Keep subagent reasoning in the Agent transcript; the user-facing turn contains **only** audit + artifact.
100
100
 
101
101
  **Draft-only:** If the user explicitly requests no refinement (“quick draft”, “no refinement loop”), the subagent may skip Steps 10–12 below but must still return valid XML and an honest audit line.
102
102
 
@@ -140,7 +140,7 @@ Apply principles from Anthropic’s prompting guide (see REFERENCE.md): XML sect
140
140
 
141
141
  **Add motivation behind constraints** in `<context>`. Anthropic: "Providing context or motivation behind your instructions... can help Claude better understand your goals and deliver more targeted responses." Claude generalizes from the explanation.
142
142
 
143
- **Frame positively (zero-negative-keyword rule).** Anthropic: state the desired outcome directly. "Your response should be composed of smoothly flowing prose paragraphs" provides clearer guidance than a prohibition-only instruction. Apply this rule absolutely inside the fenced XML artifact across all sections (`<role>`, `<context>`, `<instructions>`, `<constraints>`, `<output_format>`): every instruction states what to do, what to produce, what to enforce. Use affirmative directives exclusively: "only X", "always X", "ensure X", "require X." Banned keywords inside generated XML: "no", "not", "don't", "do not", "never", "avoid", "without", "refrain", "stop", "prevent", "exclude", "prohibit", "forbid", "reject." Also banned: indirect negative patterns such as "instead of X", "rather than X", "as opposed to." Example pass: "Ensure all functions have explicit return types." Example fail: "Do not leave return types implicit." When a boundary is needed, phrase it as what is permitted: "only run commands within the scoped paths" rather than a prohibition.
143
+ **Frame positively (zero-negative-keyword rule).** Anthropic: state the desired outcome directly. "Your response should be composed of smoothly flowing prose paragraphs" provides clearer guidance than a prohibition-only instruction. Apply this rule absolutely inside the fenced XML artifact across all sections (`<role>`, `<context>`, `<instructions>`, `<constraints>`, `<output_format>`): every instruction states what to do, what to produce, what to enforce. Use affirmative directives exclusively: "only X", "always X", "ensure X", "require X." Banned keywords inside generated XML: "no", "not", "don't", "do not", "never", "avoid", "without", "refrain", "stop", "prevent", "exclude", "prohibit", "forbid", "reject", "cannot", "unless." Also banned: indirect negative patterns such as "instead of X", "rather than X", "as opposed to." Example pass: "Ensure all functions have explicit return types." Example fail: "Do not leave return types implicit." When a boundary is needed, phrase it as what is permitted: "only run commands within the scoped paths" rather than a prohibition.
144
144
 
145
145
  **Emotion-informed framing.** Anthropic's emotion concepts research (2026) shows that internal activation patterns causally influence output quality. Apply: explicit success criteria with "say so if you're unsure" as an accepted answer; collaborative language ("help figure out", "work on this together"); framing tasks as interesting problems rather than chores; constructive, forward-looking tone. Cross-model caveat: studied on Sonnet 4.5; the patterns align with Anthropic's prompting best practices independently. Full pattern catalog and citations: `packages/claude-dev-env/docs/emotion-informed-prompt-design.md`.
146
146
 
@@ -170,7 +170,7 @@ For format- or tone-sensitive **generated** prompts, include 3–5 `<example>` b
170
170
 
171
171
  ### 8. Light self-check (subagent, pre-return)
172
172
 
173
- **Two-tier validation — tier 1:** Before the subagent returns XML, run a quick pass on output shape, tool phrasing, scope anchors, and safety / research / agentic patterns as applicable (see REFERENCE.md and patterns below). This **light self-check** is not interchangeable with the **14-row compliance audit** in §11; tier 2 supplies the hook-keyed pass/fail counts for the `Audit:` line.
173
+ **Two-tier validation — tier 1:** Before the subagent returns XML, run a quick pass on output shape, tool phrasing, scope anchors, and safety / research / agentic patterns as applicable (see REFERENCE.md and patterns below). This **light self-check** is not interchangeable with the **15-row compliance audit** in §11; tier 2 supplies the hook-keyed pass/fail counts for the `Audit:` line.
174
174
 
175
175
  Expand the light self-check with this internal checklist when useful:
176
176
 
@@ -191,10 +191,10 @@ Expand the light self-check with this internal checklist when useful:
191
191
  The orchestrator’s **only** delivery to the user is:
192
192
 
193
193
  ```text
194
- Audit: pass 14/14
194
+ Audit: pass 15/15
195
195
  ```
196
196
 
197
- (or `fail N/14 — …`), immediately followed by **one** fenced XML block; **send boundary** is immediately after the closing fence so the user receives a copy-ready pair (audit line + artifact) in one assistant message before the conversation continues.
197
+ (or `fail N/15 — …`), immediately followed by **one** fenced XML block; **send boundary** is immediately after the closing fence so the user receives a copy-ready pair (audit line + artifact) in one assistant message before the conversation continues.
198
198
 
199
199
  ### 10. Default refinement mode (subagent-internal)
200
200
 
@@ -203,14 +203,14 @@ For non-trivial requests, run inside the drafting subagent (use **draft-only** w
203
203
  1. Base draft
204
204
  2. Section refinement in order: `role`, `context`, `instructions`, `constraints`, `output_format`, `examples` (examples optional if unused)
205
205
  3. Merge to one canonical XML prompt
206
- 4. Final **14-row compliance audit** pass/fail with evidence (internal)
206
+ 4. Final **15-row compliance audit** pass/fail with evidence (internal)
207
207
  5. If fail: targeted fixes + capped re-audit rounds
208
208
 
209
209
  Required section list is immutable for this pipeline: `role`, `context`, `instructions`, `constraints`, `output_format`, `examples`.
210
210
 
211
- ### 11. Compliance audit — 14-row checklist (internal, audit numerator)
211
+ ### 11. Compliance audit — 15-row checklist (internal, audit numerator)
212
212
 
213
- **Two-tier validation — tier 2:** The `14` in `Audit: pass 14/14` counts these **compliance** rows (stable ids for hooks). Tier 1 is the **light self-check** in §8—keep the steps separate so models do not merge them.
213
+ **Two-tier validation — tier 2:** The `15` in `Audit: pass 15/15` counts these **compliance** rows (stable ids for hooks). Tier 1 is the **light self-check** in §8—keep the steps separate so models do not merge them.
214
214
 
215
215
  | # | Row name |
216
216
  |---|----------|
@@ -228,6 +228,7 @@ Required section list is immutable for this pipeline: `role`, `context`, `instru
228
228
  | 12 | completion_boundary_measurable |
229
229
  | 13 | citation_grounding_policy_present |
230
230
  | 14 | source_priority_rules_present |
231
+ | 15 | artifact_language_confidence |
231
232
 
232
233
  For each row, maintain `status`, `evidence_quote`, `source_ref`, and `fix_if_fail` internally (see **REFERENCE.md** debug schema). A debug-path markdown table surfaces `status` and a one-phrase evidence summary. **Default user-visible path:** omit this table; **debug path:** after phrases like `show debug` or `full audit table`, print the table plus evidence snippets.
233
234
 
@@ -9,7 +9,7 @@ This file is the **target output spec** for eval-driven iteration of the `prompt
9
9
  - **Clarity bar:** Every deliverable (AskUserQuestion fields, audit line, XML body) states concrete outcomes, explicit formats, and checkable done-when signals—aligned with Anthropic [Be clear and direct](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#be-clear-and-direct) and [Control the format of responses](https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices#control-the-format-of-responses). Prefer what to do and how to verify it over empty prohibitions or vague quality adjectives.
10
10
  - **Questions:** Deliver every clarifying question through **AskUserQuestion** (one form per round), with **2–4** options per question and the **recommended** option listed **first**. Tag discovery-sourced options **`[discovered]`** when they came from repo search.
11
11
  - **Final assistant message (complete handoff in one send):**
12
- 1. **Audit line:** `Audit: pass 14/14` or `Audit: fail N/14 — [reason]`
12
+ 1. **Audit line:** `Audit: pass 15/15` or `Audit: fail N/15 — [reason]`
13
13
  2. **Artifact:** the full XML prompt inside **one** Markdown code fence whose language tag is `xml`
14
14
  3. **Send boundary:** stop typing as soon as the closing fence ends—the message body is exactly those two blocks back-to-back, ready to copy; your next tokens belong to the user’s following turn
15
15
  - **Full audit table / JSON debug bundle:** Stay internal until the user names debug with a phrase such as `show debug`, `full audit table`, or `raw internal object`; then append the table/JSON after the usual audit line + XML fence.
@@ -99,6 +99,6 @@ Include at least:
99
99
 
100
100
  Add `<examples>` when format or tone is easy to misunderstand; nest sections when the task has natural hierarchy.
101
101
 
102
- ## Internal 14-row compliance checklist (audit numerator)
102
+ ## Internal 15-row compliance checklist (audit numerator)
103
103
 
104
- The `14` in `Audit: pass 14/14` maps to the named rows in `SKILL.md` (§11 **Compliance audit — 14-row checklist**), including `reversible_action_and_safety_check_guidance` and `scope_terms_explicit_and_anchored`. **Default user path:** keep the table internal; print the expanded table + JSON only after an explicit debug request. On failure, set the audit line to `Audit: fail N/14 — [primary theme]` where the theme names one concrete gap (e.g. `scope_block missing completion_boundary`, `output_format lacks acceptance checks`).
104
+ The `15` in `Audit: pass 15/15` maps to the named rows in `SKILL.md` (§11 **Compliance audit — 15-row checklist**), including `reversible_action_and_safety_check_guidance` and `scope_terms_explicit_and_anchored`. **Default user path:** keep the table internal; print the expanded table + JSON only after an explicit debug request. On failure, set the audit line to `Audit: fail N/15 — [primary theme]` where the theme names one concrete gap (e.g. `scope_block missing completion_boundary`, `output_format lacks acceptance checks`).