claude-dev-env 1.15.0 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/skill-writer-agent.md +3 -3
- package/hooks/HOOK_SPECS_PROMPT_WORKFLOW.md +6 -0
- package/hooks/blocking/prompt_workflow_gate_core.py +174 -56
- package/hooks/blocking/test_prompt_workflow_gate_core.py +84 -13
- package/package.json +1 -1
- package/skills/prompt-generator/REFERENCE.md +2 -2
- package/skills/prompt-generator/REFINEMENT_PIPELINE_RUNBOOK.md +4 -4
- package/skills/prompt-generator/SKILL.md +24 -14
- package/skills/prompt-generator/TARGET_OUTPUT.md +11 -9
- package/skills/prompt-generator/evals/prompt-generator.json +25 -13
- package/skills/skill-builder/SKILL.md +87 -0
- package/skills/skill-builder/references/delegation-map.md +151 -0
- package/skills/skill-builder/templates/gap-analysis.md +41 -0
- package/skills/skill-builder/workflows/improve-skill.md +88 -0
- package/skills/skill-builder/workflows/new-skill.md +223 -0
- package/skills/skill-builder/workflows/polish-skill.md +83 -0
|
@@ -268,7 +268,7 @@ You are a specialized agent that orchestrates the skill-writer skill for creatin
|
|
|
268
268
|
## Examples
|
|
269
269
|
|
|
270
270
|
<example>
|
|
271
|
-
<
|
|
271
|
+
<background>User wants to create a simple skill for git commit message formatting (single file, straightforward).</background>
|
|
272
272
|
|
|
273
273
|
<user>
|
|
274
274
|
Create a skill for writing good git commit messages following conventional commits.
|
|
@@ -303,7 +303,7 @@ This is straightforward enough for the skill to handle directly without needing
|
|
|
303
303
|
</example>
|
|
304
304
|
|
|
305
305
|
<example>
|
|
306
|
-
<
|
|
306
|
+
<background>User needs comprehensive web automation skill with TDD patterns, helper scripts, and team deployment.</background>
|
|
307
307
|
|
|
308
308
|
<user>
|
|
309
309
|
We need a comprehensive skill for web automation development. It should include:
|
|
@@ -378,7 +378,7 @@ Once I have these details, I'll create the complete package with progressive dis
|
|
|
378
378
|
</example>
|
|
379
379
|
|
|
380
380
|
<example>
|
|
381
|
-
<
|
|
381
|
+
<background>User asks about allowed-tools frontmatter field.</background>
|
|
382
382
|
|
|
383
383
|
<user>
|
|
384
384
|
When should I use the allowed-tools field in SKILL.md frontmatter?
|
|
@@ -54,3 +54,9 @@ These two signals are runtime-checked by the Stop guard whenever a prompt-workfl
|
|
|
54
54
|
## Deterministic Boundary
|
|
55
55
|
|
|
56
56
|
These hooks enforce only structural/runtime checks. Semantic quality remains in the auditor layer.
|
|
57
|
+
|
|
58
|
+
## Reviewing Flattened Transcript Exports
|
|
59
|
+
|
|
60
|
+
- Live prompt-workflow responses still require an explicit `Audit:` line plus one outer `xml` fence. The Stop guard and clipboard path continue to evaluate that literal boundary.
|
|
61
|
+
- Saved transcript exports can flatten blocked retry turns and omit the outer fence lines. Normalize those files with `prompt_workflow_gate_core.normalize_prompt_workflow_export(...)`, then evaluate the rebuilt message with `extract_fenced_xml_content(...)` or `extract_fenced_xml_content_from_export(...)`.
|
|
62
|
+
- Fence-relative evals review the **last successful Audit + artifact pair** after normalization. Earlier blocked retries in the flattened transcript remain diagnostic evidence and do not count as extra delivered artifacts.
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import re
|
|
7
|
+
import textwrap
|
|
7
8
|
from typing import Iterable
|
|
8
9
|
|
|
9
10
|
from prompt_workflow_gate_config import (
|
|
@@ -18,25 +19,57 @@ from prompt_workflow_gate_config import (
|
|
|
18
19
|
REQUIRED_XML_SECTIONS,
|
|
19
20
|
)
|
|
20
21
|
|
|
22
|
+
TRIPLE_BACKTICK = "```"
|
|
23
|
+
AUDIT_LINE_PATTERN = re.compile(r"^\s*[●•]?\s*(Audit:\s*.+?)\s*$")
|
|
21
24
|
|
|
22
25
|
def _line_opens_xml_fence(line: str) -> bool:
|
|
23
26
|
stripped = line.strip()
|
|
24
|
-
if not stripped.startswith(
|
|
27
|
+
if not stripped.startswith(TRIPLE_BACKTICK):
|
|
25
28
|
return False
|
|
26
|
-
|
|
29
|
+
fence_marker_length = len(TRIPLE_BACKTICK)
|
|
30
|
+
remainder = stripped[fence_marker_length:].strip()
|
|
27
31
|
return remainder == "xml" or remainder.startswith("xml ")
|
|
28
32
|
|
|
29
|
-
|
|
30
33
|
def _line_is_bare_fence_close(line: str) -> bool:
|
|
31
|
-
return line.strip() ==
|
|
32
|
-
|
|
34
|
+
return line.strip() == TRIPLE_BACKTICK
|
|
33
35
|
|
|
34
36
|
def _line_opens_inner_markdown_fence(line: str) -> bool:
|
|
35
37
|
stripped = line.strip()
|
|
36
|
-
if not stripped.startswith(
|
|
38
|
+
if not stripped.startswith(TRIPLE_BACKTICK):
|
|
37
39
|
return False
|
|
38
|
-
return stripped !=
|
|
39
|
-
|
|
40
|
+
return stripped != TRIPLE_BACKTICK
|
|
41
|
+
|
|
42
|
+
def _collect_inner_markdown_fence(
    lines: list[str],
    start_index: int,
) -> tuple[list[str], int]:
    """Copy lines from ``start_index`` up to and including a bare fence close.

    Every line from ``start_index`` onward is copied; copying stops right
    after the first line that is a bare closing fence, or at the end of
    ``lines`` when no such line exists.

    Returns:
        A pair ``(copied_lines, next_index)`` where ``next_index`` is the
        index of the first line that was not consumed.
    """
    collected: list[str] = []
    next_index = start_index
    for position in range(start_index, len(lines)):
        candidate = lines[position]
        collected.append(candidate)
        next_index = position + 1
        if _line_is_bare_fence_close(candidate):
            # The closer belongs to the nested fence, so it is kept.
            break
    return collected, next_index
|
|
55
|
+
|
|
56
|
+
def _collect_xml_fence_body(
    lines: list[str],
    start_index: int,
) -> tuple[list[str], int]:
    """Gather the body of an ``xml`` fence beginning at ``start_index``.

    Lines are accumulated until a bare closing fence is met. A nested
    markdown fence (an opening fence that is not bare) is copied whole,
    closer included, so its closer does not end the outer body.

    Returns:
        A pair ``(body_lines, next_index)``. When the outer closer is found
        it is excluded from the body and ``next_index`` points just past it;
        otherwise ``next_index`` is where scanning ran out of lines.
    """
    collected: list[str] = []
    cursor = start_index
    total = len(lines)
    while cursor < total:
        candidate = lines[cursor]
        if _line_is_bare_fence_close(candidate):
            # Outer fence closed: skip the closer itself.
            return collected, cursor + 1
        if _line_opens_inner_markdown_fence(candidate):
            nested_lines, cursor = _collect_inner_markdown_fence(lines, cursor)
            collected += nested_lines
        else:
            collected.append(candidate)
            cursor += 1
    return collected, cursor
|
|
40
73
|
|
|
41
74
|
def extract_fenced_xml_content(text: str) -> str:
|
|
42
75
|
"""Extract bodies of ```xml fenced blocks.
|
|
@@ -50,31 +83,104 @@ def extract_fenced_xml_content(text: str) -> str:
|
|
|
50
83
|
lines = text.splitlines()
|
|
51
84
|
index = 0
|
|
52
85
|
while index < len(lines):
|
|
53
|
-
if _line_opens_xml_fence(lines[index]):
|
|
86
|
+
if not _line_opens_xml_fence(lines[index]):
|
|
54
87
|
index += 1
|
|
55
|
-
body_lines: list[str] = []
|
|
56
|
-
while index < len(lines):
|
|
57
|
-
line = lines[index]
|
|
58
|
-
if _line_is_bare_fence_close(line):
|
|
59
|
-
index += 1
|
|
60
|
-
break
|
|
61
|
-
if _line_opens_inner_markdown_fence(line):
|
|
62
|
-
body_lines.append(line)
|
|
63
|
-
index += 1
|
|
64
|
-
while index < len(lines):
|
|
65
|
-
inner_line = lines[index]
|
|
66
|
-
body_lines.append(inner_line)
|
|
67
|
-
index += 1
|
|
68
|
-
if _line_is_bare_fence_close(inner_line):
|
|
69
|
-
break
|
|
70
|
-
continue
|
|
71
|
-
body_lines.append(line)
|
|
72
|
-
index += 1
|
|
73
|
-
results.append("\n".join(body_lines))
|
|
74
88
|
continue
|
|
75
|
-
index
|
|
89
|
+
body_lines, index = _collect_xml_fence_body(lines, index + 1)
|
|
90
|
+
results.append("\n".join(body_lines))
|
|
76
91
|
return "\n".join(results)
|
|
77
92
|
|
|
93
|
+
def _line_is_audit_line(line: str) -> bool:
    """Report whether ``line`` matches ``AUDIT_LINE_PATTERN``."""
    return bool(AUDIT_LINE_PATTERN.match(line))
|
|
95
|
+
|
|
96
|
+
def _normalize_audit_line(line: str) -> str:
    """Return the ``Audit: ...`` portion of ``line`` without decoration.

    When ``AUDIT_LINE_PATTERN`` matches, its first capture group is returned
    stripped; otherwise the whole line is returned stripped.
    """
    found = AUDIT_LINE_PATTERN.match(line)
    if found is None:
        return line.strip()
    return found.group(1).strip()
|
|
101
|
+
|
|
102
|
+
def _line_starts_exported_artifact(line: str) -> bool:
|
|
103
|
+
stripped = line.strip()
|
|
104
|
+
if not stripped:
|
|
105
|
+
return False
|
|
106
|
+
if _line_opens_xml_fence(stripped):
|
|
107
|
+
return True
|
|
108
|
+
exported_artifact_pattern = re.compile(
|
|
109
|
+
r"^<(\?xml\b|prompt\b|runtime_context\b|role\b|background\b|instructions\b|constraints\b|output_format\b|illustrations\b|open_question\b)",
|
|
110
|
+
)
|
|
111
|
+
return exported_artifact_pattern.match(stripped) is not None
|
|
112
|
+
|
|
113
|
+
def _trim_trailing_blank_lines(lines: list[str]) -> list[str]:
|
|
114
|
+
trimmed = list(lines)
|
|
115
|
+
while trimmed and not trimmed[-1].strip():
|
|
116
|
+
trimmed.pop()
|
|
117
|
+
return trimmed
|
|
118
|
+
|
|
119
|
+
def _trim_flattened_export_tail(lines: list[str]) -> list[str]:
    """Drop trailing blank lines and trailing ``✻ `` lines from ``lines``.

    Trailing blank lines are removed first. Then, for as long as the last
    remaining line starts (after left-stripping) with ``"✻ "``, that line is
    removed together with any blank lines that precede it.
    """
    remaining = _trim_trailing_blank_lines(lines)
    while remaining:
        if not remaining[-1].lstrip().startswith("✻ "):
            break
        remaining = _trim_trailing_blank_lines(remaining[:-1])
    return remaining
|
|
125
|
+
|
|
126
|
+
def _find_last_audit_index(lines: list[str]) -> int | None:
    """Return the index of the last audit line in ``lines``, or ``None``."""
    # Walking backwards finds the same index as remembering the latest hit
    # of a forward scan.
    for position in range(len(lines) - 1, -1, -1):
        if _line_is_audit_line(lines[position]):
            return position
    return None
|
|
132
|
+
|
|
133
|
+
def _find_first_artifact_index(lines: list[str]) -> int | None:
    """Return the index of the first line that starts an artifact, or ``None``."""
    return next(
        (
            position
            for position, candidate in enumerate(lines)
            if _line_starts_exported_artifact(candidate)
        ),
        None,
    )
|
|
138
|
+
|
|
139
|
+
def _rebuild_from_existing_fence(audit_line: str, artifact_text: str) -> str:
    """Re-emit ``audit_line`` followed by the fenced XML found in ``artifact_text``.

    The fenced body is extracted with ``extract_fenced_xml_content`` and
    stripped. When nothing remains, only ``audit_line`` is returned.
    """
    body = extract_fenced_xml_content(artifact_text).strip()
    if body:
        return f"{audit_line}\n```xml\n{body}\n```"
    return audit_line
|
|
144
|
+
|
|
145
|
+
def _rebuild_from_flattened_body(audit_line: str, artifact_text: str) -> str:
|
|
146
|
+
dedented_body = textwrap.dedent(artifact_text).strip("\n")
|
|
147
|
+
if not dedented_body:
|
|
148
|
+
return audit_line
|
|
149
|
+
return f"{audit_line}\n```xml\n{dedented_body}\n```"
|
|
150
|
+
|
|
151
|
+
def _rebuild_canonical_export(audit_line: str, artifact_lines: list[str]) -> str:
    """Produce the audit-plus-fence message for ``artifact_lines``.

    With no artifact lines only ``audit_line`` is returned. Otherwise the
    lines are joined and right-stripped; if the first line opens an ``xml``
    fence the existing fence is re-extracted, else the text is treated as a
    flattened body and wrapped in a new fence.
    """
    if not artifact_lines:
        return audit_line
    joined_artifact = "\n".join(artifact_lines).rstrip()
    rebuild = (
        _rebuild_from_existing_fence
        if _line_opens_xml_fence(artifact_lines[0])
        else _rebuild_from_flattened_body
    )
    return rebuild(audit_line, joined_artifact)
|
|
158
|
+
|
|
159
|
+
def normalize_prompt_workflow_export(text: str) -> str:
    """Rebuild the canonical ``Audit:`` line plus ``xml`` fence from ``text``.

    The last line matching the audit pattern is taken as the attempt to
    keep; everything before it is discarded. The artifact is whatever
    follows, starting at the first line that begins an exported artifact,
    with the flattened-export tail trimmed off.

    Returns:
        * ``text.strip()`` when no audit line exists;
        * the normalized audit line alone when no artifact follows it;
        * otherwise the normalized audit line followed by the rebuilt fence.
    """
    all_lines = text.splitlines()
    audit_position = _find_last_audit_index(all_lines)
    if audit_position is None:
        return text.strip()

    canonical_audit = _normalize_audit_line(all_lines[audit_position])
    after_audit = all_lines[audit_position + 1 :]
    artifact_offset = _find_first_artifact_index(after_audit)
    if artifact_offset is None:
        return canonical_audit

    artifact_lines = _trim_flattened_export_tail(after_audit[artifact_offset:])
    return _rebuild_canonical_export(canonical_audit, artifact_lines)
|
|
179
|
+
|
|
180
|
+
def extract_fenced_xml_content_from_export(text: str) -> str:
    """Normalize ``text`` as an export, then return its fenced XML content."""
    return extract_fenced_xml_content(normalize_prompt_workflow_export(text))
|
|
78
184
|
|
|
79
185
|
def missing_required_xml_sections(text: str) -> list[str]:
|
|
80
186
|
fenced_body = extract_fenced_xml_content(text)
|
|
@@ -88,6 +194,30 @@ def missing_required_xml_sections(text: str) -> list[str]:
|
|
|
88
194
|
missing_sections.append(section_name)
|
|
89
195
|
return missing_sections
|
|
90
196
|
|
|
197
|
+
def _build_negative_keyword_violation(
|
|
198
|
+
match: re.Match[str],
|
|
199
|
+
line_number: int,
|
|
200
|
+
line_text: str,
|
|
201
|
+
) -> dict[str, str | int]:
|
|
202
|
+
return {
|
|
203
|
+
"keyword": match.group(),
|
|
204
|
+
"line_number": line_number,
|
|
205
|
+
"line_text": line_text.strip(),
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
def _find_pattern_violations(
    patterns: Iterable[re.Pattern[str]],
    line_text: str,
    line_number: int,
) -> list[dict[str, str | int]]:
    """Return one violation record per pattern that is found in ``line_text``.

    Each pattern is searched once; only its first hit on the line is
    reported. Records appear in the order the patterns are iterated.
    """
    return [
        _build_negative_keyword_violation(found, line_number, line_text)
        for pattern in patterns
        if (found := pattern.search(line_text))
    ]
|
|
91
221
|
|
|
92
222
|
def find_negative_keywords_in_fenced_xml(
|
|
93
223
|
text: str,
|
|
@@ -95,45 +225,37 @@ def find_negative_keywords_in_fenced_xml(
|
|
|
95
225
|
fenced_content = extract_fenced_xml_content(text)
|
|
96
226
|
if not fenced_content:
|
|
97
227
|
return []
|
|
98
|
-
fenced_lines = fenced_content.splitlines()
|
|
99
228
|
all_violations: list[dict[str, str | int]] = []
|
|
100
|
-
for line_index, each_line in enumerate(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
"line_text": each_line.strip(),
|
|
116
|
-
})
|
|
229
|
+
for line_index, each_line in enumerate(fenced_content.splitlines(), start=1):
|
|
230
|
+
all_violations.extend(
|
|
231
|
+
_find_pattern_violations(
|
|
232
|
+
COMPILED_NEGATIVE_KEYWORD_PATTERNS,
|
|
233
|
+
each_line,
|
|
234
|
+
line_index,
|
|
235
|
+
),
|
|
236
|
+
)
|
|
237
|
+
all_violations.extend(
|
|
238
|
+
_find_pattern_violations(
|
|
239
|
+
COMPILED_NEGATIVE_INDIRECT_PATTERNS,
|
|
240
|
+
each_line,
|
|
241
|
+
line_index,
|
|
242
|
+
),
|
|
243
|
+
)
|
|
117
244
|
return all_violations
|
|
118
245
|
|
|
119
|
-
|
|
120
246
|
def _contains_any_marker(text: str, markers: Iterable[str]) -> bool:
|
|
121
247
|
lower_text = text.lower()
|
|
122
248
|
return any(marker.lower() in lower_text for marker in markers)
|
|
123
249
|
|
|
124
|
-
|
|
125
250
|
def has_debug_intent(text: str) -> bool:
|
|
126
251
|
return _contains_any_marker(text, DEBUG_INTENT_MARKERS)
|
|
127
252
|
|
|
128
|
-
|
|
129
253
|
def has_internal_object_leak(text: str) -> bool:
|
|
130
254
|
return _contains_any_marker(text, INTERNAL_OBJECT_MARKERS)
|
|
131
255
|
|
|
132
|
-
|
|
133
256
|
def missing_scope_anchors(text: str) -> list[str]:
|
|
134
257
|
return [anchor for anchor in REQUIRED_SCOPE_ANCHORS if anchor not in text]
|
|
135
258
|
|
|
136
|
-
|
|
137
259
|
def find_ambiguous_scope_terms(text: str) -> list[str]:
|
|
138
260
|
if "scope" not in text.lower():
|
|
139
261
|
return []
|
|
@@ -144,16 +266,13 @@ def find_ambiguous_scope_terms(text: str) -> list[str]:
|
|
|
144
266
|
matches.append(term)
|
|
145
267
|
return matches
|
|
146
268
|
|
|
147
|
-
|
|
148
269
|
def has_checklist_container(text: str) -> bool:
|
|
149
270
|
lower_text = text.lower()
|
|
150
271
|
return "checklist_results" in lower_text or "checklist:" in lower_text
|
|
151
272
|
|
|
152
|
-
|
|
153
273
|
def missing_checklist_rows(text: str) -> list[str]:
|
|
154
274
|
return [row for row in REQUIRED_CHECKLIST_ROWS if row not in text]
|
|
155
275
|
|
|
156
|
-
|
|
157
276
|
def is_prompt_workflow_response(text: str) -> bool:
|
|
158
277
|
lower_text = text.lower()
|
|
159
278
|
matched_markers = [
|
|
@@ -161,7 +280,6 @@ def is_prompt_workflow_response(text: str) -> bool:
|
|
|
161
280
|
]
|
|
162
281
|
return len(matched_markers) >= 2
|
|
163
282
|
|
|
164
|
-
|
|
165
283
|
def missing_context_control_signals(text: str) -> list[str]:
|
|
166
284
|
required_signals: tuple[str, ...] = (
|
|
167
285
|
"base_minimal_instruction_layer: true",
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from prompt_workflow_gate_core import (
|
|
4
4
|
extract_fenced_xml_content,
|
|
5
|
+
extract_fenced_xml_content_from_export,
|
|
5
6
|
find_ambiguous_scope_terms,
|
|
6
7
|
has_checklist_container,
|
|
7
8
|
has_internal_object_leak,
|
|
@@ -10,31 +11,27 @@ from prompt_workflow_gate_core import (
|
|
|
10
11
|
missing_checklist_rows,
|
|
11
12
|
missing_required_xml_sections,
|
|
12
13
|
missing_scope_anchors,
|
|
14
|
+
normalize_prompt_workflow_export,
|
|
13
15
|
)
|
|
14
16
|
|
|
15
|
-
|
|
16
17
|
def test_internal_object_leak_detected() -> None:
|
|
17
18
|
text = '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
|
|
18
19
|
assert has_internal_object_leak(text)
|
|
19
20
|
|
|
20
|
-
|
|
21
21
|
def test_missing_scope_anchors_returns_expected_rows() -> None:
|
|
22
22
|
text = "target_local_roots only."
|
|
23
23
|
missing = missing_scope_anchors(text)
|
|
24
24
|
assert "target_canonical_roots" in missing
|
|
25
25
|
assert "completion_boundary" in missing
|
|
26
26
|
|
|
27
|
-
|
|
28
27
|
def test_missing_checklist_rows_detected() -> None:
|
|
29
28
|
text = "checklist_results: structured_scoped_instructions only"
|
|
30
29
|
missing = missing_checklist_rows(text)
|
|
31
30
|
assert "completion_boundary_measurable" in missing
|
|
32
31
|
|
|
33
|
-
|
|
34
32
|
def test_checklist_container_detection() -> None:
|
|
35
33
|
assert has_checklist_container("checklist_results:\n- structured_scoped_instructions")
|
|
36
34
|
|
|
37
|
-
|
|
38
35
|
def test_prompt_workflow_response_detection() -> None:
|
|
39
36
|
message = (
|
|
40
37
|
"overall_status: pass\n"
|
|
@@ -43,22 +40,36 @@ def test_prompt_workflow_response_detection() -> None:
|
|
|
43
40
|
)
|
|
44
41
|
assert is_prompt_workflow_response(message)
|
|
45
42
|
|
|
46
|
-
|
|
47
43
|
def test_missing_context_control_signals_detected() -> None:
|
|
48
44
|
missing = missing_context_control_signals("base_minimal_instruction_layer: true")
|
|
49
45
|
assert "on_demand_skill_loading: true" in missing
|
|
50
46
|
|
|
51
|
-
|
|
52
47
|
def test_ambiguous_scope_terms_detected() -> None:
|
|
53
48
|
text = "Scope applies to this session and current files."
|
|
54
49
|
terms = find_ambiguous_scope_terms(text)
|
|
55
50
|
assert "this session" in terms
|
|
56
51
|
assert "current files" in terms
|
|
57
52
|
|
|
58
|
-
|
|
59
53
|
def _fenced_xml(body: str) -> str:
|
|
60
54
|
return f"```xml\n{body}\n```"
|
|
61
55
|
|
|
56
|
+
def _runtime_context_lines() -> tuple[str, ...]:
|
|
57
|
+
return (
|
|
58
|
+
"<runtime_context>",
|
|
59
|
+
"base_minimal_instruction_layer: true",
|
|
60
|
+
"on_demand_skill_loading: true",
|
|
61
|
+
"</runtime_context>",
|
|
62
|
+
"",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def _flattened_transcript(*lines: str) -> str:
|
|
66
|
+
return "\n".join(lines) + "\n"
|
|
67
|
+
|
|
68
|
+
def _flattened_attempt(*body_lines: str, audit_line: str = "Audit: pass 15/15") -> str:
|
|
69
|
+
flattened_lines = [audit_line, ""]
|
|
70
|
+
for line in body_lines:
|
|
71
|
+
flattened_lines.append(f" {line}" if line else "")
|
|
72
|
+
return "\n".join(flattened_lines)
|
|
62
73
|
|
|
63
74
|
def test_missing_required_xml_sections_all_present_returns_empty() -> None:
|
|
64
75
|
body = (
|
|
@@ -70,7 +81,6 @@ def test_missing_required_xml_sections_all_present_returns_empty() -> None:
|
|
|
70
81
|
)
|
|
71
82
|
assert missing_required_xml_sections(_fenced_xml(body)) == []
|
|
72
83
|
|
|
73
|
-
|
|
74
84
|
def test_missing_required_xml_sections_missing_background() -> None:
|
|
75
85
|
body = (
|
|
76
86
|
"<role>R.</role>\n"
|
|
@@ -80,7 +90,6 @@ def test_missing_required_xml_sections_missing_background() -> None:
|
|
|
80
90
|
)
|
|
81
91
|
assert missing_required_xml_sections(_fenced_xml(body)) == ["background"]
|
|
82
92
|
|
|
83
|
-
|
|
84
93
|
def test_missing_required_xml_sections_missing_role_and_output_format() -> None:
|
|
85
94
|
body = (
|
|
86
95
|
"<background>C.</background>\n"
|
|
@@ -90,11 +99,9 @@ def test_missing_required_xml_sections_missing_role_and_output_format() -> None:
|
|
|
90
99
|
missing = missing_required_xml_sections(_fenced_xml(body))
|
|
91
100
|
assert missing == ["role", "output_format"]
|
|
92
101
|
|
|
93
|
-
|
|
94
102
|
def test_missing_required_xml_sections_no_fence_returns_empty() -> None:
|
|
95
103
|
assert missing_required_xml_sections("no fenced xml here") == []
|
|
96
104
|
|
|
97
|
-
|
|
98
105
|
def test_missing_required_xml_sections_prose_without_tags_counts_as_missing() -> None:
|
|
99
106
|
body = (
|
|
100
107
|
"<role>R.</role>\n"
|
|
@@ -105,7 +112,6 @@ def test_missing_required_xml_sections_prose_without_tags_counts_as_missing() ->
|
|
|
105
112
|
)
|
|
106
113
|
assert missing_required_xml_sections(_fenced_xml(body)) == ["background"]
|
|
107
114
|
|
|
108
|
-
|
|
109
115
|
def test_extract_fenced_xml_preserves_content_after_nested_inner_fence() -> None:
|
|
110
116
|
message = (
|
|
111
117
|
"```xml\n"
|
|
@@ -122,3 +128,68 @@ def test_extract_fenced_xml_preserves_content_after_nested_inner_fence() -> None
|
|
|
122
128
|
extracted = extract_fenced_xml_content(message)
|
|
123
129
|
assert "</illustrations>" in extracted
|
|
124
130
|
assert "<background>B</background>" in extracted
|
|
131
|
+
|
|
132
|
+
def test_normalize_prompt_workflow_export_rebuilds_fence_from_flattened_transcript() -> None:
    """A fence-less flattened attempt is rebuilt into audit line plus ``xml`` fence."""
    body_lines = (
        *_runtime_context_lines(),
        "<role>R</role>",
        "<background>B</background>",
        "<instructions>I</instructions>",
        "<constraints>C</constraints>",
        "<output_format>O</output_format>",
        "✻ Worked for 1m 7s",
    )
    attempt = _flattened_attempt(*body_lines, audit_line="● Audit: pass 15/15")
    transcript = _flattened_transcript(attempt)

    normalized = normalize_prompt_workflow_export(transcript)

    # The bullet is dropped from the audit line and the fence is restored.
    assert normalized.startswith("Audit: pass 15/15\n```xml\n")
    assert normalized.endswith("\n```")
    assert "<runtime_context>" in normalized
    # The trailing status line is not part of the artifact.
    assert "✻ Worked for 1m 7s" not in normalized
|
|
150
|
+
|
|
151
|
+
def test_normalize_prompt_workflow_export_uses_last_audit_attempt() -> None:
    """With two attempts in one transcript, only the later one is kept."""
    earlier_body = (
        "<role>FIRST</role>",
        "<background>Old</background>",
        "<instructions>Old</instructions>",
        "<constraints>Old</constraints>",
        "<output_format>Old</output_format>",
    )
    later_body = (
        *_runtime_context_lines(),
        "<role>FINAL</role>",
        "<background>Fresh</background>",
        "<instructions>I</instructions>",
        "<constraints>C</constraints>",
        "<output_format>O</output_format>",
        "✻ Worked for 2m 8s",
    )
    first_attempt = _flattened_attempt(*earlier_body, audit_line="● Audit: pass 15/15")
    second_attempt = _flattened_attempt(*later_body)
    transcript = _flattened_transcript(
        first_attempt,
        "",
        "● Re-emitting the full artifact with the runtime signals added.",
        "",
        second_attempt,
    )

    normalized = normalize_prompt_workflow_export(transcript)

    assert "<role>FINAL</role>" in normalized
    assert "<role>FIRST</role>" not in normalized
|
|
179
|
+
|
|
180
|
+
def test_extract_fenced_xml_content_from_export_supports_flattened_transcript() -> None:
    """Extraction from a flattened transcript yields the bare XML body."""
    body_lines = (
        "<role>R</role>",
        "<background>B</background>",
        "<instructions>I</instructions>",
        "<constraints>C</constraints>",
        "<output_format>O</output_format>",
        "✻ Worked for 31s",
    )
    attempt = _flattened_attempt(*body_lines, audit_line="● Audit: pass 15/15")
    transcript = _flattened_transcript(attempt)

    extracted = extract_fenced_xml_content_from_export(transcript)

    assert extracted.startswith("<role>R</role>")
    assert "<output_format>O</output_format>" in extracted
    assert "Worked for" not in extracted
|
package/package.json
CHANGED
|
@@ -217,8 +217,8 @@ Shape (field names stable for internal audit helpers and Stop-hook leak detectio
|
|
|
217
217
|
"comparison_basis": "...",
|
|
218
218
|
"completion_boundary": "..."
|
|
219
219
|
},
|
|
220
|
-
"required_sections": ["role", "
|
|
221
|
-
"base_prompt_xml": "<role>...</role><
|
|
220
|
+
"required_sections": ["role", "background", "instructions", "constraints", "output_format", "illustrations"],
|
|
221
|
+
"base_prompt_xml": "<role>...</role><background>...</background><instructions>...</instructions><constraints>...</constraints><illustrations>...</illustrations><output_format>...</output_format>",
|
|
222
222
|
"section_scope_rule": "Each refiner edits exactly one section and returns sibling sections unchanged.",
|
|
223
223
|
"section_output_contract": {
|
|
224
224
|
"required_fields": ["improved_block", "rationale", "concise_diff"]
|
|
@@ -28,11 +28,11 @@ Use this command:
|
|
|
28
28
|
- `completion_boundary`
|
|
29
29
|
- XML scaffold includes all sections — verified by the Stop hook at runtime; each required section tag must have both an opening and a closing tag:
|
|
30
30
|
- `<role>`
|
|
31
|
-
- `<
|
|
31
|
+
- `<background>`
|
|
32
32
|
- `<instructions>`
|
|
33
33
|
- `<constraints>`
|
|
34
34
|
- `<output_format>`
|
|
35
|
-
- `<
|
|
35
|
+
- `<illustrations>`
|
|
36
36
|
- Includes internal refinement object with:
|
|
37
37
|
- `pipeline_mode: internal_section_refinement_with_final_audit`
|
|
38
38
|
- `required_sections` list with all six sections
|
|
@@ -129,7 +129,7 @@ If `overall_status` is `fail`:
|
|
|
129
129
|
Validate fail-closed runtime gates:
|
|
130
130
|
|
|
131
131
|
1. **Stop leakage/scope/checklist gate**
|
|
132
|
-
- **Section-presence gate (Stop)** — Block responses where the fenced XML artifact is missing any of the five required section tag pairs: `role`, `
|
|
132
|
+
- **Section-presence gate (Stop)** — Block responses where the fenced XML artifact is missing any of the five required section tag pairs: `role`, `background`, `instructions`, `constraints`, `output_format`.
|
|
133
133
|
- Block responses that leak raw internal refinement object fields unless debug intent is explicit.
|
|
134
134
|
- Block responses missing deterministic checklist rows when audit output is present.
|
|
135
135
|
- Block responses using ambiguous scope phrasing in scope-bound sections.
|
|
@@ -149,7 +149,7 @@ Validate fail-closed runtime gates:
|
|
|
149
149
|
- Missing required scope anchors (when Stop guard applies)
|
|
150
150
|
- Raw internal object leakage without debug intent
|
|
151
151
|
- Missing required checklist rows in audit output
|
|
152
|
-
- Missing required XML sections (`role`, `
|
|
152
|
+
- Missing required XML sections (`role`, `background`, `instructions`, `constraints`, `output_format`) in the fenced artifact (opening and closing tags)
|
|
153
153
|
- Ambiguous scope terms in scope-bound text
|
|
154
154
|
- Negative keywords inside fenced XML artifacts
|
|
155
155
|
- Hedging language inside fenced XML artifacts
|