claude-dev-env 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,3 +54,9 @@ These two signals are runtime-checked by the Stop guard whenever a prompt-workfl
54
54
  ## Deterministic Boundary
55
55
 
56
56
  These hooks enforce only structural/runtime checks. Semantic quality remains in the auditor layer.
57
+
58
+ ## Reviewing Flattened Transcript Exports
59
+
60
+ - Live prompt-workflow responses still require an explicit `Audit:` line plus one outer `xml` fence. The Stop guard and clipboard path continue to evaluate that literal boundary.
61
+ - Saved transcript exports can flatten blocked retry turns and omit the outer fence lines. Normalize those files with `prompt_workflow_gate_core.normalize_prompt_workflow_export(...)`, then evaluate the rebuilt message with `extract_fenced_xml_content(...)` or `extract_fenced_xml_content_from_export(...)`.
62
+ - Fence-relative evals review the **last successful Audit + artifact pair** after normalization. Earlier blocked retries in the flattened transcript remain diagnostic evidence and do not count as extra delivered artifacts.
@@ -4,6 +4,7 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import re
7
+ import textwrap
7
8
  from typing import Iterable
8
9
 
9
10
  from prompt_workflow_gate_config import (
@@ -18,25 +19,57 @@ from prompt_workflow_gate_config import (
18
19
  REQUIRED_XML_SECTIONS,
19
20
  )
20
21
 
22
+ TRIPLE_BACKTICK = "```"
23
+ AUDIT_LINE_PATTERN = re.compile(r"^\s*[●•]?\s*(Audit:\s*.+?)\s*$")
21
24
 
22
25
def _line_opens_xml_fence(line: str) -> bool:
    """Return True when *line* opens an ``xml`` fenced block.

    After surrounding whitespace is removed, the line must begin with the
    triple-backtick marker, and the text following the marker must be
    exactly ``xml`` or start with ``xml`` followed by a space.
    """
    candidate = line.strip()
    if candidate.startswith(TRIPLE_BACKTICK):
        info_string = candidate[len(TRIPLE_BACKTICK):].strip()
        return info_string == "xml" or info_string.startswith("xml ")
    return False
28
32
 
29
-
30
33
def _line_is_bare_fence_close(line: str) -> bool:
    """Return True when *line*, ignoring surrounding whitespace, is only the fence marker."""
    without_padding = line.strip()
    return without_padding == TRIPLE_BACKTICK
33
35
 
34
36
def _line_opens_inner_markdown_fence(line: str) -> bool:
    """Return True when *line* starts with the fence marker and carries extra text.

    A bare marker is a fence close, so it is excluded here.
    """
    candidate = line.strip()
    return candidate.startswith(TRIPLE_BACKTICK) and candidate != TRIPLE_BACKTICK
41
+
42
def _collect_inner_markdown_fence(
    lines: list[str],
    start_index: int,
) -> tuple[list[str], int]:
    """Collect a nested fence starting at ``lines[start_index]``.

    Returns the collected lines (opening line through the first bare fence
    close, inclusive) and the index of the line that follows them. When no
    bare close is found, everything from ``start_index`` onward is returned.
    """
    for offset, candidate in enumerate(lines[start_index:]):
        if _line_is_bare_fence_close(candidate):
            stop = start_index + offset + 1
            return lines[start_index:stop], stop
    # No closing fence: consume the remainder of the input.
    return lines[start_index:], max(start_index, len(lines))
55
+
56
def _collect_xml_fence_body(
    lines: list[str],
    start_index: int,
) -> tuple[list[str], int]:
    """Collect the body of an ``xml`` fence whose first body line is at *start_index*.

    Returns the body lines and the index just past the closing fence. Nested
    markdown fences are copied into the body whole, so their own closing
    marker does not end the outer fence. If the outer fence never closes,
    the body runs to the end of *lines*.
    """
    collected: list[str] = []
    cursor = start_index
    total = len(lines)
    while cursor < total:
        line = lines[cursor]
        if _line_is_bare_fence_close(line):
            return collected, cursor + 1
        if _line_opens_inner_markdown_fence(line):
            nested, cursor = _collect_inner_markdown_fence(lines, cursor)
            collected += nested
        else:
            collected.append(line)
            cursor += 1
    return collected, cursor
40
73
 
41
74
  def extract_fenced_xml_content(text: str) -> str:
42
75
  """Extract bodies of ```xml fenced blocks.
@@ -50,31 +83,104 @@ def extract_fenced_xml_content(text: str) -> str:
50
83
  lines = text.splitlines()
51
84
  index = 0
52
85
  while index < len(lines):
53
- if _line_opens_xml_fence(lines[index]):
86
+ if not _line_opens_xml_fence(lines[index]):
54
87
  index += 1
55
- body_lines: list[str] = []
56
- while index < len(lines):
57
- line = lines[index]
58
- if _line_is_bare_fence_close(line):
59
- index += 1
60
- break
61
- if _line_opens_inner_markdown_fence(line):
62
- body_lines.append(line)
63
- index += 1
64
- while index < len(lines):
65
- inner_line = lines[index]
66
- body_lines.append(inner_line)
67
- index += 1
68
- if _line_is_bare_fence_close(inner_line):
69
- break
70
- continue
71
- body_lines.append(line)
72
- index += 1
73
- results.append("\n".join(body_lines))
74
88
  continue
75
- index += 1
89
+ body_lines, index = _collect_xml_fence_body(lines, index + 1)
90
+ results.append("\n".join(body_lines))
76
91
  return "\n".join(results)
77
92
 
93
def _line_is_audit_line(line: str) -> bool:
    """Return True when *line* matches ``AUDIT_LINE_PATTERN``."""
    return bool(AUDIT_LINE_PATTERN.match(line))
95
+
96
def _normalize_audit_line(line: str) -> str:
    """Return the ``Audit: ...`` portion of *line* without bullet or padding.

    Lines that do not match ``AUDIT_LINE_PATTERN`` are returned stripped.
    """
    found = AUDIT_LINE_PATTERN.match(line)
    return (found.group(1) if found else line).strip()
101
+
102
# Compiled once at import time: the function below runs once per transcript
# line, so the pattern should not be rebuilt on every call.
_EXPORTED_ARTIFACT_PATTERN = re.compile(
    r"^<(\?xml\b|prompt\b|runtime_context\b|role\b|background\b|instructions\b|constraints\b|output_format\b|illustrations\b|open_question\b)",
)


def _line_starts_exported_artifact(line: str) -> bool:
    """Return True when *line* looks like the first line of a prompt artifact.

    A line qualifies when, after stripping, it either opens an ``xml`` fence
    or begins with one of the recognised opening tags (``<?xml``,
    ``<prompt``, ``<runtime_context``, ``<role`` and the other section tags
    listed in the pattern). Blank lines never qualify.
    """
    stripped = line.strip()
    if not stripped:
        return False
    if _line_opens_xml_fence(stripped):
        return True
    return _EXPORTED_ARTIFACT_PATTERN.match(stripped) is not None
112
+
113
+ def _trim_trailing_blank_lines(lines: list[str]) -> list[str]:
114
+ trimmed = list(lines)
115
+ while trimmed and not trimmed[-1].strip():
116
+ trimmed.pop()
117
+ return trimmed
118
+
119
def _trim_flattened_export_tail(lines: list[str]) -> list[str]:
    """Drop trailing blank lines and trailing ``✻ ``-prefixed lines from *lines*.

    Blank lines and ``✻ `` lines are removed alternately from the end until
    the last remaining line is neither.
    """
    remaining = _trim_trailing_blank_lines(lines)
    while remaining:
        if not remaining[-1].lstrip().startswith("✻ "):
            break
        # Remove the status line, then any blank lines that preceded it.
        remaining = _trim_trailing_blank_lines(remaining[:-1])
    return remaining
125
+
126
def _find_last_audit_index(lines: list[str]) -> int | None:
    """Return the index of the last audit line in *lines*, or None if there is none."""
    for position in range(len(lines) - 1, -1, -1):
        if _line_is_audit_line(lines[position]):
            return position
    return None
132
+
133
def _find_first_artifact_index(lines: list[str]) -> int | None:
    """Return the index of the first line that starts an artifact, or None."""
    return next(
        (
            position
            for position, candidate in enumerate(lines)
            if _line_starts_exported_artifact(candidate)
        ),
        None,
    )
138
+
139
def _rebuild_from_existing_fence(audit_line: str, artifact_text: str) -> str:
    """Rebuild the audit-plus-fence message from text that already contains an ``xml`` fence.

    The fenced body is extracted and stripped; when nothing is left, only
    the audit line is returned.
    """
    body = extract_fenced_xml_content(artifact_text).strip()
    if body:
        return f"{audit_line}\n```xml\n{body}\n```"
    return audit_line
144
+
145
+ def _rebuild_from_flattened_body(audit_line: str, artifact_text: str) -> str:
146
+ dedented_body = textwrap.dedent(artifact_text).strip("\n")
147
+ if not dedented_body:
148
+ return audit_line
149
+ return f"{audit_line}\n```xml\n{dedented_body}\n```"
150
+
151
def _rebuild_canonical_export(audit_line: str, artifact_lines: list[str]) -> str:
    """Combine *audit_line* and *artifact_lines* into the audit-plus-fence shape.

    When the first artifact line already opens an ``xml`` fence, the existing
    fence is reused; otherwise the lines are treated as a flattened body and
    wrapped in a new fence. With no artifact lines, only the audit line is
    returned.
    """
    if not artifact_lines:
        return audit_line
    artifact_text = "\n".join(artifact_lines).rstrip()
    already_fenced = _line_opens_xml_fence(artifact_lines[0])
    rebuild = (
        _rebuild_from_existing_fence
        if already_fenced
        else _rebuild_from_flattened_body
    )
    return rebuild(audit_line, artifact_text)
158
+
159
def normalize_prompt_workflow_export(text: str) -> str:
    """Rebuild the final ``Audit:`` line plus fenced XML from a message or export.

    Saved transcript exports can flatten blocked retry turns and drop the
    outer ``xml`` fence. This helper locates the last line matching the audit
    pattern (its pass/fail status is not inspected here), takes the first
    artifact that follows it, trims trailing blank and ``✻ `` lines, and
    returns the canonical audit-plus-fence text.

    Returns the stripped input when no audit line exists, and only the
    normalized audit line when no artifact follows it.
    """
    lines = text.splitlines()
    audit_index = _find_last_audit_index(lines)
    if audit_index is None:
        return text.strip()
    audit_line = _normalize_audit_line(lines[audit_index])
    # Only content after the final audit line can belong to its artifact.
    trailing_lines = lines[audit_index + 1:]
    artifact_offset = _find_first_artifact_index(trailing_lines)
    if artifact_offset is None:
        return audit_line
    artifact_lines = _trim_flattened_export_tail(trailing_lines[artifact_offset:])
    return _rebuild_canonical_export(audit_line, artifact_lines)
179
+
180
def extract_fenced_xml_content_from_export(text: str) -> str:
    """Return the fenced XML body of a canonical message or flattened transcript export.

    The text is normalized to the audit-plus-fence shape first, then the
    fence body is extracted.
    """
    return extract_fenced_xml_content(normalize_prompt_workflow_export(text))
78
184
 
79
185
  def missing_required_xml_sections(text: str) -> list[str]:
80
186
  fenced_body = extract_fenced_xml_content(text)
@@ -88,6 +194,30 @@ def missing_required_xml_sections(text: str) -> list[str]:
88
194
  missing_sections.append(section_name)
89
195
  return missing_sections
90
196
 
197
+ def _build_negative_keyword_violation(
198
+ match: re.Match[str],
199
+ line_number: int,
200
+ line_text: str,
201
+ ) -> dict[str, str | int]:
202
+ return {
203
+ "keyword": match.group(),
204
+ "line_number": line_number,
205
+ "line_text": line_text.strip(),
206
+ }
207
+
208
def _find_pattern_violations(
    patterns: Iterable[re.Pattern[str]],
    line_text: str,
    line_number: int,
) -> list[dict[str, str | int]]:
    """Return one violation record per pattern that matches *line_text*.

    Each pattern is searched once, so a pattern contributes at most one
    record (its first match) for the line. Records follow pattern order.
    """
    first_matches = (pattern.search(line_text) for pattern in patterns)
    return [
        _build_negative_keyword_violation(found, line_number, line_text)
        for found in first_matches
        if found
    ]
91
221
 
92
222
  def find_negative_keywords_in_fenced_xml(
93
223
  text: str,
@@ -95,45 +225,37 @@ def find_negative_keywords_in_fenced_xml(
95
225
  fenced_content = extract_fenced_xml_content(text)
96
226
  if not fenced_content:
97
227
  return []
98
- fenced_lines = fenced_content.splitlines()
99
228
  all_violations: list[dict[str, str | int]] = []
100
- for line_index, each_line in enumerate(fenced_lines):
101
- for each_pattern in COMPILED_NEGATIVE_KEYWORD_PATTERNS:
102
- each_match = each_pattern.search(each_line)
103
- if each_match:
104
- all_violations.append({
105
- "keyword": each_match.group(),
106
- "line_number": line_index + 1,
107
- "line_text": each_line.strip(),
108
- })
109
- for each_pattern in COMPILED_NEGATIVE_INDIRECT_PATTERNS:
110
- each_match = each_pattern.search(each_line)
111
- if each_match:
112
- all_violations.append({
113
- "keyword": each_match.group(),
114
- "line_number": line_index + 1,
115
- "line_text": each_line.strip(),
116
- })
229
+ for line_index, each_line in enumerate(fenced_content.splitlines(), start=1):
230
+ all_violations.extend(
231
+ _find_pattern_violations(
232
+ COMPILED_NEGATIVE_KEYWORD_PATTERNS,
233
+ each_line,
234
+ line_index,
235
+ ),
236
+ )
237
+ all_violations.extend(
238
+ _find_pattern_violations(
239
+ COMPILED_NEGATIVE_INDIRECT_PATTERNS,
240
+ each_line,
241
+ line_index,
242
+ ),
243
+ )
117
244
  return all_violations
118
245
 
119
-
120
246
  def _contains_any_marker(text: str, markers: Iterable[str]) -> bool:
121
247
  lower_text = text.lower()
122
248
  return any(marker.lower() in lower_text for marker in markers)
123
249
 
124
-
125
250
def has_debug_intent(text: str) -> bool:
    """Return True when *text* contains any ``DEBUG_INTENT_MARKERS`` entry (case-insensitive)."""
    markers = DEBUG_INTENT_MARKERS
    return _contains_any_marker(text, markers)
127
252
 
128
-
129
253
def has_internal_object_leak(text: str) -> bool:
    """Return True when *text* contains any ``INTERNAL_OBJECT_MARKERS`` entry (case-insensitive)."""
    markers = INTERNAL_OBJECT_MARKERS
    return _contains_any_marker(text, markers)
131
255
 
132
-
133
256
def missing_scope_anchors(text: str) -> list[str]:
    """Return the ``REQUIRED_SCOPE_ANCHORS`` entries that do not occur in *text*.

    Matching is a case-sensitive substring test; the result keeps the order
    of ``REQUIRED_SCOPE_ANCHORS``.
    """
    absent: list[str] = []
    for anchor in REQUIRED_SCOPE_ANCHORS:
        if anchor not in text:
            absent.append(anchor)
    return absent
135
258
 
136
-
137
259
  def find_ambiguous_scope_terms(text: str) -> list[str]:
138
260
  if "scope" not in text.lower():
139
261
  return []
@@ -144,16 +266,13 @@ def find_ambiguous_scope_terms(text: str) -> list[str]:
144
266
  matches.append(term)
145
267
  return matches
146
268
 
147
-
148
269
def has_checklist_container(text: str) -> bool:
    """Return True when *text* mentions ``checklist_results`` or ``checklist:`` in any letter case."""
    lowered = text.lower()
    if "checklist_results" in lowered:
        return True
    return "checklist:" in lowered
151
272
 
152
-
153
273
def missing_checklist_rows(text: str) -> list[str]:
    """Return the ``REQUIRED_CHECKLIST_ROWS`` entries that do not occur in *text*.

    Matching is a case-sensitive substring test; the result keeps the order
    of ``REQUIRED_CHECKLIST_ROWS``.
    """
    absent: list[str] = []
    for row in REQUIRED_CHECKLIST_ROWS:
        if row not in text:
            absent.append(row)
    return absent
155
275
 
156
-
157
276
  def is_prompt_workflow_response(text: str) -> bool:
158
277
  lower_text = text.lower()
159
278
  matched_markers = [
@@ -161,7 +280,6 @@ def is_prompt_workflow_response(text: str) -> bool:
161
280
  ]
162
281
  return len(matched_markers) >= 2
163
282
 
164
-
165
283
  def missing_context_control_signals(text: str) -> list[str]:
166
284
  required_signals: tuple[str, ...] = (
167
285
  "base_minimal_instruction_layer: true",
@@ -2,6 +2,7 @@
2
2
 
3
3
  from prompt_workflow_gate_core import (
4
4
  extract_fenced_xml_content,
5
+ extract_fenced_xml_content_from_export,
5
6
  find_ambiguous_scope_terms,
6
7
  has_checklist_container,
7
8
  has_internal_object_leak,
@@ -10,31 +11,27 @@ from prompt_workflow_gate_core import (
10
11
  missing_checklist_rows,
11
12
  missing_required_xml_sections,
12
13
  missing_scope_anchors,
14
+ normalize_prompt_workflow_export,
13
15
  )
14
16
 
15
-
16
17
  def test_internal_object_leak_detected() -> None:
17
18
  text = '{"pipeline_mode": "internal_section_refinement_with_final_audit"}'
18
19
  assert has_internal_object_leak(text)
19
20
 
20
-
21
21
  def test_missing_scope_anchors_returns_expected_rows() -> None:
22
22
  text = "target_local_roots only."
23
23
  missing = missing_scope_anchors(text)
24
24
  assert "target_canonical_roots" in missing
25
25
  assert "completion_boundary" in missing
26
26
 
27
-
28
27
  def test_missing_checklist_rows_detected() -> None:
29
28
  text = "checklist_results: structured_scoped_instructions only"
30
29
  missing = missing_checklist_rows(text)
31
30
  assert "completion_boundary_measurable" in missing
32
31
 
33
-
34
32
  def test_checklist_container_detection() -> None:
35
33
  assert has_checklist_container("checklist_results:\n- structured_scoped_instructions")
36
34
 
37
-
38
35
  def test_prompt_workflow_response_detection() -> None:
39
36
  message = (
40
37
  "overall_status: pass\n"
@@ -43,22 +40,36 @@ def test_prompt_workflow_response_detection() -> None:
43
40
  )
44
41
  assert is_prompt_workflow_response(message)
45
42
 
46
-
47
43
  def test_missing_context_control_signals_detected() -> None:
48
44
  missing = missing_context_control_signals("base_minimal_instruction_layer: true")
49
45
  assert "on_demand_skill_loading: true" in missing
50
46
 
51
-
52
47
  def test_ambiguous_scope_terms_detected() -> None:
53
48
  text = "Scope applies to this session and current files."
54
49
  terms = find_ambiguous_scope_terms(text)
55
50
  assert "this session" in terms
56
51
  assert "current files" in terms
57
52
 
58
-
59
53
  def _fenced_xml(body: str) -> str:
60
54
  return f"```xml\n{body}\n```"
61
55
 
56
+ def _runtime_context_lines() -> tuple[str, ...]:
57
+ return (
58
+ "<runtime_context>",
59
+ "base_minimal_instruction_layer: true",
60
+ "on_demand_skill_loading: true",
61
+ "</runtime_context>",
62
+ "",
63
+ )
64
+
65
+ def _flattened_transcript(*lines: str) -> str:
66
+ return "\n".join(lines) + "\n"
67
+
68
+ def _flattened_attempt(*body_lines: str, audit_line: str = "Audit: pass 15/15") -> str:
69
+ flattened_lines = [audit_line, ""]
70
+ for line in body_lines:
71
+ flattened_lines.append(f" {line}" if line else "")
72
+ return "\n".join(flattened_lines)
62
73
 
63
74
  def test_missing_required_xml_sections_all_present_returns_empty() -> None:
64
75
  body = (
@@ -70,7 +81,6 @@ def test_missing_required_xml_sections_all_present_returns_empty() -> None:
70
81
  )
71
82
  assert missing_required_xml_sections(_fenced_xml(body)) == []
72
83
 
73
-
74
84
  def test_missing_required_xml_sections_missing_background() -> None:
75
85
  body = (
76
86
  "<role>R.</role>\n"
@@ -80,7 +90,6 @@ def test_missing_required_xml_sections_missing_background() -> None:
80
90
  )
81
91
  assert missing_required_xml_sections(_fenced_xml(body)) == ["background"]
82
92
 
83
-
84
93
  def test_missing_required_xml_sections_missing_role_and_output_format() -> None:
85
94
  body = (
86
95
  "<background>C.</background>\n"
@@ -90,11 +99,9 @@ def test_missing_required_xml_sections_missing_role_and_output_format() -> None:
90
99
  missing = missing_required_xml_sections(_fenced_xml(body))
91
100
  assert missing == ["role", "output_format"]
92
101
 
93
-
94
102
  def test_missing_required_xml_sections_no_fence_returns_empty() -> None:
95
103
  assert missing_required_xml_sections("no fenced xml here") == []
96
104
 
97
-
98
105
  def test_missing_required_xml_sections_prose_without_tags_counts_as_missing() -> None:
99
106
  body = (
100
107
  "<role>R.</role>\n"
@@ -105,7 +112,6 @@ def test_missing_required_xml_sections_prose_without_tags_counts_as_missing() ->
105
112
  )
106
113
  assert missing_required_xml_sections(_fenced_xml(body)) == ["background"]
107
114
 
108
-
109
115
  def test_extract_fenced_xml_preserves_content_after_nested_inner_fence() -> None:
110
116
  message = (
111
117
  "```xml\n"
@@ -122,3 +128,68 @@ def test_extract_fenced_xml_preserves_content_after_nested_inner_fence() -> None
122
128
  extracted = extract_fenced_xml_content(message)
123
129
  assert "</illustrations>" in extracted
124
130
  assert "<background>B</background>" in extracted
131
+
132
+ def test_normalize_prompt_workflow_export_rebuilds_fence_from_flattened_transcript() -> None:
133
+ transcript = _flattened_transcript(
134
+ _flattened_attempt(
135
+ *_runtime_context_lines(),
136
+ "<role>R</role>",
137
+ "<background>B</background>",
138
+ "<instructions>I</instructions>",
139
+ "<constraints>C</constraints>",
140
+ "<output_format>O</output_format>",
141
+ "✻ Worked for 1m 7s",
142
+ audit_line="● Audit: pass 15/15",
143
+ ),
144
+ )
145
+ normalized = normalize_prompt_workflow_export(transcript)
146
+ assert normalized.startswith("Audit: pass 15/15\n```xml\n")
147
+ assert normalized.endswith("\n```")
148
+ assert "<runtime_context>" in normalized
149
+ assert "✻ Worked for 1m 7s" not in normalized
150
+
151
+ def test_normalize_prompt_workflow_export_uses_last_audit_attempt() -> None:
152
+ first_attempt = _flattened_attempt(
153
+ "<role>FIRST</role>",
154
+ "<background>Old</background>",
155
+ "<instructions>Old</instructions>",
156
+ "<constraints>Old</constraints>",
157
+ "<output_format>Old</output_format>",
158
+ audit_line="● Audit: pass 15/15",
159
+ )
160
+ second_attempt = _flattened_attempt(
161
+ *_runtime_context_lines(),
162
+ "<role>FINAL</role>",
163
+ "<background>Fresh</background>",
164
+ "<instructions>I</instructions>",
165
+ "<constraints>C</constraints>",
166
+ "<output_format>O</output_format>",
167
+ "✻ Worked for 2m 8s",
168
+ )
169
+ transcript = _flattened_transcript(
170
+ first_attempt,
171
+ "",
172
+ "● Re-emitting the full artifact with the runtime signals added.",
173
+ "",
174
+ second_attempt,
175
+ )
176
+ normalized = normalize_prompt_workflow_export(transcript)
177
+ assert "<role>FINAL</role>" in normalized
178
+ assert "<role>FIRST</role>" not in normalized
179
+
180
+ def test_extract_fenced_xml_content_from_export_supports_flattened_transcript() -> None:
181
+ transcript = _flattened_transcript(
182
+ _flattened_attempt(
183
+ "<role>R</role>",
184
+ "<background>B</background>",
185
+ "<instructions>I</instructions>",
186
+ "<constraints>C</constraints>",
187
+ "<output_format>O</output_format>",
188
+ "✻ Worked for 31s",
189
+ audit_line="● Audit: pass 15/15",
190
+ ),
191
+ )
192
+ extracted = extract_fenced_xml_content_from_export(transcript)
193
+ assert extracted.startswith("<role>R</role>")
194
+ assert "<output_format>O</output_format>" in extracted
195
+ assert "Worked for" not in extracted
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-dev-env",
3
- "version": "1.16.0",
3
+ "version": "1.17.0",
4
4
  "description": "Claude Code development standards — rules, hooks, agents, commands, and skills",
5
5
  "type": "module",
6
6
  "bin": {
@@ -23,6 +23,8 @@ description: >-
23
23
 
24
24
  **Eval contract:** The user-visible behavior this skill must satisfy is defined in `packages/claude-dev-env/skills/prompt-generator/TARGET_OUTPUT.md`. Automated evals live in `packages/claude-dev-env/skills/prompt-generator/evals/prompt-generator.json`.
25
25
 
26
+ **Templates:** Under `packages/claude-dev-env/skills/prompt-generator/templates/`, `skill-from-ground-up.md` is the collaborative prompt for **net-new** checkpointed Agent Skill packages; `skill-refinement-package.md` is the sibling prompt for **existing-skill** multi-file refinements and package-aware polish. Skill-builder and skill-writer in this repo require implementers to use the matching template before checkpointed package work.
27
+
26
28
  **Terminology:** **Prompt artifact** — the full XML inside the single user-facing `xml` fence (the paste-ready handoff). **Scope block** — the five-key contract in §3A that grounds instructions. **Default refinement pipeline** — §10: base draft → section refine → merge → 15-row compliance audit → capped fixes (subagent-internal unless draft-only). **Light self-check** — §8: fast pre-return sanity pass (shape, tools, scope, patterns); *not* the compliance audit. **Compliance audit (15-row)** — §11: hook-keyed rows that set the `Audit: pass|fail` numerator. **Execution handoff** — `/agent-prompt` after explicit user intent to run work.
27
29
 
28
30
  **Hook-survival invariant (read first):** The fenced XML artifact is the primary deliverable and MUST survive Stop-hook retries. If a Stop hook rejects the response, only the surrounding audit summary and runtime signal scaffolding may change between retries—the XML inside the fence MUST be re-emitted in full on every retry. Recovery pattern: re-emit the complete fenced XML first, then adjust the audit line. Trimming, summarizing, or deferring the prompt artifact to satisfy a hook gate is forbidden.
@@ -72,10 +72,10 @@
72
72
  "prompt": "/prompt-generator Create a prompt for an agent that traces a routing bug across shared_utils/export_handler.py, orchestrator.py, and download_manager.py — find where extract_apk is called and whether it handles APK signature check failures",
73
73
  "files": ["packages/samsung-automation/shared_utils/export_handler.py"],
74
74
  "expected_behavior": [
75
- "No tool_use blocks appear after the first fence marker of the prompt artifact",
75
+ "No tool_use blocks appear after the first fence marker of the canonical prompt artifact",
76
76
  "All Glob/Grep discovery calls precede the AskUserQuestion",
77
77
  "All AskUserQuestion interactions precede the fenced block",
78
- "Prompt artifact emits in a single uninterrupted response"
78
+ "Review the last successful Audit + fenced xml pair; blocked retry attempts preserved by flattened transcript exports do not count as additional delivered artifacts"
79
79
  ]
80
80
  },
81
81
  {
@@ -85,7 +85,7 @@
85
85
  "prompt": "/prompt-generator Write a detailed agent-harness prompt for a TDD bug-fix workflow that traces a routing error across 5+ files, with state management for multi-window execution and structured test tracking",
86
86
  "files": [],
87
87
  "expected_behavior": [
88
- "Opening fence has a matching closing fence",
88
+ "The canonical prompt artifact has one opening xml fence and one matching closing fence; flattened transcript exports are normalized to that same boundary before review",
89
89
  "Every XML tag properly opened and closed",
90
90
  "No truncation at numbered-list bullets (the Issue #41 failure mode)",
91
91
  "No mid-sentence cuts or incomplete sections",
@@ -102,8 +102,8 @@
102
102
  "Discovery tool calls attempt to locate scoring logic before prompt generation",
103
103
  "If resolved: prompt references concrete file paths from discovery",
104
104
  "If unresolved: prompt contains <open_question> in <background> for downstream agent",
105
- "No re-entry to discovery after fenced block starts",
106
- "AskUserQuestion may surface the uncertainty if discovery was inconclusive"
105
+ "No re-entry to discovery after the canonical artifact fence starts",
106
+ "AskUserQuestion may surface the uncertainty if discovery was inconclusive; when discovery resolves concrete paths before the artifact, absence of <open_question> is expected"
107
107
  ]
108
108
  },
109
109
  {