codd-dev 0.2.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
codd/generator.py ADDED
@@ -0,0 +1,644 @@
+ """CoDD template generator driven by wave_config."""
+
+ from __future__ import annotations
+
+ from copy import deepcopy
+ from dataclasses import dataclass
+ from pathlib import Path, PurePosixPath
+ import re
+ import shlex
+ import subprocess
+ from typing import Any
+
+ import yaml
+
+ from codd.config import load_project_config
+
+
+ DEFAULT_AI_COMMAND = "claude --print"
+ DEFAULT_RELATION = "depends_on"
+ DEFAULT_SEMANTIC = "governance"
+ DOC_TYPE_BY_DIR = {
+     "requirements": "requirement",
+     "design": "design",
+     "detailed_design": "design",
+     "plan": "plan",
+     "governance": "governance",
+     "test": "test",
+     "operations": "operations",
+ }
+ TYPE_SECTIONS = {
+     "requirement": ["Overview", "Scope", "Open Questions"],
+     "design": ["Overview", "Architecture", "Open Questions"],
+     "plan": ["Overview", "Milestones", "Risks"],
+     "governance": ["Overview", "Decision Log", "Follow-ups"],
+     "test": ["Overview", "Acceptance Criteria", "Failure Criteria"],
+     "operations": ["Overview", "Runbook", "Monitoring"],
+     "document": ["Overview", "Details", "Open Questions"],
+ }
+ DETAILED_DESIGN_SECTIONS = [
+     "Overview",
+     "Mermaid Diagrams",
+     "Ownership Boundaries",
+     "Implementation Implications",
+     "Open Questions",
+ ]
+ MARKDOWN_FENCE_RE = re.compile(r"^\s*```(?:markdown|md)?\s*\n(?P<body>.*)\n```\s*$", re.IGNORECASE | re.DOTALL)
+ FENCE_LINE_RE = re.compile(r"^\s*```(?:[a-zA-Z0-9_-]+)?\s*$")
+ TITLE_HEADING_RE = re.compile(r"^\s*#\s+(?P<title>.+?)\s*$")
+ SECTION_HEADING_RE = re.compile(r"^##\s+.+$", re.MULTILINE)
+ MERMAID_FENCE_RE = re.compile(r"```mermaid\b", re.IGNORECASE)
+ META_PREAMBLE_PATTERNS = (
+     re.compile(r"^\s*the\s+docs?(?:/[a-z0-9._-]+)*\s+directory\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*the\s+dependency\s+documents\s+provided\s+inline\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*the\s+existing\s+(?:file|document|content)\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*now\s+i\s+have\s+enough\s+context\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*no\s+existing\s+file\s+found\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*since the user\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*i\s+need\s+to\s+write\s+just\s+the\s+document\s+body\b.*$", re.IGNORECASE),
+     re.compile(
+         r"^\s*.*\b(?:i(?:'|’)ll\s+(?:now\s+)?(?:output|write|create)|let me(?:\s+now)?\s+write)\b.*$",
+         re.IGNORECASE,
+     ),
+     re.compile(r"^\s*let me(?:\s+(?:review|verify|check|compare))\b.*$", re.IGNORECASE),
+     re.compile(
+         r"^\s*(?:here is|here(?:'|’)s)\b.*\b(?:document|markdown|body|content)\b.*$",
+         re.IGNORECASE,
+     ),
+     re.compile(r"^\s*[-*]\s+.+→\s+covered\b.*$", re.IGNORECASE),
+     re.compile(r"^\s*`[^`]+`\s+を(?:作成|生成)しました。?\s*$"),
+     re.compile(r"^\s*(?:主要|主な)な?構成[::]\s*$"),
+     re.compile(r"^\s*(?:以下|上記)の(?:内容|構成|設計)で(?:作成|生成)しました。?\s*$"),
+ )
+
+
+ @dataclass(frozen=True)
+ class WaveArtifact:
+     """Normalized wave_config entry."""
+
+     wave: int
+     node_id: str
+     output: str
+     title: str
+     depends_on: list[dict[str, Any]]
+     conventions: list[dict[str, Any]]
+
+
+ @dataclass(frozen=True)
+ class GenerationResult:
+     """Result of rendering one artifact."""
+
+     node_id: str
+     path: Path
+     status: str
+
+
+ @dataclass(frozen=True)
+ class DependencyDocument:
+     """Resolved dependency document used as AI context."""
+
+     node_id: str
+     path: Path
+     content: str
+
+
+ def generate_wave(
+     project_root: Path,
+     wave: int,
+     force: bool = False,
+     ai_command: str | None = None,
+ ) -> list[GenerationResult]:
+     """Generate or skip all documents configured for a wave."""
+     from codd.scanner import build_document_node_path_map
+
+     config = _load_project_config(project_root)
+     artifacts = _load_wave_artifacts(config)
+     selected = [artifact for artifact in artifacts if artifact.wave == wave]
+     if not selected:
+         raise ValueError(f"wave_config has no entries for wave {wave}")
+
+     resolved_ai_command = _resolve_ai_command(config, ai_command)
+     global_conventions = _normalize_conventions(config.get("conventions", []))
+     depended_by_map = _build_depended_by_map(artifacts)
+     document_node_paths = build_document_node_path_map(project_root, config)
+
+     results: list[GenerationResult] = []
+     for artifact in selected:
+         output_path = project_root / artifact.output
+         if output_path.exists() and not force:
+             results.append(GenerationResult(node_id=artifact.node_id, path=output_path, status="skipped"))
+             continue
+
+         dependency_documents = _load_dependency_documents(project_root, artifact.depends_on, document_node_paths)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+         combined_conventions = deepcopy(global_conventions) + deepcopy(artifact.conventions)
+         content = _render_document(
+             artifact=artifact,
+             global_conventions=global_conventions,
+             depended_by=depended_by_map.get(artifact.node_id, []),
+             body=_generate_document_body(
+                 artifact=artifact,
+                 dependency_documents=dependency_documents,
+                 conventions=combined_conventions,
+                 ai_command=resolved_ai_command,
+             ),
+         )
+         output_path.write_text(content, encoding="utf-8")
+         results.append(GenerationResult(node_id=artifact.node_id, path=output_path, status="generated"))
+
+     return results
+
+
+ def _load_project_config(project_root: Path) -> dict[str, Any]:
+     return load_project_config(project_root)
+
+
+ def _load_wave_artifacts(config: dict[str, Any]) -> list[WaveArtifact]:
+     wave_config = config.get("wave_config")
+     if not isinstance(wave_config, dict) or not wave_config:
+         raise ValueError("codd.yaml is missing wave_config")
+
+     artifacts: list[WaveArtifact] = []
+     for wave_key, entries in wave_config.items():
+         try:
+             wave = int(wave_key)
+         except (TypeError, ValueError) as exc:
+             raise ValueError(f"wave_config key must be an integer wave number, got {wave_key!r}") from exc
+
+         if not isinstance(entries, list):
+             raise ValueError(f"wave_config[{wave_key!r}] must be a list of artifacts")
+
+         for entry in entries:
+             if not isinstance(entry, dict):
+                 raise ValueError(f"wave_config[{wave_key!r}] entries must be mappings")
+
+             missing = [field for field in ("node_id", "output", "title") if not entry.get(field)]
+             if missing:
+                 raise ValueError(
+                     f"wave_config[{wave_key!r}] entry is missing required fields: {', '.join(missing)}"
+                 )
+
+             artifacts.append(
+                 WaveArtifact(
+                     wave=wave,
+                     node_id=str(entry["node_id"]),
+                     output=str(entry["output"]),
+                     title=str(entry["title"]),
+                     depends_on=_normalize_dependencies(entry.get("depends_on", [])),
+                     conventions=_normalize_conventions(entry.get("conventions", [])),
+                 )
+             )
+
+     return artifacts
+
+
+ def _normalize_dependencies(entries: Any) -> list[dict[str, Any]]:
+     if not entries:
+         return []
+     if not isinstance(entries, list):
+         raise ValueError("depends_on must be a list")
+
+     normalized: list[dict[str, Any]] = []
+     for entry in entries:
+         if isinstance(entry, str):
+             data: dict[str, Any] = {"id": entry}
+         elif isinstance(entry, dict):
+             data = deepcopy(entry)
+         else:
+             raise ValueError(f"depends_on entries must be strings or mappings, got {type(entry).__name__}")
+
+         node_id = data.get("id") or data.get("node_id")
+         if not isinstance(node_id, str) or not node_id:
+             raise ValueError("depends_on entries require a non-empty id")
+
+         data["id"] = node_id
+         data.setdefault("relation", DEFAULT_RELATION)
+         data.setdefault("semantic", DEFAULT_SEMANTIC)
+         normalized.append(data)
+
+     return normalized
+
+
+ def _normalize_conventions(entries: Any) -> list[dict[str, Any]]:
+     if not entries:
+         return []
+     if not isinstance(entries, list):
+         raise ValueError("conventions must be a list")
+
+     normalized: list[dict[str, Any]] = []
+     for entry in entries:
+         if isinstance(entry, str):
+             normalized.append({"targets": [entry], "reason": ""})
+             continue
+         if not isinstance(entry, dict):
+             raise ValueError(f"conventions entries must be strings or mappings, got {type(entry).__name__}")
+
+         data = deepcopy(entry)
+         targets = data.get("targets", [])
+         if isinstance(targets, str):
+             data["targets"] = [targets]
+         elif isinstance(targets, list):
+             data["targets"] = [target for target in targets if isinstance(target, str)]
+         else:
+             raise ValueError("convention targets must be a string or list of strings")
+         data.setdefault("reason", "")
+         normalized.append(data)
+
+     return normalized
+
+
+ def _build_depended_by_map(artifacts: list[WaveArtifact]) -> dict[str, list[dict[str, Any]]]:
+     depended_by: dict[str, list[dict[str, Any]]] = {artifact.node_id: [] for artifact in artifacts}
+
+     for artifact in artifacts:
+         for dependent in artifacts:
+             if dependent.wave <= artifact.wave:
+                 continue
+
+             for dependency in dependent.depends_on:
+                 if dependency["id"] != artifact.node_id:
+                     continue
+
+                 reverse = {"id": dependent.node_id}
+                 for key, value in dependency.items():
+                     if key == "id":
+                         continue
+                     reverse[key] = deepcopy(value)
+                 depended_by[artifact.node_id].append(reverse)
+
+     return depended_by
+
+
+ def _render_document(
+     artifact: WaveArtifact,
+     global_conventions: list[dict[str, Any]],
+     depended_by: list[dict[str, Any]],
+     body: str,
+ ) -> str:
+     doc_type = _infer_doc_type(artifact.output)
+     codd_block = {
+         "node_id": artifact.node_id,
+         "type": doc_type,
+         "depends_on": deepcopy(artifact.depends_on),
+         "depended_by": deepcopy(depended_by),
+         "conventions": deepcopy(global_conventions) + deepcopy(artifact.conventions),
+     }
+     frontmatter = yaml.safe_dump(
+         {"codd": codd_block},
+         allow_unicode=True,
+         sort_keys=False,
+     )
+     return f"---\n{frontmatter}---\n\n{body.rstrip()}\n"
+
+
+ def _infer_doc_type(output_path: str) -> str:
+     parts = PurePosixPath(output_path).parts
+     if len(parts) >= 3 and parts[0] == "docs":
+         return DOC_TYPE_BY_DIR.get(parts[1], "document")
+     return "document"
+
+
+ def _resolve_ai_command(config: dict[str, Any], override: str | None) -> str:
+     raw_command = override if override is not None else config.get("ai_command", DEFAULT_AI_COMMAND)
+     if not isinstance(raw_command, str) or not raw_command.strip():
+         raise ValueError("ai_command must be a non-empty string")
+     return raw_command.strip()
+
+
+ def _load_dependency_documents(
+     project_root: Path,
+     dependencies: list[dict[str, Any]],
+     document_node_paths: dict[str, Path],
+ ) -> list[DependencyDocument]:
+     documents: list[DependencyDocument] = []
+     missing_node_ids: list[str] = []
+     seen_node_ids: set[str] = set()
+
+     for dependency in dependencies:
+         node_id = dependency["id"]
+         if node_id in seen_node_ids:
+             continue
+         seen_node_ids.add(node_id)
+
+         rel_path = document_node_paths.get(node_id)
+         if rel_path is None:
+             missing_node_ids.append(node_id)
+             continue
+
+         file_path = project_root / rel_path
+         if not file_path.exists():
+             raise ValueError(
+                 f"dependency document {node_id!r} maps to {rel_path.as_posix()}, but the file does not exist"
+             )
+
+         documents.append(
+             DependencyDocument(
+                 node_id=node_id,
+                 path=rel_path,
+                 content=file_path.read_text(encoding="utf-8"),
+             )
+         )
+
+     if missing_node_ids:
+         raise ValueError(f"unable to resolve dependency document paths for: {', '.join(missing_node_ids)}")
+
+     return documents
+
+
+ def _generate_document_body(
+     artifact: WaveArtifact,
+     dependency_documents: list[DependencyDocument],
+     conventions: list[dict[str, Any]],
+     ai_command: str,
+ ) -> str:
+     prompt = _build_generation_prompt(artifact, dependency_documents, conventions)
+     return _sanitize_generated_body(
+         artifact.title,
+         _invoke_ai_command(ai_command, prompt),
+         output_path=artifact.output,
+     )
+
+
+ def _build_generation_prompt(
+     artifact: WaveArtifact,
+     dependency_documents: list[DependencyDocument],
+     conventions: list[dict[str, Any]],
+ ) -> str:
+     doc_type = _infer_doc_type(artifact.output)
+     is_detailed_design = _is_detailed_design_output(artifact.output)
+     section_names = DETAILED_DESIGN_SECTIONS if is_detailed_design else TYPE_SECTIONS.get(doc_type, TYPE_SECTIONS["document"])
+     preferred_sections = ", ".join(section_names)
+     required_section_headings = [f"## {index}. {name}" for index, name in enumerate(section_names, start=1)]
+
+     lines = [
+         f"You are writing a CoDD {doc_type} document.",
+         f"Node ID: {artifact.node_id}",
+         f"Title: {artifact.title}",
+         "Use the dependency documents below as the primary context, synthesize them, and write a complete Markdown document body.",
+         (
+             "ABSOLUTE PROHIBITION: **Do not emit** YAML frontmatter, implementation notes, "
+             "TODO placeholders, or any meta-commentary about the writing process "
+             "(e.g. 'I'll write...', 'No existing file found...', 'Here is...', "
+             "'Let me...', 'Now I have enough context...'). **Start directly with the document content.** "
+             "Violating this instruction is a **CRITICAL ERROR** and breaks a release-blocking constraint."
+         ),
+         "Treat requirement documents as the source of truth and reflect every feature, screen, workflow, API, integration, and operational rule they describe.",
+         "Before finalizing, self-check that every capability and constraint mentioned in the depends_on documents is represented in the document body.",
+         "Use concrete tool names, framework names, services, table names, endpoints, thresholds, counts, and timelines wherever applicable.",
+         "Never use vague placeholders such as '推奨なし', '要検討', or 'TBD'.",
+         f"Prefer a structure that covers: {preferred_sections}.",
+         "After the title, immediately continue with section headings such as '## Overview' or '## 1. Overview'; do not acknowledge that you created the file.",
+         "Do not write summary phrases like '`docs/...` を作成しました。', '本設計書は以下を網羅しています:', or '主な構成:'. Write the actual sections instead.",
+     ]
+
+     if is_detailed_design:
+         lines.extend(
+             [
+                 "This artifact lives under docs/detailed_design/ and must serve as a downstream-ready detailed design document.",
+                 "Use Mermaid diagrams when they clarify ownership, dependencies, sequences, states, CRUD boundaries, or module/component structure.",
+                 "Choose only the diagram types justified by the dependency documents; do not force every possible diagram.",
+                 "For every diagram, add concise prose that explains canonical ownership, reuse/import expectations, and implementation boundaries.",
+                 "If a shared type, module, or workflow should have a single owner, state that ownership explicitly to prevent reimplementation drift.",
+                 "Include at least one Mermaid diagram and at least three section headings in the final document body.",
+             ]
+         )
+
+     lines.extend(
+         [
+             "",
+             "Output contract:",
+             "- Write the finished document body now, not a summary of what it would contain.",
+             "- The first content line after the title must be the first required section heading below.",
+             "- Use these section headings exactly once and in this order:",
+         ]
+     )
+     lines.extend(required_section_headings)
+     if is_detailed_design:
+         lines.extend(
+             [
+                 "- Under '## 2. Mermaid Diagrams', include at least one ```mermaid``` fenced block.",
+                 "- Use prose after each Mermaid block to explain ownership boundaries and implementation consequences.",
+             ]
+         )
+
+     if conventions:
+         lines.extend(
+             [
+                 "",
+                 "Non-negotiable conventions:",
+                 "- These are release-blocking constraints. Reflect them explicitly in the document body.",
+                 "- Explicitly state how the document complies with each convention and invariant listed below.",
+                 "- For security or access-control constraints, state the concrete controls in architecture, security, data, or workflow sections.",
+                 "- For legal/privacy constraints, add explicit compliance or data-handling requirements.",
+                 "- For SLA/performance constraints, include measurable thresholds in non-functional sections.",
+             ]
+         )
+         for index, convention in enumerate(conventions, start=1):
+             targets = ", ".join(str(target) for target in convention.get("targets", []) if isinstance(target, str))
+             reason = str(convention.get("reason") or "").strip() or "(no reason provided)"
+             lines.append(f"{index}. Targets: {targets or '(no explicit targets)'}")
+             lines.append(f" Reason: {reason}")
+
+         lines.extend(
+             [
+                 "- Example reflections: tenant isolation in security/data model sections, auth requirements in access control, privacy rules in compliance, performance thresholds in non-functional requirements.",
+             ]
+         )
+
+     lines.extend(
+         [
+             "",
+             "Dependency documents:",
+         ]
+     )
+
+     for document in dependency_documents:
+         lines.extend(
+             [
+                 f"--- BEGIN DEPENDENCY {document.path.as_posix()} ({document.node_id}) ---",
+                 document.content.rstrip(),
+                 f"--- END DEPENDENCY {document.path.as_posix()} ---",
+                 "",
+             ]
+         )
+
+     lines.extend(
+         [
+             "Final instruction: output the real Markdown document body now using the required section headings above. "
+             "Do not describe the document. Do not announce completion. Do not provide a summary list.",
+         ]
+     )
+
+     return "\n".join(lines).rstrip() + "\n"
+
+
+ def _is_detailed_design_output(output_path: str) -> bool:
+     parts = PurePosixPath(output_path).parts
+     return len(parts) >= 2 and parts[0] == "docs" and parts[1] == "detailed_design"
+
+
+ def _invoke_ai_command(ai_command: str, prompt: str) -> str:
+     command = shlex.split(ai_command)
+     if not command:
+         raise ValueError("ai_command must not be empty")
+
+     try:
+         result = subprocess.run(
+             command,
+             input=prompt,
+             capture_output=True,
+             text=True,
+             check=False,
+         )
+     except FileNotFoundError as exc:
+         raise ValueError(f"AI command not found: {command[0]}") from exc
+
+     if result.returncode != 0:
+         detail = result.stderr.strip() or result.stdout.strip() or f"exit code {result.returncode}"
+         raise ValueError(f"AI command failed: {detail}")
+
+     if not result.stdout.strip():
+         raise ValueError("AI command returned empty output")
+
+     return result.stdout
+
+
+ def _sanitize_generated_body(title: str, body: str, *, output_path: str | None = None) -> str:
+     normalized = body.lstrip()
+     if normalized.startswith("---"):
+         match = re.match(r"^---\s*\n.*?\n---\s*\n?", normalized, re.DOTALL)
+         if match:
+             normalized = normalized[match.end():]
+
+     normalized = _strip_meta_preamble(normalized)
+     normalized = normalized.strip()
+     if not normalized:
+         raise ValueError("AI command returned empty output")
+     if re.search(r"\bTODO\b", normalized, re.IGNORECASE):
+         raise ValueError("AI command returned scaffold content containing TODO")
+     if not normalized.startswith("# "):
+         normalized = f"# {title}\n\n{normalized}"
+     normalized = _normalize_title_heading_block(title, normalized)
+     normalized = _collapse_blank_line_runs(normalized)
+     _validate_generated_body(title, normalized, output_path=output_path)
+
+     return normalized.rstrip() + "\n"
+
+
+ def _strip_meta_preamble(body: str) -> str:
+     fenced = MARKDOWN_FENCE_RE.match(body)
+     if fenced:
+         body = fenced.group("body")
+
+     lines = [line for line in body.splitlines() if not _is_meta_preamble_line(line)]
+     _trim_outer_non_content_lines(lines)
+
+     return "\n".join(lines)
+
+
+ def _is_meta_preamble_line(line: str) -> bool:
+     stripped = line.strip()
+     if not stripped or stripped.startswith("#"):
+         return False
+     return any(pattern.match(stripped) for pattern in META_PREAMBLE_PATTERNS)
+
+
+ def _trim_outer_non_content_lines(lines: list[str]) -> None:
+     while lines:
+         stripped = lines[0].strip()
+         if not stripped or stripped == "---":
+             lines.pop(0)
+             continue
+         break
+
+     while lines:
+         stripped = lines[-1].strip()
+         if not stripped or stripped == "---":
+             lines.pop()
+             continue
+         break
+
+
+ def _collapse_blank_line_runs(body: str) -> str:
+     lines = body.splitlines()
+     collapsed: list[str] = []
+     in_fence = False
+     blank_run = 0
+
+     for line in lines:
+         if FENCE_LINE_RE.match(line.strip()):
+             in_fence = not in_fence
+             blank_run = 0
+             collapsed.append(line)
+             continue
+
+         if not in_fence and not line.strip():
+             blank_run += 1
+             if blank_run > 1:
+                 continue
+         else:
+             blank_run = 0
+
+         collapsed.append(line)
+
+     return "\n".join(collapsed)
+
+
+ def _normalize_title_heading_block(title: str, body: str) -> str:
+     lines = body.splitlines()
+     if not lines:
+         return body
+
+     expected = re.sub(r"\s+", " ", title).strip().casefold()
+     if _normalize_heading_text(lines[0]) != expected:
+         return body
+
+     retained: list[str] = [lines[0]]
+     index = 1
+     while index < len(lines):
+         stripped = lines[index].strip()
+         if not stripped or stripped == "---" or FENCE_LINE_RE.match(stripped):
+             index += 1
+             continue
+         if _is_meta_preamble_line(lines[index]):
+             index += 1
+             continue
+         if _normalize_heading_text(lines[index]) == expected:
+             index += 1
+             continue
+         break
+
+     if index < len(lines):
+         retained.extend(["", *lines[index:]])
+
+     return "\n".join(retained)
+
+
+ def _normalize_heading_text(line: str) -> str | None:
+     match = TITLE_HEADING_RE.match(line)
+     if not match:
+         return None
+     return re.sub(r"\s+", " ", match.group("title")).strip().casefold()
+
+
+ def _validate_generated_body(title: str, body: str, *, output_path: str | None = None) -> None:
+     if not SECTION_HEADING_RE.search(body):
+         raise ValueError(f"AI command returned unstructured summary for {title!r}; missing section headings")
+
+     first_content_line = _first_content_line_after_title(body)
+     if first_content_line and any(pattern.match(first_content_line) for pattern in META_PREAMBLE_PATTERNS):
+         raise ValueError(f"AI command returned meta commentary instead of document content for {title!r}")
+
+     if output_path and _is_detailed_design_output(output_path):
+         if not MERMAID_FENCE_RE.search(body):
+             raise ValueError(f"AI command returned detailed design without Mermaid diagrams for {title!r}")
+
+
+ def _first_content_line_after_title(body: str) -> str | None:
+     lines = body.splitlines()
+     start_index = 1 if lines and TITLE_HEADING_RE.match(lines[0]) else 0
+     for line in lines[start_index:]:
+         stripped = line.strip()
+         if stripped:
+             return stripped
+     return None
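
For orientation, here is a hypothetical `codd.yaml` fragment that this module would accept. Only the field names (`ai_command`, `conventions`, `wave_config`, `node_id`, `output`, `title`, `depends_on`, `targets`, `reason`) are taken from what `_load_wave_artifacts`, `_normalize_dependencies`, `_normalize_conventions`, and `_resolve_ai_command` parse; every concrete value is invented for illustration:

```yaml
# Hypothetical config sketch; all node IDs, paths, and reasons are made up.
ai_command: "claude --print"             # optional; this is DEFAULT_AI_COMMAND
conventions:                             # global conventions, merged into every artifact
  - targets: ["security", "data-model"]
    reason: "Row-level tenant isolation is release-blocking."
wave_config:
  1:
    - node_id: req-core
      output: docs/requirements/core.md      # docs/<dir>/ selects the doc type
      title: Core Requirements
  2:
    - node_id: design-core
      output: docs/detailed_design/core.md   # triggers the detailed-design prompt rules
      title: Core Detailed Design
      depends_on:
        - id: req-core                       # bare strings also work; relation and
          relation: depends_on               # semantic default to these values
          semantic: governance
      conventions:
        - "diagram-ownership"                # string form becomes {targets: [...], reason: ""}
```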
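
And a minimal driver sketch (not from the package docs), assuming a `codd.yaml` like the fragment above sits in the project root and the configured AI CLI is on PATH; the loader itself lives in `codd.config` and is not part of this diff:

```python
# Usage sketch for generate_wave; outputs that already exist are skipped
# unless force=True, and each artifact is reported as a GenerationResult.
from pathlib import Path

from codd.generator import generate_wave

results = generate_wave(
    project_root=Path("."),
    wave=1,
    force=False,       # set True to regenerate existing outputs
    ai_command=None,   # None falls back to codd.yaml, then "claude --print"
)
for result in results:
    # GenerationResult carries node_id, path, and "generated" or "skipped"
    print(f"{result.status:>9}  {result.node_id}  {result.path}")
```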